Skip to content

Commit 73ae8dd

Browse files
committed
Renamed params to min_update/max_update and min_key/max_key
1 parent e3b78b1 commit 73ae8dd

File tree

4 files changed: 46 additions and 45 deletions.

data_diff/__init__.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -37,11 +37,11 @@ def diff_tables(
3737
# Extra columns to compare
3838
extra_columns: Tuple[str, ...] = (),
3939
# Start/end key_column values, used to restrict the segment
40-
start_key: DbKey = None,
41-
end_key: DbKey = None,
40+
min_key: DbKey = None,
41+
max_key: DbKey = None,
4242
# Start/end update_column values, used to restrict the segment
43-
min_time: DbTime = None,
44-
max_time: DbTime = None,
43+
min_updated: DbTime = None,
44+
max_updated: DbTime = None,
4545
# Into how many segments to bisect per iteration
4646
bisection_factor: int = DEFAULT_BISECTION_FACTOR,
4747
# When should we stop bisecting and compare locally (in row count)
@@ -68,10 +68,10 @@ def diff_tables(
6868
key_column=key_column,
6969
update_column=update_column,
7070
extra_columns=extra_columns,
71-
start_key=start_key,
72-
end_key=end_key,
73-
min_time=min_time,
74-
max_time=max_time,
71+
min_key=min_key,
72+
max_key=max_key,
73+
min_updated=min_updated,
74+
max_updated=max_updated,
7575
)
7676
for t in tables
7777
]

data_diff/__main__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,8 @@ def main(
111111

112112
try:
113113
options = dict(
114-
min_time=min_age and parse_time_before_now(min_age), max_time=max_age and parse_time_before_now(max_age)
114+
min_updated=max_age and parse_time_before_now(max_age),
115+
max_updated=min_age and parse_time_before_now(min_age),
115116
)
116117
except ParseError as e:
117118
logging.error("Error while parsing age expression: %s" % e)

data_diff/diff_tables.py

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -54,28 +54,28 @@ class TableSegment:
5454
extra_columns: Tuple[str, ...] = ()
5555

5656
# Start/end key_column values, used to restrict the segment
57-
start_key: DbKey = None
58-
end_key: DbKey = None
57+
min_key: DbKey = None
58+
max_key: DbKey = None
5959

6060
# Start/end update_column values, used to restrict the segment
61-
min_time: DbTime = None
62-
max_time: DbTime = None
61+
min_update: DbTime = None
62+
max_update: DbTime = None
6363

6464
def __post_init__(self):
65-
if not self.update_column and (self.min_time or self.max_time):
66-
raise ValueError("Error: min_time/max_time feature requires to specify 'update_column'")
65+
if not self.update_column and (self.min_update or self.max_update):
66+
raise ValueError("Error: min_update/max_update feature requires to specify 'update_column'")
6767

6868
def _make_key_range(self):
69-
if self.start_key is not None:
70-
yield Compare("<=", str(self.start_key), self.key_column)
71-
if self.end_key is not None:
72-
yield Compare("<", self.key_column, str(self.end_key))
69+
if self.min_key is not None:
70+
yield Compare("<=", str(self.min_key), self.key_column)
71+
if self.max_key is not None:
72+
yield Compare("<", self.key_column, str(self.max_key))
7373

7474
def _make_update_range(self):
75-
if self.min_time is not None:
76-
yield Compare("<=", Time(self.min_time), self.update_column)
77-
if self.max_time is not None:
78-
yield Compare("<", self.update_column, Time(self.max_time))
75+
if self.min_update is not None:
76+
yield Compare("<=", Time(self.min_update), self.update_column)
77+
if self.max_update is not None:
78+
yield Compare("<", self.update_column, Time(self.max_update))
7979

8080
def _make_select(self, *, table=None, columns=None, where=None, group_by=None, order_by=None):
8181
if columns is None:
@@ -98,21 +98,21 @@ def get_values(self) -> list:
9898
def choose_checkpoints(self, count: int) -> List[DbKey]:
9999
"Suggests a bunch of evenly-spaced checkpoints to split by (not including start, end)"
100100
assert self.is_bounded
101-
return split_space(self.start_key, self.end_key, count)
101+
return split_space(self.min_key, self.max_key, count)
102102

103103
def segment_by_checkpoints(self, checkpoints: List[DbKey]) -> List["TableSegment"]:
104104
"Split the current TableSegment into a bunch of smaller ones, separated by the given checkpoints"
105105

106-
if self.start_key and self.end_key:
107-
assert all(self.start_key <= c < self.end_key for c in checkpoints)
106+
if self.min_key and self.max_key:
107+
assert all(self.min_key <= c < self.max_key for c in checkpoints)
108108
checkpoints.sort()
109109

110110
# Calculate sub-segments
111-
positions = [self.start_key] + checkpoints + [self.end_key]
111+
positions = [self.min_key] + checkpoints + [self.max_key]
112112
ranges = list(zip(positions[:-1], positions[1:]))
113113

114114
# Create table segments
115-
tables = [self.new(start_key=s, end_key=e) for s, e in ranges]
115+
tables = [self.new(min_key=s, max_key=e) for s, e in ranges]
116116

117117
return tables
118118

@@ -159,7 +159,7 @@ def query_key_range(self) -> Tuple[int, int]:
159159

160160
@property
161161
def is_bounded(self):
162-
return self.start_key is not None and self.end_key is not None
162+
return self.min_key is not None and self.max_key is not None
163163

164164

165165
def diff_sets(a: set, b: set) -> Iterator:
@@ -229,11 +229,11 @@ def diff_tables(self, table1: TableSegment, table2: TableSegment) -> DiffResult:
229229
mins, maxs = zip(*key_ranges)
230230

231231
# We add 1 because our ranges are exclusive of the end (like in Python)
232-
start_key = min(mins)
233-
end_key = max(maxs) + 1
232+
min_key = min(mins)
233+
max_key = max(maxs) + 1
234234

235-
table1 = table1.new(start_key=start_key, end_key=end_key)
236-
table2 = table2.new(start_key=start_key, end_key=end_key)
235+
table1 = table1.new(min_key=min_key, max_key=max_key)
236+
table2 = table2.new(min_key=min_key, max_key=max_key)
237237

238238
return self._bisect_and_diff_tables(table1, table2)
239239

@@ -242,7 +242,7 @@ def _bisect_and_diff_tables(self, table1, table2, level=0, max_rows=None):
242242

243243
if max_rows is None:
244244
# We can be sure that row_count <= max_rows
245-
max_rows = table1.end_key - table1.start_key
245+
max_rows = table1.max_key - table1.min_key
246246

247247
# If count is below the threshold, just download and compare the columns locally
248248
# This saves time, as bisection speed is limited by ping and query performance.
@@ -253,7 +253,7 @@ def _bisect_and_diff_tables(self, table1, table2, level=0, max_rows=None):
253253
yield from diff
254254
return
255255

256-
# Choose evenly spaced checkpoints (according to start_key and end_key)
256+
# Choose evenly spaced checkpoints (according to min_key and max_key)
257257
checkpoints = table1.choose_checkpoints(self.bisection_factor - 1)
258258

259259
# Create new instances of TableSegment between each checkpoint
@@ -272,8 +272,8 @@ def _bisect_and_diff_tables(self, table1, table2, level=0, max_rows=None):
272272
def _diff_tables(self, table1, table2, level=0, segment_index=None, segment_count=None):
273273
logger.info(
274274
". " * level + f"Diffing segment {segment_index}/{segment_count}, "
275-
f"key-range: {table1.start_key}..{table2.end_key}, "
276-
f"size: {table2.end_key-table1.start_key}"
275+
f"key-range: {table1.min_key}..{table2.max_key}, "
276+
f"size: {table2.max_key-table1.min_key}"
277277
)
278278

279279
(count1, checksum1), (count2, checksum2) = self._threaded_call("count_and_checksum", [table1, table2])

tests/test_diff_tables.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,8 @@ def setUp(self):
6363
self.preql.commit()
6464

6565
def test_init(self):
66-
a = TableSegment(self.connection, ("a",), "id", "datetime", max_time=self.now.datetime)
67-
self.assertRaises(ValueError, TableSegment, self.connection, ("a",), "id", max_time=self.now.datetime)
66+
a = TableSegment(self.connection, ("a",), "id", "datetime", max_update=self.now.datetime)
67+
self.assertRaises(ValueError, TableSegment, self.connection, ("a",), "id", max_update=self.now.datetime)
6868

6969
def test_basic(self):
7070
differ = TableDiffer(10, 100)
@@ -79,24 +79,24 @@ def test_basic(self):
7979
def test_offset(self):
8080
differ = TableDiffer(2, 10)
8181
sec1 = self.now.shift(seconds=-1).datetime
82-
a = TableSegment(self.connection, ("a",), "id", "datetime", max_time=sec1)
83-
b = TableSegment(self.connection, ("b",), "id", "datetime", max_time=sec1)
82+
a = TableSegment(self.connection, ("a",), "id", "datetime", max_update=sec1)
83+
b = TableSegment(self.connection, ("b",), "id", "datetime", max_update=sec1)
8484
assert a.count() == 4
8585
assert b.count() == 3
8686

8787
assert not list(differ.diff_tables(a, a))
8888
self.assertEqual(len(list(differ.diff_tables(a, b))), 1)
8989

90-
a = TableSegment(self.connection, ("a",), "id", "datetime", min_time=sec1)
91-
b = TableSegment(self.connection, ("b",), "id", "datetime", min_time=sec1)
90+
a = TableSegment(self.connection, ("a",), "id", "datetime", min_update=sec1)
91+
b = TableSegment(self.connection, ("b",), "id", "datetime", min_update=sec1)
9292
assert a.count() == 2
9393
assert b.count() == 2
9494
assert not list(differ.diff_tables(a, b))
9595

9696
day1 = self.now.shift(days=-1).datetime
9797

98-
a = TableSegment(self.connection, ("a",), "id", "datetime", min_time=day1, max_time=sec1)
99-
b = TableSegment(self.connection, ("b",), "id", "datetime", min_time=day1, max_time=sec1)
98+
a = TableSegment(self.connection, ("a",), "id", "datetime", min_update=day1, max_update=sec1)
99+
b = TableSegment(self.connection, ("b",), "id", "datetime", min_update=day1, max_update=sec1)
100100
assert a.count() == 3
101101
assert b.count() == 2
102102
assert not list(differ.diff_tables(a, a))

0 commit comments

Comments
 (0)