Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 245aeb6

Browse files
committed
Updated docs; Ran black
1 parent abaabe8 commit 245aeb6

File tree

11 files changed

+54 additions, -36 deletions

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,7 @@ Options:
252252

253253
Same-DB diff only:
254254
- `-m`, `--materialize` - Materialize the diff results into a new table in the database.
255+
If a table exists by that name, it will be replaced.
255256
Use `%t` in the name to place a timestamp.
256257
Example: `-m test_mat_%t`
257258
- `--assume-unique-key` - Skip validating the uniqueness of the key column during joindiff, which is costly in non-cloud dbs.

data_diff/__main__.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,9 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
8080
@click.argument("table1", required=False)
8181
@click.argument("database2", required=False)
8282
@click.argument("table2", required=False)
83-
@click.option("-k", "--key-columns", default=[], multiple=True, help="Names of primary key columns. Default='id'.", metavar="NAME")
83+
@click.option(
84+
"-k", "--key-columns", default=[], multiple=True, help="Names of primary key columns. Default='id'.", metavar="NAME"
85+
)
8486
@click.option("-t", "--update-column", default=None, help="Name of updated_at/last_updated column", metavar="NAME")
8587
@click.option(
8688
"-c",
@@ -110,7 +112,7 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
110112
"--materialize",
111113
default=None,
112114
metavar="TABLE_NAME",
113-
help="Materialize the diff results into a new table in the database. (joindiff only)",
115+
help="(joindiff only) Materialize the diff results into a new table in the database. If a table exists by that name, it will be replaced.",
114116
)
115117
@click.option(
116118
"--min-age",
@@ -345,7 +347,7 @@ def _main(
345347
*key_columns,
346348
update_column,
347349
*columns,
348-
)
350+
),
349351
)
350352

351353
logging.info(f"Diffing using columns: key={key_columns} update={update_column} extra={columns}")
@@ -385,7 +387,7 @@ def _main(
385387
if differ.stats:
386388
print("Extra-Info:")
387389
for k, v in differ.stats.items():
388-
print(f' {k} = {v}')
390+
print(f" {k} = {v}")
389391
else:
390392
for op, values in diff_iter:
391393
color = COLOR_SCHEME[op]

data_diff/config.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,13 @@ def _apply_config(config: Dict[str, Any], run_name: str, kw: Dict[str, Any]):
2626
else:
2727
run_name = "default"
2828

29-
if 'database1' in kw:
30-
for attr in ('table1', 'database2', 'table2'):
29+
if "database1" in kw:
30+
for attr in ("table1", "database2", "table2"):
3131
if kw[attr] is None:
3232
raise ValueError(f"Specified database1 but not {attr}. Must specify all 4 arguments, or niether.")
3333

3434
for index in "12":
35-
run_args[index] = {attr: kw.pop(f"{attr}{index}") for attr in ('database', 'table')}
35+
run_args[index] = {attr: kw.pop(f"{attr}{index}") for attr in ("database", "table")}
3636

3737
# Process databases + tables
3838
for index in "12":

data_diff/databases/base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -129,8 +129,8 @@ def query(self, sql_ast: Union[Expr, Generator], res_type: type = list):
129129
if getattr(self, "_interactive", False) and isinstance(sql_ast, Select):
130130
explained_sql = compiler.compile(Explain(sql_ast))
131131
explain = self._query(explained_sql)
132-
for row, in explain:
133-
logger.debug(f'EXPLAIN: {row}')
132+
for (row,) in explain:
133+
logger.debug(f"EXPLAIN: {row}")
134134
answer = input("Continue? [y/n] ")
135135
if not answer.lower() in ["y", "yes"]:
136136
sys.exit(1)

data_diff/diff_tables.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -157,8 +157,8 @@ def _bisect_and_diff_tables(self, table1, table2):
157157
raise NotImplementedError("Composite key not supported yet!")
158158
if len(table2.key_columns) > 1:
159159
raise NotImplementedError("Composite key not supported yet!")
160-
key1 ,= table1.key_columns
161-
key2 ,= table2.key_columns
160+
(key1,) = table1.key_columns
161+
(key2,) = table2.key_columns
162162

163163
key_type = table1._schema[key1]
164164
key_type2 = table2._schema[key2]
@@ -214,7 +214,7 @@ def _bisect_and_diff_segments(
214214
assert table1.is_bounded and table2.is_bounded
215215

216216
# Choose evenly spaced checkpoints (according to min_key and max_key)
217-
biggest_table = max(table1, table2, key=methodcaller('approximate_size'))
217+
biggest_table = max(table1, table2, key=methodcaller("approximate_size"))
218218
checkpoints = biggest_table.choose_checkpoints(self.bisection_factor - 1)
219219

220220
# Create new instances of TableSegment between each checkpoint

data_diff/joindiff_tables.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,7 @@ def append_to_table_oracle(path: DbPath, expr: Expr):
9292

9393

9494
def append_to_table(path: DbPath, expr: Expr):
95-
"""Append to table
96-
"""
95+
"""Append to table"""
9796
assert expr.schema, expr
9897
t = table(path, schema=expr.schema)
9998
yield t.create(if_not_exists=True) # uses expr.schema

data_diff/queries/api.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def cte(expr: Expr, *, name: Optional[str] = None, params: Sequence[str] = None)
3232

3333
def table(*path: str, schema: Schema = None) -> TablePath:
3434
if len(path) == 1 and isinstance(path[0], tuple):
35-
path ,= path
35+
(path,) = path
3636
if not all(isinstance(i, str) for i in path):
3737
raise TypeError(f"All elements of table path must be of type 'str'. Got: {path}")
3838
return TablePath(path, schema)

data_diff/queries/ast_classes.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,7 @@ def type(self):
257257
types = {get_type(i) for i in self.args}
258258
if len(types) > 1:
259259
raise TypeError(f"Expected all args to have the same type, got {types}")
260-
t ,= types
260+
(t,) = types
261261
return t
262262

263263

@@ -607,6 +607,7 @@ class Explain(ExprNode):
607607
def compile(self, c: Compiler) -> str:
608608
return c.database.explain_as_text(c.compile(self.select))
609609

610+
610611
# DDL
611612

612613

data_diff/table_segment.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -81,11 +81,11 @@ def with_schema(self) -> "TableSegment":
8181
def _make_key_range(self):
8282
if self.min_key is not None:
8383
assert len(self.key_columns) == 1
84-
k ,= self.key_columns
84+
(k,) = self.key_columns
8585
yield self.min_key <= this[k]
8686
if self.max_key is not None:
8787
assert len(self.key_columns) == 1
88-
k ,= self.key_columns
88+
(k,) = self.key_columns
8989
yield this[k] < self.max_key
9090

9191
def _make_update_range(self):
@@ -178,7 +178,7 @@ def query_key_range(self) -> Tuple[int, int]:
178178
"""Query database for minimum and maximum key. This is used for setting the initial bounds."""
179179
# Normalizes the result (needed for UUIDs) after the min/max computation
180180
# TODO better error if there is no schema
181-
k ,= self.key_columns
181+
(k,) = self.key_columns
182182
select = self._make_select().select(
183183
ApplyFuncAndNormalizeAsString(this[k], min_),
184184
ApplyFuncAndNormalizeAsString(this[k], max_),

tests/test_diff_tables.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ def _class_per_db_dec(filter_name=None):
4444
]
4545
return parameterized_class(("name", "db_name"), names)
4646

47+
4748
def _table_segment(database, table_path, key_columns, *args, **kw):
4849
if isinstance(key_columns, str):
4950
key_columns = (key_columns,)
@@ -195,16 +196,24 @@ def test_basic(self):
195196
def test_offset(self):
196197
differ = HashDiffer(bisection_factor=2, bisection_threshold=10)
197198
sec1 = self.now.shift(seconds=-1).datetime
198-
a = _table_segment(self.connection, self.table_src_path, "id", "datetime", max_update=sec1, case_sensitive=False)
199-
b = _table_segment(self.connection, self.table_dst_path, "id", "datetime", max_update=sec1, case_sensitive=False)
199+
a = _table_segment(
200+
self.connection, self.table_src_path, "id", "datetime", max_update=sec1, case_sensitive=False
201+
)
202+
b = _table_segment(
203+
self.connection, self.table_dst_path, "id", "datetime", max_update=sec1, case_sensitive=False
204+
)
200205
assert a.count() == 4
201206
assert b.count() == 3
202207

203208
assert not list(differ.diff_tables(a, a))
204209
self.assertEqual(len(list(differ.diff_tables(a, b))), 1)
205210

206-
a = _table_segment(self.connection, self.table_src_path, "id", "datetime", min_update=sec1, case_sensitive=False)
207-
b = _table_segment(self.connection, self.table_dst_path, "id", "datetime", min_update=sec1, case_sensitive=False)
211+
a = _table_segment(
212+
self.connection, self.table_src_path, "id", "datetime", min_update=sec1, case_sensitive=False
213+
)
214+
b = _table_segment(
215+
self.connection, self.table_dst_path, "id", "datetime", min_update=sec1, case_sensitive=False
216+
)
208217
assert a.count() == 2
209218
assert b.count() == 2
210219
assert not list(differ.diff_tables(a, b))

0 commit comments

Comments
 (0)