datafold · erezsh · Aug 26, 2022 · Aug 26, 2022
diff --git a/README.md b/README.md
@@ -476,7 +476,7 @@ If you pass `--stats` you'll see e.g. what % of rows were different.
 
 data-diff collects anonymous usage data to help our team improve the tool and to apply development efforts to where our users need them most.
 
-We capture two events, one when the data-diff run starts and one when it finished. No user data or potentially sensitive information is or ever will be collected. The captured data is limited to:
+We capture two events: one when the data-diff run starts and one when it finishes. No user data or potentially sensitive information is or ever will be collected. The captured data is limited to:
 
 - Operating System and Python version
 

diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py
@@ -154,9 +154,6 @@ def _parse_type(
  elif issubclass(cls, Decimal):
  if numeric_scale is None:
  numeric_scale = 0 # Needed for Oracle.
- # raise ValueError(
- # f"{self.name}: Unexpected numeric_scale is NULL, for column {'.'.join(table_path)}.{col_name} of type {type_repr}."
- # )
  return cls(precision=numeric_scale)
 
  elif issubclass(cls, Float):
@@ -176,7 +173,8 @@ def select_table_schema(self, path: DbPath) -> str:
  schema, table = self._normalize_table_path(path)
 
  return (
- "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM information_schema.columns "
+ "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale "
+ "FROM information_schema.columns "
  f"WHERE table_name = '{table}' AND table_schema = '{schema}'"
  )
 

diff --git a/data_diff/databases/presto.py b/data_diff/databases/presto.py
@@ -89,7 +89,8 @@ def select_table_schema(self, path: DbPath) -> str:
  schema, table = self._normalize_table_path(path)
 
  return (
- f"SELECT column_name, data_type, 3 as datetime_precision, 3 as numeric_precision FROM INFORMATION_SCHEMA.COLUMNS "
+ "SELECT column_name, data_type, 3 as datetime_precision, 3 as numeric_precision "
+ "FROM INFORMATION_SCHEMA.COLUMNS "
  f"WHERE table_name = '{table}' AND table_schema = '{schema}'"
  )
 

diff --git a/data_diff/databases/snowflake.py b/data_diff/databases/snowflake.py
@@ -31,17 +31,15 @@ def __init__(self, *, schema: str, **kw):
  snowflake, serialization, default_backend = import_snowflake()
  logging.getLogger("snowflake.connector").setLevel(logging.WARNING)
 
- # Got an error: snowflake.connector.network.RetryRequest: could not find io module state (interpreter shutdown?)
+ # Silence the logged error: snowflake.connector.network.RetryRequest: could not find io module state
  # It's a known issue: https://github.com/snowflakedb/snowflake-connector-python/issues/145
- # Found a quick solution in comments
  logging.getLogger("snowflake.connector.network").disabled = True
 
  assert '"' not in schema, "Schema name should not contain quotes!"
- if (
- "key" in kw
- ): # if private keys are used for Snowflake connection, read in key from path specified and pass as "private_key" to connector.
+ # If a private key is used, read it from the specified path and pass it as "private_key" to the connector.
+ if "key" in kw:
  with open(kw.get("key"), "rb") as key:
- if 'password' in kw:
+ if "password" in kw:
  raise ConnectError("Cannot use password and key at the same time")
  p_key = serialization.load_pem_private_key(
  key.read(),

diff --git a/data_diff/tracking.py b/data_diff/tracking.py
@@ -2,7 +2,6 @@
 # This module contains all the functionality related to the anonymous tracking of data-diff use.
 #
 
-import toml
 import logging
 import os
 import json
@@ -12,6 +11,8 @@
 import urllib.request
 from uuid import uuid4
 
+import toml
+
 TRACK_URL = "https://api.perfalytics.com/track"
 START_EVENT = "os_diff_run_start"
 END_EVENT = "os_diff_run_end"
@@ -25,7 +26,7 @@ def _load_profile():
  try:
  with open(DEFAULT_PROFILE) as f:
  conf = toml.load(f)
- except FileNotFoundError as e:
+ except FileNotFoundError:
  conf = {}
 
  if "anonymous_id" not in conf: