Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ If you pass `--stats` you'll see e.g. what % of rows were different.

data-diff collects anonymous usage data to help our team improve the tool and to apply development efforts to where our users need them most.

We capture two events, one when the data-diff run starts and one when it finished. No user data or potentially sensitive information is or ever will be collected. The captured data is limited to:
We capture two events: one when the data-diff run starts and one when it finishes. No user data or potentially sensitive information is or ever will be collected. The captured data is limited to:

- Operating System and Python version

Expand Down
6 changes: 2 additions & 4 deletions data_diff/databases/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,6 @@ def _parse_type(
elif issubclass(cls, Decimal):
if numeric_scale is None:
numeric_scale = 0 # Needed for Oracle.
# raise ValueError(
# f"{self.name}: Unexpected numeric_scale is NULL, for column {'.'.join(table_path)}.{col_name} of type {type_repr}."
# )
return cls(precision=numeric_scale)

elif issubclass(cls, Float):
Expand All @@ -176,7 +173,8 @@ def select_table_schema(self, path: DbPath) -> str:
schema, table = self._normalize_table_path(path)

return (
"SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM information_schema.columns "
"SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale "
"FROM information_schema.columns "
f"WHERE table_name = '{table}' AND table_schema = '{schema}'"
)

Expand Down
3 changes: 2 additions & 1 deletion data_diff/databases/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def select_table_schema(self, path: DbPath) -> str:
schema, table = self._normalize_table_path(path)

return (
f"SELECT column_name, data_type, 3 as datetime_precision, 3 as numeric_precision FROM INFORMATION_SCHEMA.COLUMNS "
"SELECT column_name, data_type, 3 as datetime_precision, 3 as numeric_precision "
"FROM INFORMATION_SCHEMA.COLUMNS "
f"WHERE table_name = '{table}' AND table_schema = '{schema}'"
)

Expand Down
10 changes: 4 additions & 6 deletions data_diff/databases/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,15 @@ def __init__(self, *, schema: str, **kw):
snowflake, serialization, default_backend = import_snowflake()
logging.getLogger("snowflake.connector").setLevel(logging.WARNING)

# Got an error: snowflake.connector.network.RetryRequest: could not find io module state (interpreter shutdown?)
# Ignore the error: snowflake.connector.network.RetryRequest: could not find io module state
# It's a known issue: https://github.com/snowflakedb/snowflake-connector-python/issues/145
# Workaround suggested in that issue's comments: disable the "snowflake.connector.network" logger.
logging.getLogger("snowflake.connector.network").disabled = True

assert '"' not in schema, "Schema name should not contain quotes!"
if (
"key" in kw
): # if private keys are used for Snowflake connection, read in key from path specified and pass as "private_key" to connector.
# If a private key is used, read it from the specified path and pass it as "private_key" to the connector.
if "key" in kw:
with open(kw.get("key"), "rb") as key:
if 'password' in kw:
if "password" in kw:
raise ConnectError("Cannot use password and key at the same time")
p_key = serialization.load_pem_private_key(
key.read(),
Expand Down
5 changes: 3 additions & 2 deletions data_diff/tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# This module contains all the functionality related to the anonymous tracking of data-diff use.
#

import toml
import logging
import os
import json
Expand All @@ -12,6 +11,8 @@
import urllib.request
from uuid import uuid4

import toml

TRACK_URL = "https://api.perfalytics.com/track"
START_EVENT = "os_diff_run_start"
END_EVENT = "os_diff_run_end"
Expand All @@ -25,7 +26,7 @@ def _load_profile():
try:
with open(DEFAULT_PROFILE) as f:
conf = toml.load(f)
except FileNotFoundError as e:
except FileNotFoundError:
conf = {}

if "anonymous_id" not in conf:
Expand Down