Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.
Next Next commit
add tests for checking precision
  • Loading branch information
vvkh committed Dec 29, 2023
commit c13e21b45452fcf3bde2db83f36b04b1f124c6dc
1 change: 1 addition & 0 deletions data_diff/databases/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ class BaseDialect(abc.ABC):
SUPPORTS_INDEXES: ClassVar[bool] = False
PREVENT_OVERFLOW_WHEN_CONCAT: ClassVar[bool] = False
TYPE_CLASSES: ClassVar[Dict[str, Type[ColType]]] = {}
DEFAULT_NUMERIC_PRECISION: ClassVar[int] = 0 # effective precision when type is just "NUMERIC"

PLACEHOLDER_TABLE = None # Used for Oracle

Expand Down
3 changes: 3 additions & 0 deletions data_diff/databases/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ class Dialect(BaseDialect):
}
TYPE_ARRAY_RE = re.compile(r"ARRAY<(.+)>")
TYPE_STRUCT_RE = re.compile(r"STRUCT<(.+)>")
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#parameterized_decimal_type
# The default scale is 9, which means a number can have up to 9 digits after the decimal point.
DEFAULT_NUMERIC_PRECISION = 9

def random(self) -> str:
    """Return the SQL text ``RAND()``.

    NOTE(review): presumably this is the dialect's expression for producing
    a random number inside generated queries -- confirm against callers.
    """
    rand_expr = "RAND()"
    return rand_expr
Expand Down
4 changes: 4 additions & 0 deletions data_diff/databases/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ class Dialect(BaseDialect):
SUPPORTS_PRIMARY_KEY = True
SUPPORTS_INDEXES = True

# https://duckdb.org/docs/sql/data_types/numeric#fixed-point-decimals
# If neither is specified, WIDTH and SCALE default to 18 and 3, i.e. DECIMAL(18, 3).
DEFAULT_NUMERIC_PRECISION = 3

TYPE_CLASSES = {
# Timestamps
"TIMESTAMP WITH TIME ZONE": TimestampTZ,
Expand Down
6 changes: 6 additions & 0 deletions data_diff/databases/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ class PostgresqlDialect(BaseDialect):
SUPPORTS_PRIMARY_KEY: ClassVar[bool] = True
SUPPORTS_INDEXES = True

# https://www.postgresql.org/docs/current/datatype-numeric.html#DATATYPE-NUMERIC-DECIMAL
# Declaring NUMERIC without any precision or scale creates an “unconstrained numeric” column
# in which numeric values of any length can be stored, up to the implementation limits.
# https://www.postgresql.org/docs/current/datatype-numeric.html#DATATYPE-NUMERIC-TABLE
# The implementation limit is up to 16383 digits after the decimal point.
DEFAULT_NUMERIC_PRECISION = 16383

TYPE_CLASSES: ClassVar[Dict[str, Type[ColType]]] = {
# Timestamps
"timestamp with time zone": TimestampTZ,
Expand Down
33 changes: 33 additions & 0 deletions tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,36 @@ def test_three_part_support(self):
d = db.query_table_schema(part.path)
assert len(d) == 1
db.query(part.drop())


@test_each_database
class TestNumericPrecisionParsing(unittest.TestCase):
    """Check the precision parsed from NUMERIC column declarations.

    Each test creates a throwaway single-column table, reads its schema back
    through the database driver, and compares the parsed ``precision`` of the
    ``value`` column with the expected number.
    """

    def _parsed_precision(self, db, column_type: str):
        """Create a temp table with one ``value`` column and return its parsed precision.

        Args:
            db: an open connection as returned by ``get_conn``.
            column_type: the SQL type declaration for the ``value`` column,
                e.g. ``"NUMERIC(10, 2)"``.

        Returns:
            The ``precision`` attribute of the processed ``value`` column.
        """
        name = "tbl_" + random_table_suffix()
        tbl = table(name, schema={"value": column_type})
        db.query(tbl.create())
        try:
            # Look the table up again without a declared schema so the column
            # type comes from what the database reports.
            t = table(name)
            raw_schema = db.query_table_schema(t.path)
            schema = db._process_table_schema(t.path, raw_schema)
            return schema["value"].precision
        finally:
            # Drop the table even when the lookup fails, so test runs do not
            # leave tables behind in the target database.
            db.query(tbl.drop())

    def test_specified_precision(self):
        """An explicit ``NUMERIC(10, 2)`` yields a precision of 2."""
        db = get_conn(self.db_cls)
        self.assertEqual(self._parsed_precision(db, "NUMERIC(10, 2)"), 2)

    def test_specified_zero_precision(self):
        """``NUMERIC(10)`` (one argument only) yields a precision of 0."""
        db = get_conn(self.db_cls)
        self.assertEqual(self._parsed_precision(db, "NUMERIC(10)"), 0)

    def test_default_precision(self):
        """Bare ``NUMERIC`` yields the dialect's ``DEFAULT_NUMERIC_PRECISION``."""
        db = get_conn(self.db_cls)
        self.assertEqual(
            self._parsed_precision(db, "NUMERIC"),
            db.dialect.DEFAULT_NUMERIC_PRECISION,
        )