Skip to content

Commit 1f314b2

Browse files
committed
Fix casting of floating types to decimal. The problem is similar to the PostgreSQL one.
1 parent 0680fac commit 1f314b2

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

data_diff/databases/databricks.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ def md5_to_int(self, s: str) -> str:
6666
def to_string(self, s: str) -> str:
    """Wrap the given SQL expression so it is cast to Databricks' STRING type."""
    return f"cast({s} as string)"
6868

69+
def _convert_db_precision_to_digits(self, p: int) -> int:
    """Convert a database-reported precision to a count of reliable decimal digits.

    Databricks' FLOAT type reports more precision than is actually stable,
    so we trim two digits off the base conversion (weird precision issues
    observed in Databricks for FLOAT).
    """
    base_digits = super()._convert_db_precision_to_digits(p)
    return base_digits - 2
6974
def query_table_schema(self, path: DbPath, filter_columns: Optional[Sequence[str]] = None) -> Dict[str, ColType]:
7075
# Databricks has INFORMATION_SCHEMA only for Databricks Runtime, not for Databricks SQL.
7176
# https://docs.databricks.com/spark/latest/spark-sql/language-manual/information-schema/columns.html
@@ -107,7 +112,10 @@ def query_table_schema(self, path: DbPath, filter_columns: Optional[Sequence[str
107112
row = (row.COLUMN_NAME, row_type, None, None, None)
108113

109114
resulted_rows.append(row)
110-
return {row[0]: self._parse_type(path, *row) for row in resulted_rows}
115+
col_dict: Dict[str, ColType] = {row[0]: self._parse_type(path, *row) for row in resulted_rows}
116+
117+
self._refine_coltypes(path, col_dict)
118+
return col_dict
111119

112120
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
113121
"""Databricks timestamp contains no more than 6 digits in precision"""

0 commit comments

Comments (0)