@@ -80,6 +80,7 @@ class DiffStats:
8080 table2_count : int
8181 unchanged : int
8282 diff_percent : float
83+ extra_column_diffs : Optional [Dict [str , int ]]
8384
8485
8586@dataclass
@@ -95,17 +96,33 @@ def __iter__(self):
9596 self .result_list .append (i )
9697 yield i
9798
98- def _get_stats (self ) -> DiffStats :
99+ def _get_stats (self , is_dbt : bool = False ) -> DiffStats :
99100 list (self ) # Consume the iterator into result_list, if we haven't already
100101
102+ key_columns = self .info_tree .info .tables [0 ].key_columns
103+ len_key_columns = len (key_columns )
101104 diff_by_key = {}
105+ extra_column_diffs = None
106+ if is_dbt :
107+ extra_column_values_store = {}
108+ extra_columns = self .info_tree .info .tables [0 ].extra_columns
109+ extra_column_diffs = {k : 0 for k in extra_columns }
110+
102111 for sign , values in self .result_list :
103- k = values [: len (self .info_tree .info .tables [0 ].key_columns )]
112+ k = values [:len_key_columns ]
113+ if is_dbt :
114+ extra_column_values = values [len_key_columns :]
104115 if k in diff_by_key :
105116 assert sign != diff_by_key [k ]
106117 diff_by_key [k ] = "!"
118+ if is_dbt :
119+ for i in range (0 , len (extra_columns )):
120+ if extra_column_values [i ] != extra_column_values_store [k ][i ]:
121+ extra_column_diffs [extra_columns [i ]] += 1
107122 else :
108123 diff_by_key [k ] = sign
124+ if is_dbt :
125+ extra_column_values_store [k ] = extra_column_values
109126
110127 diff_by_sign = {k : 0 for k in "+-!" }
111128 for sign in diff_by_key .values ():
@@ -116,23 +133,41 @@ def _get_stats(self) -> DiffStats:
116133 unchanged = table1_count - diff_by_sign ["-" ] - diff_by_sign ["!" ]
117134 diff_percent = 1 - unchanged / max (table1_count , table2_count )
118135
119- return DiffStats (diff_by_sign , table1_count , table2_count , unchanged , diff_percent )
136+ return DiffStats (diff_by_sign , table1_count , table2_count , unchanged , diff_percent , extra_column_diffs )
120137
def get_stats_string(self, is_dbt: bool = False):
    """Render the diff statistics as a human-readable, multi-line string.

    Args:
        is_dbt: When true, return the compact dbt-style summary: a small
            added/removed table, the updated and unchanged row counts, and
            one "column: count" line per extra column. When false, return
            the standard report, followed by an "Extra-Info" section if
            ``self.stats`` is non-empty.

    Returns:
        The formatted report as a single string.
    """
    diff_stats = self._get_stats(is_dbt)
    by_sign = diff_stats.diff_by_sign

    if is_dbt:
        # "+" rows are exclusive to table B and "-" rows are exclusive to
        # table A (see the wording of the standard report below), so with
        # table A as the baseline "+" is what was added and "-" what was
        # removed. NOTE(review): confirm table A is the baseline in dbt runs.
        pieces = [
            "\n| Rows Added\t| Rows Removed\n",
            "------------------------------------------------------------\n",
            f"| {by_sign['+']}\t\t| {by_sign['-']}\n",
            "------------------------------------------------------------\n\n",
            f"Updated Rows: {by_sign['!']}\n",
            f"Unchanged Rows: {diff_stats.unchanged}\n\n",
            "Values Updated:",
        ]
        # extra_column_diffs is Optional; treat a missing mapping as empty
        # instead of failing on None.items().
        column_diffs = diff_stats.extra_column_diffs or {}
        pieces.extend(f"\n{column}: {count}" for column, count in column_diffs.items())
        return "".join(pieces)

    pieces = [
        f"{diff_stats.table1_count} rows in table A\n",
        f"{diff_stats.table2_count} rows in table B\n",
        f"{by_sign['-']} rows exclusive to table A (not present in B)\n",
        f"{by_sign['+']} rows exclusive to table B (not present in A)\n",
        f"{by_sign['!']} rows updated\n",
        f"{diff_stats.unchanged} rows unchanged\n",
        f"{100 * diff_stats.diff_percent:.2f}% difference score\n",
    ]

    # The Extra-Info section belongs to the standard report only.
    if self.stats:
        pieces.append("\nExtra-Info:\n")
        pieces.extend(f" {k} = {v}\n" for k, v in sorted(self.stats.items()))

    return "".join(pieces)
138173
0 commit comments