@@ -199,10 +199,19 @@ def apply_query(callback: Callable[[str], Any], sql_code: Union[str, ThreadLocal
199199class BaseDialect (abc .ABC ):
200200 SUPPORTS_PRIMARY_KEY : ClassVar [bool ] = False
201201 SUPPORTS_INDEXES : ClassVar [bool ] = False
202+ PREVENT_OVERFLOW_WHEN_CONCAT : ClassVar [bool ] = False
202203 TYPE_CLASSES : ClassVar [Dict [str , Type [ColType ]]] = {}
203204
204205 PLACEHOLDER_TABLE = None # Used for Oracle
205206
207+ # Some databases do not support long strings, so concatenation might lead to type overflow
208+
209+ _prevent_overflow_when_concat : bool = False
210+
211+ def enable_preventing_type_overflow (self ) -> None :
212+ logger .info ("Preventing type overflow when concatenation is enabled" )
213+ self ._prevent_overflow_when_concat = True
214+
206215 def parse_table_name (self , name : str ) -> DbPath :
207216 "Parse the given table name into a DbPath"
208217 return parse_table_name (name )
@@ -392,10 +401,19 @@ def render_checksum(self, c: Compiler, elem: Checksum) -> str:
392401 return f"sum({ md5 } )"
393402
394403 def render_concat (self , c : Compiler , elem : Concat ) -> str :
404+ if self ._prevent_overflow_when_concat :
405+ items = [
406+ f"{ self .compile (c , Code (self .md5_as_hex (self .to_string (self .compile (c , expr )))))} "
407+ for expr in elem .exprs
408+ ]
409+
395410 # We coalesce because on some DBs (e.g. MySQL) concat('a', NULL) is NULL
396- items = [
397- f"coalesce({ self .compile (c , Code (self .to_string (self .compile (c , expr ))))} , '<null>')" for expr in elem .exprs
398- ]
411+ else :
412+ items = [
413+ f"coalesce({ self .compile (c , Code (self .to_string (self .compile (c , expr ))))} , '<null>')"
414+ for expr in elem .exprs
415+ ]
416+
399417 assert items
400418 if len (items ) == 1 :
401419 return items [0 ]
@@ -769,6 +787,10 @@ def set_timezone_to_utc(self) -> str:
769787 def md5_as_int (self , s : str ) -> str :
770788 "Provide SQL for computing md5 and returning an int"
771789
790+ @abstractmethod
791+ def md5_as_hex (self , s : str ) -> str :
792+ """Provide SQL for computing md5 and returning it as a hex string"""
793+
772794 @abstractmethod
773795 def normalize_timestamp (self , value : str , coltype : TemporalType ) -> str :
774796 """Creates an SQL expression, that converts 'value' to a normalized timestamp.
@@ -885,13 +907,16 @@ class Database(abc.ABC):
885907 Instantiated using :meth:`~data_diff.connect`
886908 """
887909
910+ DIALECT_CLASS : ClassVar [Type [BaseDialect ]] = BaseDialect
911+
888912 SUPPORTS_ALPHANUMS : ClassVar [bool ] = True
889913 SUPPORTS_UNIQUE_CONSTAINT : ClassVar [bool ] = False
890914 CONNECT_URI_KWPARAMS : ClassVar [List [str ]] = []
891915
892916 default_schema : Optional [str ] = None
893917 _interactive : bool = False
894918 is_closed : bool = False
919+ _dialect : BaseDialect = None
895920
896921 @property
897922 def name (self ):
@@ -1120,10 +1145,13 @@ def close(self):
11201145 return super ().close ()
11211146
11221147 @property
1123- @abstractmethod
11241148 def dialect (self ) -> BaseDialect :
11251149 "The dialect of the database. Used internally by Database, and also available publicly."
11261150
1151+ if not self ._dialect :
1152+ self ._dialect = self .DIALECT_CLASS ()
1153+ return self ._dialect
1154+
11271155 @property
11281156 @abstractmethod
11291157 def CONNECT_URI_HELP (self ) -> str :
0 commit comments