@@ -187,25 +187,30 @@ def query_table_schema(self, path: DbPath) -> Dict[str, tuple]:
187187 assert len (d ) == len (rows )
188188 return d
189189
def _process_table_schema(
    self, path: DbPath, raw_schema: Dict[str, tuple], filter_columns: Sequence[str], where: str = None
):
    """Build the ``{name: type}`` mapping for the requested columns of a table.

    Args:
        path: Path of the table. Forwarded unchanged to ``_parse_type`` and
            ``_refine_coltypes``.
        raw_schema: Mapping of column name to its raw schema row. Each row is
            unpacked into ``_parse_type`` and its first element is used as
            the key of the result.
        filter_columns: Names of the columns to keep. Matching against the
            keys of ``raw_schema`` is case-insensitive.
        where: Optional condition forwarded to ``_refine_coltypes``, where it
            restricts the rows that are sampled. ``None`` means no restriction.

    Returns:
        A dict mapping ``row[0]`` of every accepted column to the value
        returned by ``_parse_type``, after ``_refine_coltypes`` has been
        called on it (presumably refining it in place -- confirm against
        ``_refine_coltypes``).
    """
    # Lower-case the requested names once so the membership test below is
    # case-insensitive and O(1) per column.
    accept = {i.lower() for i in filter_columns}

    # Filtering uses the mapping key (``name``), while the result is keyed by
    # the first element of the row itself.
    col_dict = {row[0]: self._parse_type(path, *row) for name, row in raw_schema.items() if name.lower() in accept}

    self._refine_coltypes(path, col_dict, where)

    # Return a dict of form {name: type} after normalization
    return col_dict
199201
200- def _refine_coltypes (self , table_path : DbPath , col_dict : Dict [str , ColType ]):
201- "Refine the types in the column dict, by querying the database for a sample of their values"
202+ def _refine_coltypes (self , table_path : DbPath , col_dict : Dict [str , ColType ], where : str = None ):
203+ """Refine the types in the column dict, by querying the database for a sample of their values
204+
205+ 'where' restricts the rows to be sampled.
206+ """
202207
203208 text_columns = [k for k , v in col_dict .items () if isinstance (v , Text )]
204209 if not text_columns :
205210 return
206211
207212 fields = [self .normalize_uuid (c , String_UUID ()) for c in text_columns ]
208- samples_by_row = self .query (Select (fields , TableName (table_path ), limit = 16 ), list )
213+ samples_by_row = self .query (Select (fields , TableName (table_path ), limit = 16 , where = where and [ where ] ), list )
209214 if not samples_by_row :
210215 raise ValueError (f"Table { table_path } is empty." )
211216
0 commit comments