@@ -35,9 +35,14 @@ class PHashFilter(ImageFilter):
3535 PHashFilter class
3636 """
3737
38- def __init__ (self , sim_hash_size : int = 8 , workers : int = 16 , pbar : bool = True ):
39- super ().__init__ (pbar )
40-
38+ def __init__ (
39+ self ,
40+ sim_hash_size : int = 8 ,
41+ workers : int = 16 ,
42+ pbar : bool = True ,
43+ _pbar_position : int = 0
44+ ):
45+ super ().__init__ (pbar , _pbar_position )
4146 self .num_workers = workers
4247 self .sim_hash_size = sim_hash_size
4348
@@ -68,39 +73,3 @@ def process_batch(self, batch) -> dict:
6873 df_batch_labels [f"image_phash_{ self .sim_hash_size } " ].extend (img_simhashes )
6974
7075 return df_batch_labels
71-
72-
73- class MD5Filter (ImageFilter ):
74- """
75- MD5Filter class
76- """
77-
78- def __init__ (
79- self ,
80- pbar : bool = True ,
81- workers : int = 16 ,
82- ):
83- super ().__init__ (pbar )
84-
85- self .num_workers = workers
86-
87- self .schema = ["image_path" , "image_md5" ]
88- self .dataloader_kwargs = {
89- "num_workers" : self .num_workers ,
90- "batch_size" : 1 ,
91- "drop_last" : False ,
92- }
93-
94- def preprocess (self , img_bytes : bytes , data : dict ):
95- image_path = data ["image_path" ]
96- img_md5 = get_md5_hash (img_bytes )
97- return image_path , img_md5
98-
99- def process_batch (self , batch ) -> dict :
100- df_batch_labels = self ._generate_dict_from_schema ()
101-
102- image_paths , img_md5s = list (zip (* batch ))
103- df_batch_labels ["image_path" ].extend (image_paths )
104- df_batch_labels ["image_md5" ].extend (img_md5s )
105-
106- return df_batch_labels
0 commit comments