55import random
66import sys
77from itertools import product
8+ from collections import defaultdict
89
910import numpy as np
1011import pandas as pd
@@ -159,7 +160,7 @@ def _generate_metadata(dataset):
159160 if dataset != "test_images" :
160161 images_path = os .path .join (images_path , "images" )
161162
162- if public_paths : # TODO: implement public generating public path
163+ if public_paths : # TODO: implement public generating public path
163164 raise NotImplementedError
164165 else :
165166 images_path_to_write = images_path
@@ -171,11 +172,7 @@ def _generate_metadata(dataset):
171172 if masks_dir_name .startswith (masks_dir_prefix ):
172173 masks_overlayed_sufix_to_write .append (masks_dir_name [len (masks_dir_prefix ):])
173174
174- df_columns = ['ImageId' , 'file_path_image' , 'is_train' , 'is_valid' , 'is_test' , 'n_buildings' ]
175- for mask_dir_sufix in masks_overlayed_sufix_to_write :
176- df_columns .append ('file_path_mask' + mask_dir_sufix )
177-
178- df_metadata = pd .DataFrame (columns = df_columns )
175+ df_dict = defaultdict (lambda : [])
179176
180177 for image_file_name in tqdm (sorted (os .listdir (images_path ))):
181178 file_path_image = os .path .join (images_path_to_write , image_file_name )
@@ -188,35 +185,34 @@ def _generate_metadata(dataset):
188185 if dataset == "test_images" :
189186 n_buildings = None
190187 is_test = 1
191- row = { 'ImageId' : image_id ,
192- 'file_path_image' : file_path_image ,
193- 'is_train' : is_train ,
194- 'is_valid' : is_valid ,
195- 'is_test' : is_test ,
196- 'n_buildings' : n_buildings }
188+ df_dict [ 'ImageId' ]. append ( image_id )
189+ df_dict [ 'file_path_image' ]. append ( file_path_image )
190+ df_dict [ 'is_train' ]. append ( is_train )
191+ df_dict [ 'is_valid' ]. append ( is_valid )
192+ df_dict [ 'is_test' ]. append ( is_test )
193+ df_dict [ 'n_buildings' ]. append ( n_buildings )
197194 for mask_dir_sufix in masks_overlayed_sufix_to_write :
198- row ['file_path_mask' + mask_dir_sufix ] = None
195+ df_dict ['file_path_mask' + mask_dir_sufix ]. append ( None )
199196
200197 else :
201198 n_buildings = None
202199 if dataset == "val" :
203200 is_valid = 1
204201 else :
205202 is_train = 1
206- row = { 'ImageId' : image_id ,
207- 'file_path_image' : file_path_image ,
208- 'is_train' : is_train ,
209- 'is_valid' : is_valid ,
210- 'is_test' : is_test ,
211- 'n_buildings' : n_buildings }
203+ df_dict [ 'ImageId' ]. append ( image_id )
204+ df_dict [ 'file_path_image' ]. append ( file_path_image )
205+ df_dict [ 'is_train' ]. append ( is_train )
206+ df_dict [ 'is_valid' ]. append ( is_valid )
207+ df_dict [ 'is_test' ]. append ( is_test )
208+ df_dict [ 'n_buildings' ]. append ( n_buildings )
212209
213210 for mask_dir_sufix in masks_overlayed_sufix_to_write :
214211 file_path_mask = os .path .join (meta_dir , masks_dir_prefix + mask_dir_sufix , dataset , "masks" ,
215212 image_file_name [:- 4 ] + ".png" )
216- row ['file_path_mask' + mask_dir_sufix ] = file_path_mask
217- df_metadata = df_metadata .append (row , ignore_index = True )
213+ df_dict ['file_path_mask' + mask_dir_sufix ].append (file_path_mask )
218214
219- return df_metadata
215+ return pd . DataFrame . from_dict ( df_dict )
220216
221217 metadata = pd .DataFrame ()
222218 if process_train_data :
0 commit comments