3232from pandas .tools .merge import concat
3333from pandas import compat
3434from pandas .io .common import PerformanceWarning
35+ from pandas .core .config import get_option
3536
3637import pandas .lib as lib
3738import pandas .algos as algos
@@ -165,6 +166,17 @@ class DuplicateWarning(Warning):
165166 Panel4D : [1 , 2 , 3 ],
166167}
167168
169+ # register the 'io.hdf.dropna_table' configuration option (boolean, default True)
170+ from pandas .core import config
171+ dropna_doc = """
172+ : boolean
173+ drop ALL nan rows when appending to a table
174+ """
175+
176+ with config .config_prefix ('io.hdf' ):
177+ config .register_option ('dropna_table' , True , dropna_doc ,
178+ validator = config .is_bool )
179+
168180# oh the troubles to reduce import time
169181_table_mod = None
170182_table_supports_index = False
@@ -730,7 +742,7 @@ def remove(self, key, where=None, start=None, stop=None):
730742 'can only remove with where on objects written as tables' )
731743 return s .delete (where = where , start = start , stop = stop )
732744
733- def append (self , key , value , fmt = None , append = True , columns = None , ** kwargs ):
745+ def append (self , key , value , fmt = None , append = True , columns = None , dropna = None , ** kwargs ):
734746 """
735747 Append to Table in file. Node must already exist and be Table
736748 format.
@@ -751,7 +763,8 @@ def append(self, key, value, fmt=None, append=True, columns=None, **kwargs):
751763 chunksize : size to chunk the writing
752764 expectedrows : expected TOTAL row size of this table
753765 encoding : default None, provide an encoding for strings
754-
766+ dropna : boolean, default None; if True, do not write an ALL nan row to the store.
767+ If None, the value of the option 'io.hdf.dropna_table' is used (registered default: True)
755768 Notes
756769 -----
757770 Does *not* check if data being appended overlaps with existing
@@ -761,8 +774,10 @@ def append(self, key, value, fmt=None, append=True, columns=None, **kwargs):
761774 raise Exception (
762775 "columns is not a supported keyword in append, try data_columns" )
763776
777+ if dropna is None :
778+ dropna = get_option ("io.hdf.dropna_table" )
764779 kwargs = self ._validate_format (fmt or 't' , kwargs )
765- self ._write_to_group (key , value , append = append , ** kwargs )
780+ self ._write_to_group (key , value , append = append , dropna = dropna , ** kwargs )
766781
767782 def append_to_multiple (self , d , value , selector , data_columns = None , axes = None , ** kwargs ):
768783 """
@@ -3219,7 +3234,7 @@ class AppendableTable(LegacyTable):
32193234
32203235 def write (self , obj , axes = None , append = False , complib = None ,
32213236 complevel = None , fletcher32 = None , min_itemsize = None , chunksize = None ,
3222- expectedrows = None , ** kwargs ):
3237+ expectedrows = None , dropna = True , ** kwargs ):
32233238
32243239 if not append and self .is_exists :
32253240 self ._handle .removeNode (self .group , 'table' )
@@ -3254,29 +3269,36 @@ def write(self, obj, axes=None, append=False, complib=None,
32543269 a .validate_and_set (table , append )
32553270
32563271 # add the rows
3257- self .write_data (chunksize )
3272+ self .write_data (chunksize , dropna = dropna )
32583273
3259- def write_data (self , chunksize ):
3274+ def write_data (self , chunksize , dropna = True ):
32603275 """ we form the data into a 2-d including indexes,values,mask
32613276 write chunk-by-chunk """
32623277
32633278 names = self .dtype .names
32643279 nrows = self .nrows_expected
32653280
3266- # create the masks & values
3267- masks = []
3268- for a in self .values_axes :
3281+ # if dropna==True, then drop ALL nan rows
3282+ if dropna :
3283+
3284+ masks = []
3285+ for a in self .values_axes :
3286+
3287+ # figure the mask: only do if we can successfully process this
3288+ # column, otherwise ignore the mask
3289+ mask = com .isnull (a .data ).all (axis = 0 )
3290+ masks .append (mask .astype ('u1' ))
32693291
3270- # figure the mask: only do if we can successfully process this
3271- # column, otherwise ignore the mask
3272- mask = com .isnull (a .data ).all (axis = 0 )
3273- masks .append (mask .astype ('u1' ))
3292+ # consolidate masks
3293+ mask = masks [0 ]
3294+ for m in masks [1 :]:
3295+ mask = mask & m
3296+ mask = mask .ravel ()
3297+
3298+ else :
32743299
3275- # consolidate masks
3276- mask = masks [0 ]
3277- for m in masks [1 :]:
3278- mask = mask & m
3279- mask = mask .ravel ()
3300+ mask = np .empty (nrows , dtype = 'u1' )
3301+ mask .fill (False )
32803302
32813303 # broadcast the indexes if needed
32823304 indexes = [a .cvalues for a in self .index_axes ]
0 commit comments