@@ -566,6 +566,22 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
566566
567567 return result
568568
def _apply_filter(self, indices, dropna):
    """
    Build the filtered object from the positions of the groups that passed.

    Parameters
    ----------
    indices : list of array-like
        One entry per group holding the positional (integer) locations of
        that group's rows in ``self.obj``; callers pass an empty list for a
        group that was rejected. The list itself may be empty.
    dropna : bool
        If True, return only the selected rows (in their original order).
        If False, return an object shaped like ``self.obj`` in which the
        rows that were not selected are filled with NaN.

    Returns
    -------
    Same type as ``self.obj``.
    """
    if len(indices) == 0:
        # np.concatenate raises on an empty sequence, so build the empty
        # selection directly. It must be an ndarray (not a list) because
        # the dropna=False branch indexes a mask with it.
        positions = np.array([], dtype=np.intp)
    else:
        # Concatenating with empty lists yields a float64 array; cast once
        # so both ``take`` and the mask assignment get integer positions.
        positions = np.sort(np.concatenate(indices)).astype(np.intp)

    if dropna:
        return self.obj.take(positions)

    mask = np.zeros(len(self.obj.index), dtype=bool)
    mask[positions] = True
    # mask fails to broadcast when passed to where; broadcast manually.
    mask = np.tile(mask, list(self.obj.shape[1:]) + [1]).T
    return self.obj.where(mask)  # Fill with NaNs.
584+
569585
570586@Appender (GroupBy .__doc__ )
571587def groupby (obj , by , ** kwds ):
@@ -1585,14 +1601,13 @@ def transform(self, func, *args, **kwargs):
15851601 group = com .ensure_float (group )
15861602 object .__setattr__ (group , 'name' , name )
15871603 res = wrapper (group )
1588- indexer = self .obj .index .get_indexer (group .index )
15891604 if hasattr (res ,'values' ):
15901605 res = res .values
15911606
15921607 # need to do a safe put here, as the dtype may be different
15931608 # this needs to be an ndarray
15941609 result = Series (result )
1595- result .loc [ indexer ] = res
1610+ result .iloc [ self . indices [ name ] ] = res
15961611 result = result .values
15971612
15981613 # downcast if we can (and need)
@@ -1630,22 +1645,15 @@ def true_and_notnull(x, *args, **kwargs):
16301645 return b and notnull (b )
16311646
16321647 try :
1633- indexers = [self .obj .index .get_indexer (group .index ) \
1634- if true_and_notnull (group ) else [] \
1635- for _ , group in self ]
1648+ indices = [self .indices [name ] if true_and_notnull (group ) else []
1649+ for name , group in self ]
16361650 except ValueError :
16371651 raise TypeError ("the filter must return a boolean result" )
16381652 except TypeError :
16391653 raise TypeError ("the filter must return a boolean result" )
16401654
1641- if len (indexers ) == 0 :
1642- filtered = self .obj .take ([]) # because np.concatenate would fail
1643- else :
1644- filtered = self .obj .take (np .sort (np .concatenate (indexers )))
1645- if dropna :
1646- return filtered
1647- else :
1648- return filtered .reindex (self .obj .index ) # Fill with NaNs.
1655+ filtered = self ._apply_filter (indices , dropna )
1656+ return filtered
16491657
16501658
16511659class NDFrameGroupBy (GroupBy ):
@@ -2125,7 +2133,7 @@ def filter(self, func, dropna=True, *args, **kwargs):
21252133 """
21262134 from pandas .tools .merge import concat
21272135
2128- indexers = []
2136+ indices = []
21292137
21302138 obj = self ._obj_with_exclusions
21312139 gen = self .grouper .get_iterator (obj , axis = self .axis )
@@ -2146,31 +2154,25 @@ def filter(self, func, dropna=True, *args, **kwargs):
21462154 else :
21472155 res = path (group )
21482156
2149- def add_indexer ():
2150- indexers .append (self .obj . index . get_indexer ( group . index ) )
2157+ def add_indices ():
2158+ indices .append (self .indices [ name ] )
21512159
21522160 # interpret the result of the filter
21532161 if isinstance (res ,(bool ,np .bool_ )):
21542162 if res :
2155- add_indexer ()
2163+ add_indices ()
21562164 else :
21572165 if getattr (res ,'ndim' ,None ) == 1 :
21582166 val = res .ravel ()[0 ]
21592167 if val and notnull (val ):
2160- add_indexer ()
2168+ add_indices ()
21612169 else :
21622170
21632171 # in theory you could do .all() on the boolean result ?
21642172 raise TypeError ("the filter must return a boolean result" )
21652173
2166- if len (indexers ) == 0 :
2167- filtered = self .obj .take ([]) # because np.concatenate would fail
2168- else :
2169- filtered = self .obj .take (np .sort (np .concatenate (indexers )))
2170- if dropna :
2171- return filtered
2172- else :
2173- return filtered .reindex (self .obj .index ) # Fill with NaNs.
2174+ filtered = self ._apply_filter (indices , dropna )
2175+ return filtered
21742176
21752177
21762178class DataFrameGroupBy (NDFrameGroupBy ):
0 commit comments