@@ -105,6 +105,7 @@ class Index(IndexOpsMixin, PandasObject):
105105 _is_numeric_dtype = False
106106
107107 _engine_type = _index .ObjectEngine
108+ _isin_type = lib .ismember
108109
109110 def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False ,
110111 tupleize_cols = True , ** kwargs ):
@@ -1838,7 +1839,7 @@ def isin(self, values, level=None):
18381839 value_set = set (values )
18391840 if level is not None :
18401841 self ._validate_index_level (level )
1841- return lib . ismember (np .array (self ), value_set )
1842+ return self . _isin_type (np .array (self ), value_set )
18421843
18431844 def _can_reindex (self , indexer ):
18441845 """
@@ -3379,6 +3380,7 @@ class Int64Index(NumericIndex):
33793380 _outer_indexer = _algos .outer_join_indexer_int64
33803381
33813382 _engine_type = _index .Int64Engine
3383+ _isin_type = lib .ismember_int64
33823384
33833385 def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False , ** kwargs ):
33843386
@@ -5235,13 +5237,39 @@ def partial_selection(key, indexer=None):
52355237 indexer = self ._get_level_indexer (key , level = level )
52365238 return indexer , maybe_droplevels (indexer , [level ], drop_level )
52375239
5238- def _get_level_indexer (self , key , level = 0 ):
5239- # return a boolean indexer or a slice showing where the key is
5240+ def _get_level_indexer (self , key , level = 0 , indexer = None ):
5241+ # return an indexer, boolean array or a slice showing where the key is
52405242 # in the totality of values
5243+ # if the indexer is provided, then use this
52415244
52425245 level_index = self .levels [level ]
52435246 labels = self .labels [level ]
52445247
def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
    """
    Locate the entries of ``labels`` whose value lies in
    ``np.arange(start, stop, step)``.

    Parameters
    ----------
    start, stop, step : int or None
        Passed straight to ``np.arange`` to build the set of wanted codes.
    indexer : array-like of int or None
        Positions already selected by the caller.  Bound at definition
        time to the ``indexer`` of the enclosing call.
    labels : array-like of int
        The codes of the level being searched.  Bound at definition time
        to the ``labels`` of the enclosing call.

    Returns
    -------
    numpy.ndarray
        * If ``indexer`` is given and its length differs from
          ``len(labels)``: the elements of ``indexer`` whose label is in
          the wanted range.
        * Otherwise: a boolean mask of length ``len(labels)``.
    """
    wanted = np.arange(start, stop, step)

    if indexer is not None and len(indexer) != len(labels):
        # The caller has already narrowed the selection, so only the
        # labels at those positions need to be examined.  ``hits`` are
        # offsets *into indexer*; mapping them through ``indexer``
        # turns them back into the values stored in ``indexer``.
        # Local import, kept as in the original block.
        from pandas import Series

        selected_labels = Index(labels.take(indexer))
        hits = Series(selected_labels.isin(wanted).nonzero()[0])
        return hits.map(Series(indexer)).values

    # ``labels`` holds one code per row and codes normally repeat, so we
    # must NOT pass ``assume_unique=True``: np.in1d documents that flag
    # as a promise that both inputs are unique, and breaking the promise
    # can flag rows whose code is not in ``wanted``.
    return np.in1d(labels, wanted)
5272+
52455273 if isinstance (key , slice ):
52465274 # handle a slice, returning a slice if we can
52475275 # otherwise a boolean indexer
@@ -5267,17 +5295,13 @@ def _get_level_indexer(self, key, level=0):
52675295 # a partial date slicer on a DatetimeIndex generates a slice
52685296 # note that the stop ALREADY includes the stopped point (if
52695297 # it was a string sliced)
5270- m = np .zeros (len (labels ),dtype = bool )
5271- m [np .in1d (labels ,np .arange (start .start ,stop .stop ,step ))] = True
5272- return m
5298+ return convert_indexer (start .start ,stop .stop ,step )
52735299
52745300 elif level > 0 or self .lexsort_depth == 0 or step is not None :
52755301 # need to have like semantics here to right
52765302 # searching as when we are using a slice
52775303 # so include the stop+1 (so we include stop)
5278- m = np .zeros (len (labels ),dtype = bool )
5279- m [np .in1d (labels ,np .arange (start ,stop + 1 ,step ))] = True
5280- return m
5304+ return convert_indexer (start ,stop + 1 ,step )
52815305 else :
52825306 # sorted, so can return slice object -> view
52835307 i = labels .searchsorted (start , side = 'left' )
@@ -5315,59 +5339,73 @@ def get_locs(self, tup):
53155339 raise KeyError ('MultiIndex Slicing requires the index to be fully lexsorted'
53165340 ' tuple len ({0}), lexsort depth ({1})' .format (len (tup ), self .lexsort_depth ))
53175341
5318- def _convert_indexer (r ):
5342+ # indexer
5343+ # this is the list of all values that we want to select
5344+ n = len (self )
5345+ indexer = None
5346+
5347+ def _convert_to_indexer (r ):
5348+ # return an indexer
53195349 if isinstance (r , slice ):
5320- m = np .zeros (len ( self ) ,dtype = bool )
5350+ m = np .zeros (n ,dtype = bool )
53215351 m [r ] = True
5322- return m
5323- return r
5352+ r = m .nonzero ()[0 ]
5353+ elif is_bool_indexer (r ):
5354+ if len (r ) != n :
5355+ raise ValueError ("cannot index with a boolean indexer that is"
5356+ " not the same length as the index" )
5357+ r = r .nonzero ()[0 ]
5358+ return Int64Index (r )
5359+
def _update_indexer(idxr, indexer=indexer):
    """
    Combine a newly computed indexer with the running one.

    Parameters
    ----------
    idxr : Index or None
        Indexer for the current key; ``None`` means "no restriction".
    indexer : Index or None
        Running indexer; ``None`` means nothing has been selected yet
        and is replaced by ``Index(np.arange(n))``.

    Returns
    -------
    Index
        The running indexer if ``idxr`` is None, else ``indexer & idxr``
        (presumably set intersection for Index objects -- confirm
        against the Index ``&`` operator).
    """
    current = Index(np.arange(n)) if indexer is None else indexer
    return current if idxr is None else current & idxr
53245366
5325- ranges = []
53265367 for i ,k in enumerate (tup ):
53275368
53285369 if is_bool_indexer (k ):
53295370 # a boolean indexer, must be the same length!
53305371 k = np .asarray (k )
5331- if len (k ) != len (self ):
5332- raise ValueError ("cannot index with a boolean indexer that is"
5333- " not the same length as the index" )
5334- ranges .append (k )
5372+ indexer = _update_indexer (_convert_to_indexer (k ), indexer = indexer )
5373+
53355374 elif is_list_like (k ):
53365375 # a collection of labels to include from this level (these are or'd)
5337- indexers = []
5376+ indexers = None
53385377 for x in k :
53395378 try :
5340- indexers .append (_convert_indexer (self ._get_level_indexer (x , level = i )))
5379+ idxrs = _convert_to_indexer (self ._get_level_indexer (x , level = i , indexer = indexer ))
5380+ indexers = idxrs if indexers is None else indexers | idxrs
53415381 except (KeyError ):
53425382
53435383 # ignore labels that are not found
53445384 continue
5345- if len (k ):
5346- ranges .append (reduce (np .logical_or , indexers ))
5385+
5386+ if indexers is not None :
5387+ indexer = _update_indexer (indexers , indexer = indexer )
53475388 else :
5348- ranges .append (np .zeros (self .labels [i ].shape , dtype = bool ))
5389+
5390+ # no matches we are done
5391+ return Int64Index ([]).values
53495392
53505393 elif is_null_slice (k ):
53515394 # empty slice
5352- pass
5395+ indexer = _update_indexer ( None , indexer = indexer )
53535396
53545397 elif isinstance (k ,slice ):
53555398
53565399 # a slice, include BOTH of the labels
5357- ranges . append ( self ._get_level_indexer (k ,level = i ) )
5400+ indexer = _update_indexer ( _convert_to_indexer ( self ._get_level_indexer (k ,level = i , indexer = indexer )), indexer = indexer )
53585401 else :
53595402 # a single label
5360- ranges .append (self .get_loc_level (k ,level = i ,drop_level = False )[0 ])
5361-
5362- # identity
5363- if len (ranges ) == 0 :
5364- return slice (0 ,len (self ))
5365-
5366- elif len (ranges ) == 1 :
5367- return ranges [0 ]
5403+ indexer = _update_indexer (_convert_to_indexer (self .get_loc_level (k ,level = i ,drop_level = False )[0 ]), indexer = indexer )
53685404
5369- # construct a boolean indexer if we have a slice or boolean indexer
5370- return reduce (np .logical_and ,[ _convert_indexer (r ) for r in ranges ])
5405+ # empty indexer
5406+ if indexer is None :
5407+ return Int64Index ([]).values
5408+ return indexer .values
53715409
53725410 def truncate (self , before = None , after = None ):
53735411 """
0 commit comments