@@ -105,6 +105,7 @@ class Index(IndexOpsMixin, PandasObject):
105105 _is_numeric_dtype = False
106106
107107 _engine_type = _index .ObjectEngine
108+ _isin_type = lib .ismember
108109
109110 def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False ,
110111 tupleize_cols = True , ** kwargs ):
@@ -1838,7 +1839,7 @@ def isin(self, values, level=None):
18381839 value_set = set (values )
18391840 if level is not None :
18401841 self ._validate_index_level (level )
1841- return lib . ismember (np .array (self ), value_set )
1842+ return self . _isin_type (np .array (self ), value_set )
18421843
18431844 def _can_reindex (self , indexer ):
18441845 """
@@ -3381,6 +3382,7 @@ class Int64Index(NumericIndex):
33813382 _outer_indexer = _algos .outer_join_indexer_int64
33823383
33833384 _engine_type = _index .Int64Engine
3385+ _isin_type = lib .ismember_int64
33843386
33853387 def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False , ** kwargs ):
33863388
@@ -5237,13 +5239,39 @@ def partial_selection(key, indexer=None):
52375239 indexer = self ._get_level_indexer (key , level = level )
52385240 return indexer , maybe_droplevels (indexer , [level ], drop_level )
52395241
5240- def _get_level_indexer (self , key , level = 0 ):
5241- # return a boolean indexer or a slice showing where the key is
5242+ def _get_level_indexer (self , key , level = 0 , indexer = None ):
5243+ # return an indexer, boolean array or a slice showing where the key is
52425244 # in the totality of values
5245+ # if the indexer is provided, then use this
52435246
52445247 level_index = self .levels [level ]
52455248 labels = self .labels [level ]
52465249
def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
    """
    Locate the entries of ``labels`` whose value falls in
    ``np.arange(start, stop, step)``.

    Parameters
    ----------
    start, stop, step
        Passed unchanged to ``np.arange`` to build the set of wanted
        label values.
    indexer : optional
        Locations in ``labels`` that were already selected by the caller.
        Defaults to the ``indexer`` argument of the enclosing method.
    labels
        The labels to search; defaults to the enclosing method's labels
        for the requested level.

    Returns
    -------
    numpy.ndarray
        * When ``indexer`` is given and its length differs from
          ``len(labels)``: the subset of ``indexer`` values whose label is
          in the wanted range.
        * Otherwise: a boolean mask of length ``len(labels)``.
    """
    wanted = np.arange(start, stop, step)

    if indexer is not None and len(indexer) != len(labels):
        # The indexer covers only part of the labels, so it is enough to
        # test the labels at those locations.  ``isin`` yields positions
        # relative to the taken subset; mapping them through ``indexer``
        # translates them back to locations in the full labels array.
        from pandas import Series
        mapper = Series(indexer)
        hits = Series(Index(labels.take(indexer)).isin(wanted).nonzero()[0])
        m = hits.map(mapper).values

    else:
        # No indexer (or one as long as the labels): scan every label.
        # ``assume_unique`` must only be True when *both* inputs are
        # unique; ``wanted`` comes from ``np.arange`` and always is, but
        # the labels usually contain repeats, so check before promising.
        m = np.zeros(len(labels), dtype=bool)
        m[np.in1d(labels, wanted,
                  assume_unique=Index(labels).is_unique)] = True

    return m

5274+
52475275 if isinstance (key , slice ):
52485276 # handle a slice, returning a slice if we can
52495277 # otherwise a boolean indexer
@@ -5269,17 +5297,13 @@ def _get_level_indexer(self, key, level=0):
52695297 # a partial date slicer on a DatetimeIndex generates a slice
52705298 # note that the stop ALREADY includes the stopped point (if
52715299 # it was a string sliced)
5272- m = np .zeros (len (labels ),dtype = bool )
5273- m [np .in1d (labels ,np .arange (start .start ,stop .stop ,step ))] = True
5274- return m
5300+ return convert_indexer (start .start ,stop .stop ,step )
52755301
52765302 elif level > 0 or self .lexsort_depth == 0 or step is not None :
52775303 # need semantics like right-side searching here,
52785304 # as when we are using a slice,
52795305 # so use stop+1 (so that stop itself is included)
5280- m = np .zeros (len (labels ),dtype = bool )
5281- m [np .in1d (labels ,np .arange (start ,stop + 1 ,step ))] = True
5282- return m
5306+ return convert_indexer (start ,stop + 1 ,step )
52835307 else :
52845308 # sorted, so can return slice object -> view
52855309 i = labels .searchsorted (start , side = 'left' )
@@ -5317,59 +5341,73 @@ def get_locs(self, tup):
53175341 raise KeyError ('MultiIndex Slicing requires the index to be fully lexsorted'
53185342 ' tuple len ({0}), lexsort depth ({1})' .format (len (tup ), self .lexsort_depth ))
53195343
5320- def _convert_indexer (r ):
5344+ # indexer
5345+ # this is the list of all values that we want to select
5346+ n = len (self )
5347+ indexer = None
5348+
def _convert_to_indexer(r):
    # Normalize ``r`` into an Int64Index:
    #   * a slice becomes the locations it selects out of ``n`` rows
    #   * a boolean indexer becomes the locations of its True entries
    #     (it must have exactly ``n`` entries)
    #   * anything else is wrapped as-is
    if isinstance(r, slice):
        mask = np.zeros(n, dtype=bool)
        mask[r] = True
        locations = mask.nonzero()[0]
    elif is_bool_indexer(r):
        if len(r) != n:
            raise ValueError("cannot index with a boolean indexer that is"
                             " not the same length as the index")
        locations = r.nonzero()[0]
    else:
        locations = r
    return Int64Index(locations)
5361+
def _update_indexer(idxr, indexer=indexer):
    # Combine the running selection ``indexer`` with the new one ``idxr``.
    # With no running selection yet, start from every location 0..n-1;
    # with no new selection (``idxr`` is None), the running one is
    # returned unchanged; otherwise the two are combined with ``&``.
    current = Index(np.arange(n)) if indexer is None else indexer
    return current if idxr is None else current & idxr
53265368
5327- ranges = []
53285369 for i ,k in enumerate (tup ):
53295370
53305371 if is_bool_indexer (k ):
53315372 # a boolean indexer, must be the same length!
53325373 k = np .asarray (k )
5333- if len (k ) != len (self ):
5334- raise ValueError ("cannot index with a boolean indexer that is"
5335- " not the same length as the index" )
5336- ranges .append (k )
5374+ indexer = _update_indexer (_convert_to_indexer (k ), indexer = indexer )
5375+
53375376 elif is_list_like (k ):
53385377 # a collection of labels to include from this level (these are or'd)
5339- indexers = []
5378+ indexers = None
53405379 for x in k :
53415380 try :
5342- indexers .append (_convert_indexer (self ._get_level_indexer (x , level = i )))
5381+ idxrs = _convert_to_indexer (self ._get_level_indexer (x , level = i , indexer = indexer ))
5382+ indexers = idxrs if indexers is None else indexers | idxrs
53435383 except (KeyError ):
53445384
53455385 # ignore not founds
53465386 continue
5347- if len (k ):
5348- ranges .append (reduce (np .logical_or , indexers ))
5387+
5388+ if indexers is not None :
5389+ indexer = _update_indexer (indexers , indexer = indexer )
53495390 else :
5350- ranges .append (np .zeros (self .labels [i ].shape , dtype = bool ))
5391+
5392+ # no matches we are done
5393+ return Int64Index ([]).values
53515394
53525395 elif is_null_slice (k ):
53535396 # empty slice
5354- pass
5397+ indexer = _update_indexer ( None , indexer = indexer )
53555398
53565399 elif isinstance (k ,slice ):
53575400
53585401 # a slice, include BOTH of the labels
5359- ranges . append ( self ._get_level_indexer (k ,level = i ) )
5402+ indexer = _update_indexer ( _convert_to_indexer ( self ._get_level_indexer (k ,level = i , indexer = indexer )), indexer = indexer )
53605403 else :
53615404 # a single label
5362- ranges .append (self .get_loc_level (k ,level = i ,drop_level = False )[0 ])
5363-
5364- # identity
5365- if len (ranges ) == 0 :
5366- return slice (0 ,len (self ))
5367-
5368- elif len (ranges ) == 1 :
5369- return ranges [0 ]
5405+ indexer = _update_indexer (_convert_to_indexer (self .get_loc_level (k ,level = i ,drop_level = False )[0 ]), indexer = indexer )
53705406
5371- # construct a boolean indexer if we have a slice or boolean indexer
5372- return reduce (np .logical_and ,[ _convert_indexer (r ) for r in ranges ])
5407+ # empty indexer
5408+ if indexer is None :
5409+ return Int64Index ([]).values
5410+ return indexer .values
53735411
53745412 def truncate (self , before = None , after = None ):
53755413 """
0 commit comments