第04章选取数据子集 - 7. 按照字母切片 - 《Pandas Cookbook 带注释源码》

7. 按照字母切片

#  读取college数据集；尝试选取字母顺序在‘Sp’和‘Su’之间的学校
 In[57]: college = pd.read_csv('data/college.csv', index_col='INSTNM')
         college.loc['Sp':'Su']
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
   3483             try:
-> 3484                 return self._searchsorted_monotonic(label, side)
   3485             except ValueError:
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in _searchsorted_monotonic(self, label, side)
   3442 
-> 3443         raise ValueError('index must be monotonic increasing or decreasing')
   3444 
ValueError: index must be monotonic increasing or decreasing
During handling of the above exception, another exception occurred:
KeyError                                  Traceback (most recent call last)
<ipython-input-57-c9f1c69a918b> in <module>()
      1 college = pd.read_csv('data/college.csv', index_col='INSTNM')
----> 2 college.loc['Sp':'Su']
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
   1326         else:
   1327             key = com._apply_if_callable(key, self.obj)
-> 1328             return self._getitem_axis(key, axis=0)
   1329 
   1330     def _is_scalar_access(self, key):
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1504         if isinstance(key, slice):
   1505             self._has_valid_type(key, axis)
-> 1506             return self._get_slice_axis(key, axis=axis)
   1507         elif is_bool_indexer(key):
   1508             return self._getbool_axis(key, axis=axis)
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis)
   1354         labels = obj._get_axis(axis)
   1355         indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop,
-> 1356                                        slice_obj.step, kind=self.name)
   1357 
   1358         if isinstance(indexer, slice):
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in slice_indexer(self, start, end, step, kind)
   3348         """
   3349         start_slice, end_slice = self.slice_locs(start, end, step=step,
-> 3350                                                  kind=kind)
   3351 
   3352         #  return a slice
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind)
   3536         start_slice = None
   3537         if start is not None:
-> 3538             start_slice = self.get_slice_bound(start, 'left', kind)
   3539         if start_slice is None:
   3540             start_slice = 0
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
   3485             except ValueError:
   3486                 #  raise the original KeyError
-> 3487                 raise err
   3488 
   3489         if isinstance(slc, np.ndarray):
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_slice_bound(self, label, side, kind)
   3479         #  we need to look up the label
   3480         try:
-> 3481             slc = self._get_loc_only_exact_matches(label)
   3482         except KeyError as err:
   3483             try:
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in _get_loc_only_exact_matches(self, key)
   3448         get_slice_bound.
   3449         """
-> 3450         return self.get_loc(key)
   3451 
   3452     def get_slice_bound(self, label, side, kind):
/Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2442                 return self._engine.get_loc(key)
   2443             except KeyError:
-> 2444                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2445 
   2446         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()
KeyError: 'Sp'

#  对college按行索引排序（按标签切片要求索引单调有序，否则当边界标签不在索引中时会像上面那样抛出KeyError）
 In[58]: college = college.sort_index()
 In[59]: college.head()
Out[59]:

#  再尝试选取字母顺序在‘Sp’和‘Su’之间的学校
 In[60]: pd.options.display.max_rows = 6
 In[61]: college.loc['Sp':'Su']
Out[61]:

#  可以用is_monotonic_increasing或is_monotonic_decreasing检测索引是按升序还是按降序排列
 In[62]: college = college.sort_index(ascending=False)
         college.index.is_monotonic_decreasing
Out[62]: True

#  索引按降序排列时，切片的起止标签也要按逆序书写（从‘E’到‘B’）
 In[63]: college.loc['E':'B']
Out[63]: