1. 检查索引

  1. # 读取college数据集,提取所有的列
  2. In[2]: college = pd.read_csv('data/college.csv')
  3. columns = college.columns
  4. columns
  5. Out[2]: Index(['INSTNM', 'CITY', 'STABBR', 'HBCU', 'MENONLY', 'WOMENONLY', 'RELAFFIL',
  6. 'SATVRMID', 'SATMTMID', 'DISTANCEONLY', 'UGDS', 'UGDS_WHITE',
  7. 'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN', 'UGDS_AIAN', 'UGDS_NHPI',
  8. 'UGDS_2MOR', 'UGDS_NRA', 'UGDS_UNKN', 'PPTUG_EF', 'CURROPER', 'PCTPELL',
  9. 'PCTFLOAN', 'UG25ABV', 'MD_EARN_WNE_P10', 'GRAD_DEBT_MDN_SUPP'], dtype='object')
  1. # 用values属性,访问底层的NumPy数组
  2. In[3]: columns.values
  3. Out[3]: array(['INSTNM', 'CITY', 'STABBR', 'HBCU', 'MENONLY', 'WOMENONLY',
  4. 'RELAFFIL', 'SATVRMID', 'SATMTMID', 'DISTANCEONLY', 'UGDS',
  5. 'UGDS_WHITE', 'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN', 'UGDS_AIAN',
  6. 'UGDS_NHPI', 'UGDS_2MOR', 'UGDS_NRA', 'UGDS_UNKN', 'PPTUG_EF',
  7. 'CURROPER', 'PCTPELL', 'PCTFLOAN', 'UG25ABV', 'MD_EARN_WNE_P10',
  8. 'GRAD_DEBT_MDN_SUPP'], dtype=object)
  1. # 取出该数组的第6个值
  2. In[4]: columns[5]
  3. Out[4]: 'WOMENONLY'
  1. # 取出该数组的第2、9、11个值
  2. In[5]: columns[[1,8,10]]
  3. Out[5]: Index(['CITY', 'SATMTMID', 'UGDS'], dtype='object')
  1. # 用负数索引切片选取(倒数第7个到倒数第5个)
  2. In[6]: columns[-7:-4]
  3. Out[6]: Index(['PPTUG_EF', 'CURROPER', 'PCTPELL'], dtype='object')
  1. # 索引有许多和Series和DataFrame相同的方法
  2. In[7]: columns.min(), columns.max(), columns.isnull().sum()
  3. Out[7]: ('CITY', 'WOMENONLY', 0)
  1. # 索引对象可以直接进行字符串运算,结果是一个新的索引对象(原索引不会被修改)
  2. In[8]: columns + '_A'
  3. Out[8]: Index(['INSTNM_A', 'CITY_A', 'STABBR_A', 'HBCU_A', 'MENONLY_A', 'WOMENONLY_A',
  4. 'RELAFFIL_A', 'SATVRMID_A', 'SATMTMID_A', 'DISTANCEONLY_A', 'UGDS_A',
  5. 'UGDS_WHITE_A', 'UGDS_BLACK_A', 'UGDS_HISP_A', 'UGDS_ASIAN_A',
  6. 'UGDS_AIAN_A', 'UGDS_NHPI_A', 'UGDS_2MOR_A', 'UGDS_NRA_A',
  7. 'UGDS_UNKN_A', 'PPTUG_EF_A', 'CURROPER_A', 'PCTPELL_A', 'PCTFLOAN_A',
  8. 'UG25ABV_A', 'MD_EARN_WNE_P10_A', 'GRAD_DEBT_MDN_SUPP_A'],
  9. dtype='object')
  1. # 索引对象也可以通过比较运算符,得到布尔数组
  2. In[9]: columns > 'G'
  3. Out[9]: array([ True, False, True, True, True, True, True, True, True,
  4. False, True, True, True, True, True, True, True, True,
  5. True, True, True, False, True, True, True, True, True], dtype=bool)
  1. # 尝试用赋值的方法,修改索引对象的一个值,会导致类型错误,因为索引对象是不可变类型
  2. In[10]: columns[1] = 'city'
  3. ---------------------------------------------------------------------------
  4. TypeError Traceback (most recent call last)
  5. <ipython-input-10-1e9e8e3125de> in <module>()
  6. ----> 1 columns[1] = 'city'
  7. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in __setitem__(self, key, value)
  8. 1668
  9. 1669 def __setitem__(self, key, value):
  10. -> 1670 raise TypeError("Index does not support mutable operations")
  11. 1671
  12. 1672 def __getitem__(self, key):
  13. TypeError: Index does not support mutable operations

更多

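索引对象支持集合运算:并集、交集、差集、对称差集。注意:下面用 `|`、`^` 运算符做集合运算的写法只适用于旧版 pandas;从 pandas 2.0 起这些运算符改为逐元素的逻辑运算,应使用 union、symmetric_difference 等方法。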

  1. # 切片
  2. In[11]: c1 = columns[:4]
  3. c1
  4. Out[11]: Index(['INSTNM', 'CITY', 'STABBR', 'HBCU'], dtype='object')
  5. In[12]: c2 = columns[2:5]
  6. c2
  7. Out[12]: Index(['STABBR', 'HBCU', 'MENONLY'], dtype='object')
  1. # 并集
  2. In[13]: c1.union(c2)
  3. Out[13]: Index(['CITY', 'HBCU', 'INSTNM', 'MENONLY', 'STABBR'], dtype='object')
  4. In[14]: c1 | c2
  5. Out[14]: Index(['CITY', 'HBCU', 'INSTNM', 'MENONLY', 'STABBR'], dtype='object')
  1. # 对称差
  2. In[15]: c1.symmetric_difference(c2)
  3. Out[15]: Index(['CITY', 'INSTNM', 'MENONLY'], dtype='object')
  4. In[16]: c1 ^ c2
  5. Out[16]: Index(['CITY', 'INSTNM', 'MENONLY'], dtype='object')