1. 选取多个DataFrame列

  1. # 用列表选取多个列
  2. In[2]: movie = pd.read_csv('data/movie.csv')
  3. movie_actor_director = movie[['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name']]
  4. movie_actor_director.head()
  5. Out[2]:

1. 选取多个DataFrame列 - 图1

  1. # 用只含一个列名的列表选取单列(返回的是DataFrame,而不是Series)
  2. In[3]: movie[['director_name']].head()
  3. Out[3]:

1. 选取多个DataFrame列 - 图2

  1. # 错误的选取多列的方式:缺少列表的方括号,四个列名会被当作一个元组键,从而引发KeyError
  2. In[4]: movie['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name']
  3. ---------------------------------------------------------------------------
  4. KeyError Traceback (most recent call last)
  5. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
  6. 2441 try:
  7. -> 2442 return self._engine.get_loc(key)
  8. 2443 except KeyError:
  9. pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()
  10. pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()
  11. pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()
  12. pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()
  13. KeyError: ('actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name')
  14. During handling of the above exception, another exception occurred:
  15. KeyError Traceback (most recent call last)
  16. <ipython-input-4-954222273e42> in <module>()
  17. ----> 1 movie['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name']
  18. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
  19. 1962 return self._getitem_multilevel(key)
  20. 1963 else:
  21. -> 1964 return self._getitem_column(key)
  22. 1965
  23. 1966 def _getitem_column(self, key):
  24. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
  25. 1969 # get column
  26. 1970 if self.columns.is_unique:
  27. -> 1971 return self._get_item_cache(key)
  28. 1972
  29. 1973 # duplicate columns & possible reduce dimensionality
  30. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
  31. 1643 res = cache.get(item)
  32. 1644 if res is None:
  33. -> 1645 values = self._data.get(item)
  34. 1646 res = self._box_item_values(item, values)
  35. 1647 cache[item] = res
  36. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
  37. 3588
  38. 3589 if not isnull(item):
  39. -> 3590 loc = self.items.get_loc(item)
  40. 3591 else:
  41. 3592 indexer = np.arange(len(self.items))[isnull(self.items)]
  42. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
  43. 2442 return self._engine.get_loc(key)
  44. 2443 except KeyError:
  45. -> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
  46. 2445
  47. 2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
  48. pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()
  49. pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()
  50. pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()
  51. pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()
  52. KeyError: ('actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name')

更多

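  1. # 将列表赋值给一个变量,便于多选
  2. In[5]: cols = ['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name']
  3. movie_actor_director = movie[cols]
  1. # 统计各数据类型的列数(get_dtype_counts()在新版pandas中已移除,可改用movie.dtypes.value_counts())
  2. In[6]: movie.get_dtype_counts()
  3. Out[6]: float64 13
  4. int64 3
  5. object 11
  6. dtype: int64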
  1. # 使用select_dtypes(),选取整数列
  2. In[7]: movie.select_dtypes(include=['int']).head()
  3. Out[7]:

1. 选取多个DataFrame列 - 图3

  1. # 选取所有的数值列
  2. In[8]: movie.select_dtypes(include=['number']).head()
  3. Out[8]:

1. 选取多个DataFrame列 - 图4

  1. # 通过filter()函数过滤选取多列
  2. In[9]: movie.filter(like='facebook').head()
  3. Out[9]:

1. 选取多个DataFrame列 - 图5

  1. # 通过正则表达式选取多列(此处选取列名中含有数字的列)
  2. In[10]: movie.filter(regex=r'\d').head()
  3. Out[10]:

1. 选取多个DataFrame列 - 图6

  1. # filter()函数,传递列表到参数items,选取多列;不存在的列名(如'asdf')会被忽略,不会报错
  2. In[11]: movie.filter(items=['actor_1_name', 'asdf']).head()
  3. Out[11]:

1. 选取多个DataFrame列 - 图7