2. 对列名进行排序

  1. # 读取movie数据集
  2. In[12]: movie = pd.read_csv('data/movie.csv')
  3. In[13]: movie.head()
  4. Out[13]:

2. 对列名进行排序 - 图1:movie.head() 的输出结果

  1. # 打印列索引
  2. In[14]: movie.columns
  3. Out[14]: Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
  4. 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
  5. 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
  6. 'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
  7. 'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
  8. 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
  9. 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
  10. 'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
  11. dtype='object')

  1. # 将列索引按照指定的顺序排列
  2. In[15]: disc_core = ['movie_title','title_year', 'content_rating','genres']
  3. disc_people = ['director_name','actor_1_name', 'actor_2_name','actor_3_name']
  4. disc_other = ['color','country','language','plot_keywords','movie_imdb_link']
  5. cont_fb = ['director_facebook_likes','actor_1_facebook_likes','actor_2_facebook_likes',
  6. 'actor_3_facebook_likes', 'cast_total_facebook_likes', 'movie_facebook_likes']
  7. cont_finance = ['budget','gross']
  8. cont_num_reviews = ['num_voted_users','num_user_for_reviews', 'num_critic_for_reviews']
  9. cont_other = ['imdb_score','duration', 'aspect_ratio', 'facenumber_in_poster']
  10. In[16]: new_col_order = disc_core + disc_people + disc_other + \
  11. cont_fb + cont_finance + cont_num_reviews + cont_other
  12. set(movie.columns) == set(new_col_order)
  13. Out[16]: True
  14. In[17]: movie2 = movie[new_col_order]
  15. movie2.head()
  16. Out[17]:

2. 对列名进行排序 - 图2:movie2.head() 的输出结果(列已按 new_col_order 指定的顺序排列)