10. 创建、删除列

  1. # 通过[列名]添加新列
  2. In[96]: movie = pd.read_csv('data/movie.csv')
  3. In[97]: movie['has_seen'] = 0
  4. In[98]: movie.columns
  5. Out[98]: Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
  6. 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
  7. 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
  8. 'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
  9. 'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
  10. 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
  11. 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
  12. 'imdb_score', 'aspect_ratio', 'movie_facebook_likes', 'has_seen'],
  13. dtype='object')
  1. # 用已有的几列之和给新列赋值，并统计新列中缺失值的个数
  2. In[99]: movie['actor_director_facebook_likes'] = (movie['actor_1_facebook_likes'] +
  3. movie['actor_2_facebook_likes'] +
  4. movie['actor_3_facebook_likes'] +
  5. movie['director_facebook_likes'])
  6. In[100]: movie['actor_director_facebook_likes'].isnull().sum()
  7. Out[100]: 122
  1. # 先用fillna()把缺失值填充为0，再用all()检查是否所有的布尔值都为True
  2. In[101]: movie['actor_director_facebook_likes'] = movie['actor_director_facebook_likes'].fillna(0)
  3. In[102]: movie['is_cast_likes_more'] = (movie['cast_total_facebook_likes'] >=
  4. movie['actor_director_facebook_likes'])
  5. In[103]: movie['is_cast_likes_more'].all()
  6. Out[103]: False
  7. In[104]: movie = movie.drop('actor_director_facebook_likes', axis='columns')
  8. In[105]: movie['actor_total_facebook_likes'] = (movie['actor_1_facebook_likes'] +
  9. movie['actor_2_facebook_likes'] +
  10. movie['actor_3_facebook_likes'])
  11. movie['actor_total_facebook_likes'] = movie['actor_total_facebook_likes'].fillna(0)
  12. In[106]: movie['is_cast_likes_more'] = movie['cast_total_facebook_likes'] >= \
  13. movie['actor_total_facebook_likes']
  14. movie['is_cast_likes_more'].all()
  15. Out[106]: True
  1. In[107]: movie['pct_actor_cast_like'] = (movie['actor_total_facebook_likes'] /
  2. movie['cast_total_facebook_likes'])
  3. In[108]: movie['pct_actor_cast_like'].min(), movie['pct_actor_cast_like'].max()
  4. Out[108]: (0.0, 1.0)
  5. In[109]: movie.set_index('movie_title')['pct_actor_cast_like'].head()
  6. Out[109]: movie_title
  7. Avatar 0.577369
  8. Pirates of the Caribbean: At World's End 0.951396
  9. Spectre 0.987521
  10. The Dark Knight Rises 0.683783
  11. Star Wars: Episode VII - The Force Awakens 0.000000
  12. Name: pct_actor_cast_like, dtype: float64

更多

  1. # 用insert()方法原地插入列
  2. In[110]: profit_index = movie.columns.get_loc('gross') + 1
  3. profit_index
  4. In[111]: movie.insert(loc=profit_index,
  5. column='profit',
  6. value=movie['gross'] - movie['budget'])
  7. In[112]: movie.head()
  8. Out[112]:

10. 创建、删除列 - 图1