7. 串联Series方法

  1. # 用value_counts()统计每位导演出现的次数,再用head(3)查看出现次数最多的前三位
  2. In[76]: movie = pd.read_csv('data/movie.csv')
  3. actor_1_fb_likes = movie['actor_1_facebook_likes']
  4. director = movie['director_name']
  5. In[77]: director.value_counts().head(3)
  6. Out[77]: Steven Spielberg 26
  7. Woody Allen 22
  8. Clint Eastwood 20
  9. Name: director_name, dtype: int64
  1. # 统计缺失值的数量
  2. In[78]: actor_1_fb_likes.isnull().sum()
  3. Out[78]: 7
  1. # 查看actor_1_fb_likes的数据类型(该列含有缺失值NaN,因此是float64而不是整型)
  2. In[79]: actor_1_fb_likes.dtype
  3. Out[79]: dtype('float64')
  1. # 先把缺失值填充为0,再转换为整型,最后查看前五行(这里用反斜杠续行来串联方法)
  2. In[80]: actor_1_fb_likes.fillna(0)\
  3. .astype(int)\
  4. .head()
  5. Out[80]: 0 1000
  6. 1 40000
  7. 2 11000
  8. 3 27000
  9. 4 131
  10. Name: actor_1_facebook_likes, dtype: int64

更多

  1. # 缺失值的比例:isnull()返回布尔值Series,对其求mean()得到的就是True(即缺失)所占的比例
  2. In[81]: actor_1_fb_likes.isnull().mean()
  3. Out[81]: 0.0014239218877135883
  1. # 用圆括号把整个表达式括起来进行串联,这样换行时就不需要在每行末尾加反斜杠
  2. In[82]: (actor_1_fb_likes.fillna(0)
  3. .astype(int)
  4. .head())
  5. Out[82]: 0 1000
  6. 1 40000
  7. 2 11000
  8. 3 27000
  9. 4 131
  10. Name: actor_1_facebook_likes, dtype: int64