8. 减肥对赌

  1. # 读取减肥数据集,查看一月的数据
  2. In[63]: weight_loss = pd.read_csv('data/weight_loss.csv')
  3. weight_loss.query('Month == "Jan"')
  4. Out[63]:

8. 减肥对赌 - 图1

  1. # 定义一个函数,计算体重相对于序列中第一个值的变化比例(负值表示体重减轻)
  2. In[64]: def find_perc_loss(s):
  3. return (s - s.iloc[0]) / s.iloc[0]
  4. # 查看Bob在一月的减肥成果
  5. In[65]: bob_jan = weight_loss.query('Name=="Bob" and Month=="Jan"')
  6. find_perc_loss(bob_jan['Weight'])
  7. Out[65]: 0 0.000000
  8. 2 -0.010309
  9. 4 -0.027491
  10. 6 -0.027491
  11. Name: Weight, dtype: float64
  1. # 对Name和Month进行分组,然后使用transform方法,传入函数,对数值进行转换
  2. In[66]: pcnt_loss = weight_loss.groupby(['Name', 'Month'])['Weight'].transform(find_perc_loss)
  3. pcnt_loss.head(8)
  4. Out[66]: 0 0.000000
  5. 1 0.000000
  6. 2 -0.010309
  7. 3 -0.040609
  8. 4 -0.027491
  9. 5 -0.040609
  10. 6 -0.027491
  11. 7 -0.035533
  12. Name: Weight, dtype: float64
  1. # transform之后的结果,行数不变,可以赋值给原始DataFrame作为一个新列;
  2. # 为了缩短输出,只选择Bob的前两个月数据
  3. In[67]: weight_loss['Perc Weight Loss'] = pcnt_loss.round(3)
  4. weight_loss.query('Name=="Bob" and Month in ["Jan", "Feb"]')
  5. Out[67]:

8. 减肥对赌 - 图2

  1. # 因为最重要的是每个月的第4周,只选择第4周的数据
  2. In[68]: week4 = weight_loss.query('Week == "Week 4"')
  3. week4
  4. Out[68]:

8. 减肥对赌 - 图3

  1. # 用pivot重构DataFrame,让Amy和Bob的数据并排放置
  2. In[69]: winner = week4.pivot(index='Month', columns='Name', values='Perc Weight Loss')
  3. winner
  4. Out[69]:

8. 减肥对赌 - 图4

  1. # 用NumPy的where函数选出每月的赢家(减肥比例数值更小,即减得更多的一方获胜)
  2. In[70]: winner['Winner'] = np.where(winner['Amy'] < winner['Bob'], 'Amy', 'Bob')
  3. winner.style.highlight_min(axis=1)
  4. Out[70]:

8. 减肥对赌 - 图5

  1. # 用value_counts()返回最后的比分
  2. In[71]: winner.Winner.value_counts()
  3. Out[71]: Amy 3
  4. Bob 1
  5. Name: Winner, dtype: int64

更多

  1. # pivot之后,Month索引默认按字母顺序排列,而不是按时间顺序;先用unique()取出按出现顺序(即时间顺序)排列的月份
  2. In[72]: week4a = week4.copy()
  3. month_chron = week4a['Month'].unique()
  4. month_chron
  5. Out[72]: array(['Jan', 'Feb', 'Mar', 'Apr'], dtype=object)
  1. # 将Month转换为有序的Categorical变量,这样pivot之后的结果就会按时间顺序排列
  2. In[73]: week4a['Month'] = pd.Categorical(week4a['Month'],
  3. categories=month_chron,
  4. ordered=True)
  5. week4a.pivot(index='Month', columns='Name', values='Perc Weight Loss')
  6. Out[73]:

8. 减肥对赌 - 图6