7. 矩阵转置

  1. In[62]: college = pd.read_csv('data/college.csv', index_col='INSTNM')
  2. college_ugds_ = college.filter(like='UGDS_')
  3. college_ugds_.head()
  4. Out[62]:

7. 矩阵转置 - 图1

  1. # count()返回非缺失值的个数
  2. In[63]: college_ugds_.count()
  3. Out[63]: UGDS_WHITE 6874
  4. UGDS_BLACK 6874
  5. UGDS_HISP 6874
  6. UGDS_ASIAN 6874
  7. ...
  8. UGDS_NHPI 6874
  9. UGDS_2MOR 6874
  10. UGDS_NRA 6874
  11. UGDS_UNKN 6874
  12. Length: 9, dtype: int64
  1. # axis参数的默认值为0,因此显式传入axis=0的结果与上面相同
  2. In[64]: college_ugds_.count(axis=0)
  3. Out[64]: UGDS_WHITE 6874
  4. UGDS_BLACK 6874
  5. UGDS_HISP 6874
  6. UGDS_ASIAN 6874
  7. ...
  8. UGDS_NHPI 6874
  9. UGDS_2MOR 6874
  10. UGDS_NRA 6874
  11. UGDS_UNKN 6874
  12. Length: 9, dtype: int64
  1. # 等价于axis='index'
  2. In[65]: college_ugds_.count(axis='index')
  3. Out[65]: UGDS_WHITE 6874
  4. UGDS_BLACK 6874
  5. UGDS_HISP 6874
  6. UGDS_ASIAN 6874
  7. ...
  8. UGDS_NHPI 6874
  9. UGDS_2MOR 6874
  10. UGDS_NRA 6874
  11. UGDS_UNKN 6874
  12. Length: 9, dtype: int64
  1. # 统计每行的非缺失值个数
  2. In[66]: college_ugds_.count(axis='columns').head()
  3. Out[66]: INSTNM
  4. Alabama A & M University 9
  5. University of Alabama at Birmingham 9
  6. Amridge University 9
  7. University of Alabama in Huntsville 9
  8. Alabama State University 9
  9. dtype: int64
  1. # 除了统计每行的非缺失值个数,还可以对每行求和,确认各族裔比例之和是否为1(即100%)
  2. In[67]: college_ugds_.sum(axis='columns').head()
  3. Out[67]: INSTNM
  4. Alabama A & M University 1.0000
  5. University of Alabama at Birmingham 0.9999
  6. Amridge University 1.0000
  7. University of Alabama in Huntsville 1.0000
  8. Alabama State University 1.0000
  9. dtype: float64
  1. # 用中位数了解每列的分布
  2. In[68]: college_ugds_.median(axis='index')
  3. Out[68]: UGDS_WHITE 0.55570
  4. UGDS_BLACK 0.10005
  5. UGDS_HISP 0.07140
  6. UGDS_ASIAN 0.01290
  7. ...
  8. UGDS_NHPI 0.00000
  9. UGDS_2MOR 0.01750
  10. UGDS_NRA 0.00000
  11. UGDS_UNKN 0.01430
  12. Length: 9, dtype: float64

更多

  1. # 沿axis=1使用累积求和cumsum(),可以很容易看到每所学校白人、黑人、西班牙裔的累计比例
  2. In[69]: college_ugds_cumsum = college_ugds_.cumsum(axis=1)
  3. college_ugds_cumsum.head()
  4. Out[69]:

7. 矩阵转置 - 图2

  1. # 按UGDS_HISP一列降序排列
  2. In[70]: college_ugds_cumsum.sort_values('UGDS_HISP', ascending=False)
  3. Out[70]:

7. 矩阵转置 - 图3