8. 确定大学校园多样性

  1. # US News给出的美国10所最具多样性的大学
  2. In[71]: pd.read_csv('data/college_diversity.csv', index_col='School')
  3. Out[71]:

8. 确定大学校园多样性 - 图1

  1. In[72]: college = pd.read_csv('data/college.csv', index_col='INSTNM')
  2. college_ugds_ = college.filter(like='UGDS_')
  3. college_ugds_.head()
  4. Out[72]:

8. 确定大学校园多样性 - 图2

  1. In[73]: college_ugds_.isnull().sum(axis=1).sort_values(ascending=False).head()
  2. Out[73]: INSTNM
  3. Excel Learning Center-San Antonio South 9
  4. Philadelphia College of Osteopathic Medicine 9
  5. Assemblies of God Theological Seminary 9
  6. Episcopal Divinity School 9
  7. Phillips Graduate Institute 9
  8. dtype: int64
  1. # 如果某一行的所有列都是缺失值,则将该行去除
  2. In[74]: college_ugds_ = college_ugds_.dropna(how='all')
  3. In[75]: college_ugds_.isnull().sum()
  4. Out[75]: UGDS_WHITE 0
  5. UGDS_BLACK 0
  6. UGDS_HISP 0
  7. UGDS_ASIAN 0
  8. ..
  9. UGDS_NHPI 0
  10. UGDS_2MOR 0
  11. UGDS_NRA 0
  12. UGDS_UNKN 0
  13. Length: 9, dtype: int64
  1. # 用大于或等于方法ge(),将DataFrame变为布尔值矩阵
  2. In[76]: college_ugds_.ge(.15).head()
  3. Out[76]:

8. 确定大学校园多样性 - 图3

  1. # 按行对True值求和,得到每所学校中比例大于或等于15%的种族个数
  2. In[77]: diversity_metric = college_ugds_.ge(.15).sum(axis='columns')
  3. diversity_metric.head()
  4. Out[77]: INSTNM
  5. Alabama A & M University 1
  6. University of Alabama at Birmingham 2
  7. Amridge University 3
  8. University of Alabama in Huntsville 1
  9. Alabama State University 1
  10. dtype: int64
  1. # 使用value_counts(),查看分布情况
  2. In[78]: diversity_metric.value_counts()
  3. Out[78]: 1 3042
  4. 2 2884
  5. 3 876
  6. 4 63
  7. 0 7
  8. 5 2
  9. dtype: int64
  1. # 查看哪些学校中比例大于或等于15%的种族个数最多
  2. In[79]: diversity_metric.sort_values(ascending=False).head()
  3. Out[79]: INSTNM
  4. Regency Beauty Institute-Austin 5
  5. Central Texas Beauty College-Temple 5
  6. Sullivan and Cogliano Training Center 4
  7. Ambria College of Nursing 4
  8. Berkeley College-New York 4
  9. dtype: int64
  1. # 用.loc索引器按行索引标签选取对应的行
  2. In[80]: college_ugds_.loc[['Regency Beauty Institute-Austin',
  3. 'Central Texas Beauty College-Temple']]
  4. Out[80]:

8. 确定大学校园多样性 - 图4

  1. # 查看US News前五所最具多样性的大学在diversity_metric中的情况
  2. In[81]: us_news_top = ['Rutgers University-Newark',
  3. 'Andrews University',
  4. 'Stanford University',
  5. 'University of Houston',
  6. 'University of Nevada-Las Vegas']
  7. In[82]: diversity_metric.loc[us_news_top]
  8. Out[82]: INSTNM
  9. Rutgers University-Newark 4
  10. Andrews University 3
  11. Stanford University 3
  12. University of Houston 3
  13. University of Nevada-Las Vegas 3
  14. dtype: int64

更多

  1. # 可以用最大种族比例查看哪些学校最不具有多样性
  2. In[83]: college_ugds_.max(axis=1).sort_values(ascending=False).head(10)
  3. Out[83]: INSTNM
  4. Dewey University-Manati 1.0
  5. Yeshiva and Kollel Harbotzas Torah 1.0
  6. Mr Leon's School of Hair Design-Lewiston 1.0
  7. Dewey University-Bayamon 1.0
  8. ...
  9. Monteclaro Escuela de Hoteleria y Artes Culinarias 1.0
  10. Yeshiva Shaar Hatorah 1.0
  11. Bais Medrash Elyon 1.0
  12. Yeshiva of Nitra Rabbinical College 1.0
  13. Length: 10, dtype: float64
  1. # 查看Talmudical Seminary Oholei Torah这所神学院
  2. In[84]: college_ugds_.loc['Talmudical Seminary Oholei Torah']
  3. Out[84]: UGDS_WHITE 1.0
  4. UGDS_BLACK 0.0
  5. UGDS_HISP 0.0
  6. UGDS_ASIAN 0.0
  7. ...
  8. UGDS_NHPI 0.0
  9. UGDS_2MOR 0.0
  10. UGDS_NRA 0.0
  11. UGDS_UNKN 0.0
  12. Name: Talmudical Seminary Oholei Torah, Length: 9, dtype: float64
  1. # 查看是否有学校九个种族的比例都超过了1%
  2. In[85]: (college_ugds_ > .01).all(axis=1).any()
  3. Out[85]: True