5. 在DataFrame上使用运算符

  1. # college数据集的值既有数值也有对象,整数5不能与字符串相加
  2. In[37]: college = pd.read_csv('data/college.csv')
  3. college + 5
  4. ---------------------------------------------------------------------------
  5. TypeError Traceback (most recent call last)
  6. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in na_op(x, y)
  7. 1175 result = expressions.evaluate(op, str_rep, x, y,
  8. -> 1176 raise_on_error=True, **eval_kwargs)
  9. 1177 except TypeError:
  10. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/computation/expressions.py in evaluate(op, op_str, a, b, raise_on_error, use_numexpr, **eval_kwargs)
  11. 210 return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
  12. --> 211 **eval_kwargs)
  13. 212 return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)
  14. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/computation/expressions.py in _evaluate_numexpr(op, op_str, a, b, raise_on_error, truediv, reversed, **eval_kwargs)
  15. 121 if result is None:
  16. --> 122 result = _evaluate_standard(op, op_str, a, b, raise_on_error)
  17. 123
  18. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/computation/expressions.py in _evaluate_standard(op, op_str, a, b, raise_on_error, **eval_kwargs)
  19. 63 with np.errstate(all='ignore'):
  20. ---> 64 return op(a, b)
  21. 65
  22. TypeError: must be str, not int
  23. During handling of the above exception, another exception occurred:
  24. TypeError Traceback (most recent call last)
  25. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
  26. 1183 with np.errstate(all='ignore'):
  27. -> 1184 result = get_result(other)
  28. 1185
  29. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in get_result(other)
  30. 1152 else:
  31. -> 1153 result = func(values, other)
  32. 1154
  33. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in na_op(x, y)
  34. 1201 with np.errstate(all='ignore'):
  35. -> 1202 result[mask] = op(xrav, y)
  36. 1203 else:
  37. TypeError: must be str, not int
  38. During handling of the above exception, another exception occurred:
  39. TypeError Traceback (most recent call last)
  40. <ipython-input-37-4749f68a2501> in <module>()
  41. 1 college = pd.read_csv('data/college.csv')
  42. ----> 2 college + 5
  43. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in f(self, other, axis, level, fill_value)
  44. 1239 self = self.fillna(fill_value)
  45. 1240
  46. -> 1241 return self._combine_const(other, na_op)
  47. 1242
  48. 1243 f.__name__ = name
  49. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in _combine_const(self, other, func, raise_on_error)
  50. 3541 def _combine_const(self, other, func, raise_on_error=True):
  51. 3542 new_data = self._data.eval(func=func, other=other,
  52. -> 3543 raise_on_error=raise_on_error)
  53. 3544 return self._constructor(new_data)
  54. 3545
  55. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, **kwargs)
  56. 3195
  57. 3196 def eval(self, **kwargs):
  58. -> 3197 return self.apply('eval', **kwargs)
  59. 3198
  60. 3199 def quantile(self, **kwargs):
  61. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
  62. 3089
  63. 3090 kwargs['mgr'] = self
  64. -> 3091 applied = getattr(b, f)(**kwargs)
  65. 3092 result_blocks = _extend_blocks(applied, result_blocks)
  66. 3093
  67. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
  68. 1189 raise
  69. 1190 except Exception as detail:
  70. -> 1191 result = handle_error()
  71. 1192
  72. 1193 # technically a broadcast error in numpy can 'work' by returning a
  73. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in handle_error()
  74. 1172 # The 'detail' variable is defined in outer scope.
  75. 1173 raise TypeError('Could not operate %s with block values %s' %
  76. -> 1174 (repr(other), str(detail))) # noqa
  77. 1175 else:
  78. 1176 # return the values
  79. TypeError: Could not operate 5 with block values must be str, not int
  1. # 将INSTNM列设为行索引,再用filter(like='UGDS_')选出列名中包含'UGDS_'的列(本科生各种族比例)
  2. In[38]: college = pd.read_csv('data/college.csv', index_col='INSTNM')
  3. college_ugds_ = college.filter(like='UGDS_')
  4. In[39]: college == 'asdf' # 原书Jupyter Notebook中的示例:将整个college与字符串'asdf'比较,在该pandas版本中会抛出TypeError,这个比较本身没有实际意义,可以忽略
  5. ---------------------------------------------------------------------------
  6. TypeError Traceback (most recent call last)
  7. <ipython-input-39-697c8af60bcf> in <module>()
  8. ----> 1 college == 'asdf'
  9. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/ops.py in f(self, other)
  10. 1302 # straight boolean comparisions we want to allow all columns
  11. 1303 # (regardless of dtype to pass thru) See # 4537 for discussion.
  12. -> 1304 res = self._combine_const(other, func, raise_on_error=False)
  13. 1305 return res.fillna(True).astype(bool)
  14. 1306
  15. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in _combine_const(self, other, func, raise_on_error)
  16. 3541 def _combine_const(self, other, func, raise_on_error=True):
  17. 3542 new_data = self._data.eval(func=func, other=other,
  18. -> 3543 raise_on_error=raise_on_error)
  19. 3544 return self._constructor(new_data)
  20. 3545
  21. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, **kwargs)
  22. 3195
  23. 3196 def eval(self, **kwargs):
  24. -> 3197 return self.apply('eval', **kwargs)
  25. 3198
  26. 3199 def quantile(self, **kwargs):
  27. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
  28. 3089
  29. 3090 kwargs['mgr'] = self
  30. -> 3091 applied = getattr(b, f)(**kwargs)
  31. 3092 result_blocks = _extend_blocks(applied, result_blocks)
  32. 3093
  33. /Users/Ted/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
  34. 1203
  35. 1204 raise TypeError('Could not compare [%s] with block values' %
  36. -> 1205 repr(other))
  37. 1206
  38. 1207 # transpose if needed
  39. TypeError: Could not compare ['asdf'] with block values
  1. # 查看前5行
  2. In[40]: college_ugds_.head()
  3. Out[40]:

5. 在DataFrame上使用运算符 - 图1

  1. # 现在都是均质数据了,可以进行数值运算
  2. In[41]: college_ugds_.head() + .00501
  3. Out[41]:

5. 在DataFrame上使用运算符 - 图2

  1. # 用整除运算符(//,floor division)计算取整后的百分比
  2. In[42]: (college_ugds_.head() + .00501) // .01
  3. Out[42]:

5. 在DataFrame上使用运算符 - 图3

  1. # 再除以100
  2. In[43]: college_ugds_op_round = (college_ugds_ + .00501) // .01 / 100
  3. college_ugds_op_round.head()
  4. Out[43]:

5. 在DataFrame上使用运算符 - 图4

  1. # 保留两位小数
  2. In[44]: college_ugds_round = (college_ugds_ + .00001).round(2)
  3. college_ugds_round.head()
  4. Out[44]:

5. 在DataFrame上使用运算符 - 图5

  1. In[45]: .045 + .005 # 浮点数无法精确表示某些小数,结果并不恰好等于0.05
  2. Out[45]: 0.049999999999999996
  1. In[46]: college_ugds_op_round.equals(college_ugds_round)
  2. Out[46]: True

更多

  1. # 运算符对应的DataFrame方法(add、floordiv、div)也可以实现同样的计算
  2. In[47]: college_ugds_op_round_methods = college_ugds_.add(.00501).floordiv(.01).div(100)