6. 在Series上使用运算符

  1. In[48]: pd.options.display.max_rows = 6
  2. In[49]: 5 + 9 # 加法
  3. Out[49]: 14
  1. In[50]: 4 ** 2 # 幂运算
  2. Out[50]: 16
  1. In[51]: a = 10 # 赋值
  1. In[52]: 5 <= 9 # 小于等于
  2. Out[52]: True
  1. In[53]: 'abcde' + 'fg' # 字符串拼接
  2. Out[53]: 'abcdefg'
  1. In[54]: not (5 <= 9) # 非运算符
  2. Out[54]: False
  1. In[55]: 7 in [1, 2, 6] # in运算符
  2. Out[55]: False
  1. In[56]: set([1,2,3]) & set([2,3,4]) # 求交集
  2. Out[56]: {2, 3}
  1. # 不支持列表和整数间的运算
  2. In[57]: [1, 2, 3] - 3
  3. ---------------------------------------------------------------------------
  4. TypeError Traceback (most recent call last)
  5. <ipython-input-57-7ca967348b32> in <module>()
  6. ----> 1 [1, 2, 3] - 3
  7. TypeError: unsupported operand type(s) for -: 'list' and 'int'
  1. In[58]: a = set([1,2,3])
  2. a[0] # 集合不支持索引,这行会抛出TypeError

准备

  1. # 选取imdb_score这列
  2. In[59]: movie = pd.read_csv('data/movie.csv')
  3. imdb_score = movie['imdb_score']
  4. imdb_score
  5. Out[59]: 0 7.9
  6. 1 7.1
  7. 2 6.8
  8. ...
  9. 4913 6.3
  10. 4914 6.3
  11. 4915 6.6
  12. Name: imdb_score, Length: 4916, dtype: float64
  1. # 每列值加1
  2. In[60]: imdb_score + 1
  3. Out[60]: 0 8.9
  4. 1 8.1
  5. 2 7.8
  6. ...
  7. 4913 7.3
  8. 4914 7.3
  9. 4915 7.6
  10. Name: imdb_score, Length: 4916, dtype: float64
  1. # 每列值乘以2.5
  2. In[61]: imdb_score * 2.5
  3. Out[61]: 0 19.75
  4. 1 17.75
  5. 2 17.00
  6. ...
  7. 4913 15.75
  8. 4914 15.75
  9. 4915 16.50
  10. Name: imdb_score, Length: 4916, dtype: float64
  1. # 每列值对7做底除(整除,向下取整)
  2. In[62]: imdb_score // 7
  3. Out[62]: 0 1.0
  4. 1 1.0
  5. 2 0.0
  6. ...
  7. 4913 0.0
  8. 4914 0.0
  9. 4915 0.0
  10. Name: imdb_score, Length: 4916, dtype: float64
  1. # 判断是否大于7
  2. In[63]: imdb_score > 7
  3. Out[63]: 0 True
  4. 1 True
  5. 2 False
  6. ...
  7. 4913 False
  8. 4914 False
  9. 4915 False
  10. Name: imdb_score, Length: 4916, dtype: bool
  1. # 判断是否等于字符串
  2. In[64]: director = movie['director_name']
  3. In[65]: director == 'James Cameron'
  4. Out[65]: 0 True
  5. 1 False
  6. 2 False
  7. ...
  8. 4913 False
  9. 4914 False
  10. 4915 False
  11. Name: director_name, Length: 4916, dtype: bool

更多

  1. # 利用通用函数实现加法
  2. In[66]: imdb_score.add(1) # imdb_score + 1
  3. Out[66]: 0 8.9
  4. 1 8.1
  5. 2 7.8
  6. ...
  7. 4913 7.3
  8. 4914 7.3
  9. 4915 7.6
  10. Name: imdb_score, Length: 4916, dtype: float64
  1. # 利用通用函数实现乘法
  2. In[67]: imdb_score.mul(2.5) # imdb_score * 2.5
  3. Out[67]: 0 19.75
  4. 1 17.75
  5. 2 17.00
  6. ...
  7. 4913 15.75
  8. 4914 15.75
  9. 4915 16.50
  10. Name: imdb_score, Length: 4916, dtype: float64
  1. # 利用通用函数实现底除
  2. In[68]: imdb_score.floordiv(7) # imdb_score // 7
  3. Out[68]: 0 1.0
  4. 1 1.0
  5. 2 0.0
  6. ...
  7. 4913 0.0
  8. 4914 0.0
  9. 4915 0.0
  10. Name: imdb_score, Length: 4916, dtype: float64
  1. # 利用通用函数实现大于
  2. In[69]: imdb_score.gt(7) # imdb_score > 7
  3. Out[69]: 0 True
  4. 1 True
  5. 2 False
  6. ...
  7. 4913 False
  8. 4914 False
  9. 4915 False
  10. Name: imdb_score, Length: 4916, dtype: bool
  1. # 利用通用函数实现等于
  2. In[70]: director.eq('James Cameron') # director == 'James Cameron'
  3. Out[70]: 0 True
  4. 1 False
  5. 2 False
  6. ...
  7. 4913 False
  8. 4914 False
  9. 4915 False
  10. Name: director_name, Length: 4916, dtype: bool
  1. # 利用通用函数实现取模
  2. In[71]: imdb_score.astype(int).mod(5)
  3. Out[71]: 0 2
  4. 1 2
  5. 2 1
  6. ..
  7. 4913 1
  8. 4914 1
  9. 4915 1
  10. Name: imdb_score, Length: 4916, dtype: int64
  1. # a是int类本身(类型对象),而不是一个整数实例
  2. In[72]: a = type(1)
  3. In[73]: type(a)
  4. Out[73]: type
  1. # a是pandas.core.series.Series类本身,可以像构造器一样调用它来创建Series
  2. In[74]: a = type(imdb_score)
  3. In[75]: a([1,2,3])
  4. Out[75]: 0 1
  5. 1 2
  6. 2 3
  7. dtype: int64