In [20]: s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index = ['a', 'c', 'd', 'e'])
In [22]: s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1],
...: index = ['a', 'c', 'e', 'f', 'g'])
In [23]: s1
Out[23]:
a 7.3
c -2.5
d 3.4
e 1.5
dtype: float64
In [24]: s2
Out[24]:
a -2.1
c 3.6
e -1.5
f 4.0
g 3.1
dtype: float64
In [25]: s1 + s2
Out[25]:
a 5.2
c 1.1
d NaN
e 0.0
f NaN
g NaN
dtype: float64
两个 Series 的索引会自动对齐;只出现在其中一个对象里的标签(没有重叠的索引),对应的结果为 NaN
In [27]: df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list('bcd'),
...: index=['Ohio', 'Texas', 'Colorado'])
In [28]: df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),
...: index=['Utah', 'Ohio', 'Texas', 'Oregon'])
In [29]: df1
Out[29]:
b c d
Ohio 0.0 1.0 2.0
Texas 3.0 4.0 5.0
Colorado 6.0 7.0 8.0
In [30]: df2
Out[30]:
b d e
Utah 0.0 1.0 2.0
Ohio 3.0 4.0 5.0
Texas 6.0 7.0 8.0
Oregon 9.0 10.0 11.0
In [31]: df1 + df2
Out[31]:
b c d e
Colorado NaN NaN NaN NaN
Ohio 3.0 NaN 6.0 NaN
Oregon NaN NaN NaN NaN
Texas 9.0 NaN 12.0 NaN
Utah NaN NaN NaN NaN
使用填充值的算术方法
In [33]: df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)),
...: columns=list('abcd'))
In [34]: df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)),
...: columns=list('abcde'))
In [35]: df1 + df2
Out[35]:
a b c d e
0 0.0 2.0 4.0 6.0 NaN
1 9.0 11.0 13.0 15.0 NaN
2 18.0 20.0 22.0 24.0 NaN
3 NaN NaN NaN NaN NaN
In [37]: df1.add(df2, fill_value=0)
Out[37]:
a b c d e
0 0.0 2.0 4.0 6.0 4.0
1 9.0 11.0 13.0 15.0 9.0
2 18.0 20.0 22.0 24.0 14.0
3 15.0 16.0 17.0 18.0 19.0
灵活算术方法
| 方法 | 描述 |
| --- | --- |
| add, radd | 加法(+) |
| sub, rsub | 减法(-) |
| div, rdiv | 除法(/) |
| floordiv, rfloordiv | 整除(//) |
| mul, rmul | 乘法(*) |
| pow, rpow | 幂次方(**) |

以 r 开头的方法是参数翻转的版本,例如 df.rdiv(1) 等价于 1 / df。
DataFrame 和 Series 间的操作
In [38]: arr = np.arange(12.).reshape((3, 4))
In [39]: arr
Out[39]:
array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
In [40]: arr[0]
Out[40]: array([0., 1., 2., 3.])
In [41]: arr - arr[0]
Out[41]:
array([[0., 0., 0., 0.],
[4., 4., 4., 4.],
[8., 8., 8., 8.]])
广播机制
In [43]: frame = pd.DataFrame(np.arange(12.).reshape((4, 3)),
...: columns=list('bde'),
...: index=['Utah', 'Ohio', 'Texas', 'Oregon'])
In [44]: series = frame.iloc[0]
In [45]: series
Out[45]:
b 0.0
d 1.0
e 2.0
Name: Utah, dtype: float64
In [46]: frame - series
Out[46]:
b d e
Utah 0.0 0.0 0.0
Ohio 3.0 3.0 3.0
Texas 6.0 6.0 6.0
Oregon 9.0 9.0 9.0
In [47]: series2 = pd.Series(range(3), index=['b', 'e', 'f'])
In [48]: series2
Out[48]:
b 0
e 1
f 2
dtype: int64
In [49]: frame + series2
Out[49]:
b d e f
Utah 0.0 NaN 3.0 NaN
Ohio 3.0 NaN 6.0 NaN
Texas 6.0 NaN 9.0 NaN
Oregon 9.0 NaN 12.0 NaN
默认情况下,Series 的索引与 DataFrame 的列匹配,并沿行向下广播;如果想改为在列上广播、按行索引匹配,必须使用算术方法并指定 axis
In [50]: series3 = frame['d']
In [51]: frame
Out[51]:
b d e
Utah 0.0 1.0 2.0
Ohio 3.0 4.0 5.0
Texas 6.0 7.0 8.0
Oregon 9.0 10.0 11.0
In [52]: series3
Out[52]:
Utah 1.0
Ohio 4.0
Texas 7.0
Oregon 10.0
Name: d, dtype: float64
In [53]: frame.sub(series3, axis='index')
Out[53]:
b d e
Utah -1.0 0.0 1.0
Ohio -1.0 0.0 1.0
Texas -1.0 0.0 1.0
Oregon -1.0 0.0 1.0
axis 参数指定用于匹配的轴:axis='index'(或 axis=0)表示按 DataFrame 的行索引进行匹配,并在各列上广播