多维Index
data = pd.Series(np.random.randn(9),
index = [list('aaabbbccc'), [int(i) for i in list('123123123')]])
data
Out[305]:
a 1 -1.241109
2 -0.773149
3 -0.768199
b 1 0.033064
2 -0.572366
3 -0.058906
c 1 0.380905
2 0.191739
3 -1.165568
dtype: float64
data['a']
Out[306]:
1 -1.241109
2 -0.773149
3 -0.768199
dtype: float64
data[1]
Out[307]: -0.7731491959372425
data['a'][1]
Out[311]: -1.2411092607532435
data.loc[:,2]
Out[312]:
a -0.773149
b -0.572366
c 0.191739
dtype: float64
data.loc['a', 1]
Out[313]: -1.2411092607532435
Columns和Rows都可以创建多层Index
data = DataFrame(np.arange(12).reshape((4, 3)),
index = [list('aabb'), [1, 2, 1, 2]],
columns = [['ohio', 'ohio', 'colorado'],
['green', 'red', 'green']])
data
Out[316]:
ohio colorado
green red green
a 1 0 1 2
2 3 4 5
b 1 6 7 8
2 9 10 11
每层的index也可以拥有自己的名字
data.index.names
Out[318]: FrozenList([None, None])
data.index.names = ['key1', 'key2']
data.columns.names
Out[320]: FrozenList([None, None])
data.columns.names = ['state', 'color']
data
Out[322]:
state ohio colorado
color green red green
key1 key2
a 1 0 1 2
2 3 4 5
b 1 6 7 8
2 9 10 11
Index排序
data.sort_index(level = 0)
Out[325]:
state ohio colorado
color green red green
key1 key2
a 1 0 1 2
2 3 4 5
b 1 6 7 8
2 9 10 11
data.sort_index(level = 1)
Out[326]:
state ohio colorado
color green red green
key1 key2
a 1 0 1 2
b 1 6 7 8
a 2 3 4 5
b 2 9 10 11
根据Level聚合
data.sum(level = 'key2')
Out[327]:
state ohio colorado
color green red green
key2
1 6 8 10
2 12 14 16
data.sum(level = 'state', axis = 1)
Out[329]:
state ohio colorado
key1 key2
a 1 1 2
2 7 5
b 1 13 8
2 19 11
表连接
df1 = DataFrame({'k1': list('ababcbaba'), 'v1': range(9)})
df2 = DataFrame({'k2': list('abd'), 'v2': range(3)})
pd.merge(df1, df2, how = 'left', left_on = 'k1', right_on = 'k2')
df1 = DataFrame({'k': list('ababcbaba'), 'v1': range(9)})
df2 = DataFrame({'k': list('abd'), 'v2': range(3)})
pd.merge(df1, df2, how = 'inner', on = 'k')
Out[343]:
k v1 v2
0 a 0 0
1 a 2 0
2 a 6 0
3 a 8 0
4 b 1 1
5 b 3 1
6 b 5 1
7 b 7 1
df1 = DataFrame({'k': list('ababcbaba'), 'v': range(9)})
df2 = DataFrame({'k': list('abd'), 'v': range(3)})
pd.merge(df1, df2, how = 'inner', on = 'k', suffixes = ('_left', '_right'))
Out[347]:
k v_left v_right
0 a 0 0
1 a 2 0
2 a 6 0
3 a 8 0
4 b 1 1
5 b 3 1
6 b 5 1
7 b 7 1
表拼接
df1 = pd.DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'],
columns=['one', 'two'])
df2 = pd.DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'],
columns=['three', 'four'])
df1
Out[349]:
one two
a 0 1
b 2 3
c 4 5
df2
Out[350]:
three four
a 5 6
c 7 8
pd.concat([df1, df2], axis = 1, keys = ['level1', 'level2'])
Out[351]:
level1 level2
one two three four
a 0 1 5.0 6.0
b 2 3 NaN NaN
c 4 5 7.0 8.0
数据透视&逆透视
Index的行列转换
data = pd.DataFrame(np.arange(6).reshape((2, 3)),
index=pd.Index(['Ohio', 'Colorado'], name='state'),
columns=pd.Index(['one', 'two', 'three'], name='number'))
data
Out[354]:
number one two three
state
Ohio 0 1 2
Colorado 3 4 5
透视与逆透视
melt方法和pivot方法
data = DataFrame({'k1':list('aaabbbccc'),
'k2':list('xyzxyzxyz'),
'v':range(9)})
data
Out[360]:
k1 k2 v
0 a x 0
1 a y 1
2 a z 2
3 b x 3
4 b y 4
5 b z 5
6 c x 6
7 c y 7
8 c z 8
data.pivot('k1', 'k2', 'v')
Out[361]:
k2 x y z
k1
a 0 1 2
b 3 4 5
c 6 7 8
result.reset_index()
result.melt(id_vars = ['k1'], value_vars=['x', 'y', 'z'])
Out[398]:
k1 k2 value
0 a x 0
1 b x 3
2 c x 6
3 a y 1
4 b y 4
5 c y 7
6 a z 2
7 b z 5
8 c z 8