# Data merging
import pandas as pd
# Two small frames sharing the key column 'level'; keys a, b and c appear in
# both, 'd' only in data1 and 'e' only in data2.
data1 = pd.DataFrame({
    'level': ['a', 'b', 'c', 'd'],
    'number': [1, 3, 5, 7],
})
data2 = pd.DataFrame({
    'level': ['a', 'b', 'c', 'e'],
    'number': [2, 3, 6, 10],
})

# Inner join (the default for merge): only keys present in both frames survive.
inner_joined = data1.merge(data2, on='level')
print(inner_joined)

# Left join: every row of data1 is kept; unmatched right-hand values are NaN.
left_joined = data1.merge(data2, on='level', how='left')
print(left_joined)
# Removing duplicate rows
# Rows 1 and 2 are identical, so the second occurrence counts as a duplicate.
data = pd.DataFrame({
    'a': [1, 3, 3, 4],
    'b': [1, 3, 3, 5],
})

# Boolean mask: True for each row that repeats an earlier row.
duplicate_mask = data.duplicated()
print(duplicate_mask)

# Same frame with the repeated rows dropped (first occurrence is kept).
unique_rows = data.drop_duplicates()
print(unique_rows)
# Binning values into intervals
# Bin a list of values into the right-closed intervals (15, 20] and (20, 25].
data = [11, 15, 18, 20, 25, 26, 27, 24]
bins = [15, 20, 25]

# Compute the binning once and reuse it. Values outside every interval
# (including 15 itself, because the left edge is open) become NaN.
binned = pd.cut(data, bins)
print(binned)

# Integer bin index of each value; -1 marks values that fell outside the bins.
# (`Categorical.labels` was removed from pandas; `codes` is its replacement.)
print(binned.codes)

# Show the bin labels, i.e. the intervals themselves
# (`Categorical.levels` was renamed to `categories`).
print(binned.categories)

# Show how many values landed in each interval. The top-level
# `pd.value_counts` is deprecated, so count through a Series instead.
interval_counts = pd.Series(binned).value_counts()
print(interval_counts)