3、索引、选取和过滤

1.png

2.png

3.png

4.png

5.png

6.png

7.png

8.png

9.png

10.png

12.png
4、算术运算和数据对齐

1.png
由上面可以看到,自动的数据对齐操作在不重叠的索引处引入NA值。

2.png
从上面看出,相加后会返回一个新的DataFrame,其索引和列为原来两个DataFrame的并集。
5、在算术方法中填充值

3.png

4.png

5.png

6.png
6、DataFrame和Series之间的运算

7.png

行廣播.png

列廣播.png

8.png

9.png

10.png

11.png
源码如下:
# In[1]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Two Series whose indexes only partly overlap (labels 'a' and 'c' are shared).
s1 = Series(data=[2, 9, 7], index=list('abc'))
print(s1)
# In[2]:
s2 = Series(data=[5, 7, 11, 3], index=list('acgf'))
print(s2)
# In[3]:
# Addition aligns the operands on their index labels before summing.
sm = s1.add(s2)
print(sm)
# In[4]:
# Two DataFrames that differ in both their row labels and their column labels.
df1 = DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['Guangdong', 'Shandong', 'Henan'],
    columns=['b', 'c', 'd'],
)
print(df1)
# In[5]:
df2 = DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=['Hainan', 'Shandong', 'Henan', 'Anhui'],
    columns=['b', 'd', 'e'],
)
print(df2)
# In[6]:
# The sum is aligned on rows and columns; the result carries the union of both.
sm2 = df1.add(df2)
print(sm2)
# In[7]:
# A 3x4 frame and a 4x5 frame: the second has one extra row and one extra column.
df1 = DataFrame(data=np.arange(12).reshape(3, 4), columns=['a', 'b', 'c', 'd'])
print(df1)
# In[8]:
df2 = DataFrame(data=np.arange(20).reshape(4, 5), columns=['a', 'b', 'c', 'd', 'e'])
print(df2)
# In[9]:
# Plain addition: cells that exist in only one of the two frames come out as NaN.
df = df1.add(df2)
print(df)
# In[10]:
# Same addition, but a cell missing from one frame is treated as 0 before summing.
ad = df1.add(other=df2, fill_value=0)
print(ad)
# In[11]:
# Re-shape df1 onto df2's column set, filling the newly created column with 0.
re = df1.reindex(fill_value=0, columns=df2.columns)
print(re)
# In[12]:
# Plain NumPy example: a 3x4 integer grid holding 0..11.
arr = np.arange(12).reshape(3, 4)
print(arr)
# In[13]:
# First row of the grid.
print(arr[0, :])
# In[14]:
# Subtracting the first row from the 2-D array applies it to every row (broadcasting).
di = np.subtract(arr, arr[0, :])
print(di)
# In[15]:
# 4x3 frame used to demonstrate arithmetic between a DataFrame and a Series.
frame = DataFrame(np.arange(12).reshape((4, 3)), columns=list('bde'),
                  index=['Guangdong', 'Shandong', 'Henan', 'Sichuan'])
print(frame)
# In[16]:
# Take the first row by position. `.ix` was removed in pandas 1.0, so the
# explicit positional indexer `.iloc` is used instead.
series = frame.iloc[0]
print(series)
# In[17]:
# The Series index is matched against the frame's columns and the subtraction
# is applied to every row.
fr_se = frame - series
print(fr_se)
# In[18]:
# Labels found in only one operand ('d' in the frame, 'f' in the Series)
# produce all-NaN columns in the result.
series2 = Series(range(3), index=['b', 'e', 'f'])
fr_se2 = frame + series2
print(fr_se2)
# In[19]:
# Match on the rows and broadcast across the columns.
print(frame)
# In[20]:
series3 = frame['d']
print(series3)
# In[21]:
# `sub` is subtraction, i.e. frame - series3.
# axis=0 names the axis to match on: the Series index is aligned with the
# frame's row labels.
su = frame.sub(series3, axis=0)
print(su)
# In[22]:
# Series with string labels, used to demonstrate the different indexing forms.
obj = Series(np.arange(4), index=['a', 'b', 'c', 'd'])
print(obj)
# In[23]:
# Look up a value by its label.
print(obj['b'])
# In[24]:
# Positional lookup; position 1 is the same element as obj['b'].
# `.iloc` is used because plain obj[1] on a string-labelled Series relies on
# a deprecated integer-as-position fallback.
print(obj.iloc[1])
# In[25]:
# NumPy-style positional slice (end position excluded).
print(obj.iloc[1:3])
# In[26]:
# Select several positions at once.
print(obj.iloc[[1, 3]])
# In[27]:
# Select several labels at once, in the order given.
print(obj[['b', 'a', 'd']])
# In[28]:
# Boolean mask selection.
print(obj[obj < 3])
# In[29]:
# Label slice; unlike a positional slice, the end label is included.
print(obj['b':'c'])
# In[30]:
# Assignment through a label slice updates the Series in place.
obj['b':'c'] = 5
print(obj)
# In[32]:
# DataFrame indexing.
data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Guangdong', 'Shandong', 'Henan', 'Sichuan'],
                 columns=['one', 'two', 'three', 'four'])
print(data)
# In[33]:
# A single column label returns that column as a Series.
print(data['two'])
# In[34]:
# A list of column labels returns those columns, in the order given.
print(data[['three', 'one']])
# In[35]:
# Select rows with a slice.
print(data[:2])
# In[36]:
# Select rows with a boolean array.
# Named `filtered` rather than `bool` so the builtin `bool` is not shadowed.
filtered = data[data['three'] > 5]
print(filtered)
# In[37]:
# Comparing the whole frame yields a boolean DataFrame.
print(data < 5)
# In[38]:
# Assign through a boolean DataFrame: every cell below 5 becomes 0.
data[data < 5] = 0
print(data)
# In[40]:
print(data)
# In[41]:
# `.ix` was removed in pandas 1.0; the selections below use `.loc` for labels
# and `.iloc` for positions.
# One row by label, two columns by label.
i = data.loc['Henan', ['two', 'four']]
print(i)
# In[42]:
# Rows by label, columns by position: the positions are translated to labels
# through data.columns so the whole selection can go through `.loc`.
i2 = data.loc[['Shandong', 'Sichuan'], data.columns[[1, 2, 3]]]
print(i2)
# In[43]:
# One row by position.
i3 = data.iloc[3]
print(i3)
# In[46]:
# Label slice over the rows (end label included), one column.
i4 = data.loc[:'Henan', 'three']
print(i4)
# In[47]:
# Rows chosen by a boolean mask, restricted to the first three columns.
i5 = data.loc[data.three > 5, data.columns[:3]]
print(i5)