探索虚拟姓名数据
步骤1 导入必要的库
import pandas as pd
import numpy as np
步骤2 按照如下的原始数据内容创建数据框
# Raw records for the three tables. subject_id values are kept as strings.
# raw_data_1 and raw_data_2 share the ids "4" and "5"; raw_data_3 maps
# subject ids to test ids and has no entry for id "6".
raw_data_1 = {
    "subject_id": ["1", "2", "3", "4", "5"],
    "first_name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"],
    "last_name": ["Anderson", "Ackerman", "Ali", "Aoni", "Atiches"],
}

raw_data_2 = {
    "subject_id": ["4", "5", "6", "7", "8"],
    "first_name": ["Billy", "Brian", "Bran", "Bryce", "Betty"],
    "last_name": ["Bonder", "Black", "Balwner", "Brice", "Btisan"],
}

raw_data_3 = {
    "subject_id": ["1", "2", "3", "4", "5", "7", "8", "9", "10", "11"],
    "test_id": [51, 15, 15, 61, 16, 14, 15, 1, 61, 16],
}
步骤3 将上述的数据框分别命名为data1, data2, data3
# Build the two name tables in one pass: both use the same column order.
data1, data2 = (
    pd.DataFrame(raw, columns=["subject_id", "first_name", "last_name"])
    for raw in (raw_data_1, raw_data_2)
)
# The test-result table has its own two-column layout.
data3 = pd.DataFrame(raw_data_3, columns=["subject_id", "test_id"])
步骤4 将data1和data2两个数据框按照行的维度进行合并,命名为all_data
# Stack data2 underneath data1 (row-wise). The original row labels are kept,
# so the labels 0-4 appear twice in the result.
all_data = pd.concat([data1, data2], axis=0)
print(all_data)
步骤5 将data1和data2两个数据框按照列的维度进行合并,命名为all_data_col
# Place data1 and data2 side by side (column-wise, axis=1). Rows are paired by
# their index labels, and the column names of both frames are kept, so each
# column name appears twice in the result.
all_data_col = pd.concat([data1, data2], axis=1)
print(all_data_col)
步骤6 打印data3
# Show the subject_id -> test_id table.
print(data3)
步骤7 按照subject_id的值对all_data和data3作合并 merge
# Inner join on subject_id: only subjects present in both all_data and data3
# are kept, and each gets its test_id attached.
print(all_data.merge(data3, on="subject_id", how="inner"))
步骤8 对data1和data2按照subject_id作内连接(inner),只保留两个数据框中都存在的subject_id
# Show both inputs first, one after the other, then their inner join.
print(data1, data2, sep="\n")
# Overlapping column names other than the key get the _x / _y suffixes.
print(data1.merge(data2, on="subject_id", how="inner"))
步骤9 对 data1 和 data2 按照subject_id作外连接(outer),保留两个数据框中的所有记录,未匹配到的位置以NaN填充
# Outer join on subject_id: every subject from either frame is kept; columns
# from the side that has no matching row are filled with NaN.
print(data1.merge(data2, on="subject_id", how="outer"))
输出:
# 步骤4
subject_id first_name last_name
0 1 Alex Anderson
1 2 Amy Ackerman
2 3 Allen Ali
3 4 Alice Aoni
4 5 Ayoung Atiches
0 4 Billy Bonder
1 5 Brian Black
2 6 Bran Balwner
3 7 Bryce Brice
4 8 Betty Btisan
# 步骤5
subject_id first_name last_name subject_id first_name last_name
0 1 Alex Anderson 4 Billy Bonder
1 2 Amy Ackerman 5 Brian Black
2 3 Allen Ali 6 Bran Balwner
3 4 Alice Aoni 7 Bryce Brice
4 5 Ayoung Atiches 8 Betty Btisan
# 步骤6
subject_id test_id
0 1 51
1 2 15
2 3 15
3 4 61
4 5 16
5 7 14
6 8 15
7 9 1
8 10 61
9 11 16
# 步骤7
subject_id first_name last_name test_id
0 1 Alex Anderson 51
1 2 Amy Ackerman 15
2 3 Allen Ali 15
3 4 Alice Aoni 61
4 4 Billy Bonder 61
5 5 Ayoung Atiches 16
6 5 Brian Black 16
7 7 Bryce Brice 14
8 8 Betty Btisan 15
# 步骤8
subject_id first_name last_name
0 1 Alex Anderson
1 2 Amy Ackerman
2 3 Allen Ali
3 4 Alice Aoni
4 5 Ayoung Atiches
subject_id first_name last_name
0 4 Billy Bonder
1 5 Brian Black
2 6 Bran Balwner
3 7 Bryce Brice
4 8 Betty Btisan
subject_id first_name_x last_name_x first_name_y last_name_y
0 4 Alice Aoni Billy Bonder
1 5 Ayoung Atiches Brian Black
# 步骤9
subject_id first_name_x last_name_x first_name_y last_name_y
0 1 Alex Anderson NaN NaN
1 2 Amy Ackerman NaN NaN
2 3 Allen Ali NaN NaN
3 4 Alice Aoni Billy Bonder
4 5 Ayoung Atiches Brian Black
5 6 NaN NaN Bran Balwner
6 7 NaN NaN Bryce Brice
7 8 NaN NaN Betty Btisan