Pandas实现Excel一行变多行
import pandas as pd
import numpy as np
df = pd.DataFrame({
"id": np.arange(10),
"name": ["a","b"]*5,
"a": [f"a{i}" for i in range(10)],
"b": [f"b{i}" for i in range(10)],
"aa": [f"aa{i}" for i in range(10)],
"bb": [f"bb{i}" for i in range(10)],
"aaa": [f"aaa{i}" for i in range(10)],
"bbb": [f"bbb{i}" for i in range(10)]
})
# 1. 这是原始的数据
df
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
<pre><code>.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</code></pre>
id | name | a | b | aa | bb | aaa | bbb | |
---|---|---|---|---|---|---|---|---|
0 | 0 | a | a0 | b0 | aa0 | bb0 | aaa0 | bbb0 |
1 | 1 | b | a1 | b1 | aa1 | bb1 | aaa1 | bbb1 |
2 | 2 | a | a2 | b2 | aa2 | bb2 | aaa2 | bbb2 |
3 | 3 | b | a3 | b3 | aa3 | bb3 | aaa3 | bbb3 |
4 | 4 | a | a4 | b4 | aa4 | bb4 | aaa4 | bbb4 |
5 | 5 | b | a5 | b5 | aa5 | bb5 | aaa5 | bbb5 |
6 | 6 | a | a6 | b6 | aa6 | bb6 | aaa6 | bbb6 |
7 | 7 | b | a7 | b7 | aa7 | bb7 | aaa7 | bbb7 |
8 | 8 | a | a8 | b8 | aa8 | bb8 | aaa8 | bbb8 |
9 | 9 | b | a9 | b9 | aa9 | bb9 | aaa9 | bbb9 |
# 2. 把要拆分的列,merge到一列
def merge_cols(x):
y = x.values
result = []
for idx in range(0, len(y), 2):
result.append(f"{y[idx]},{y[idx+1]}")
return "#".join(result)
df["merge"] = df.loc[:, "a":"bbb"].apply(merge_cols, axis=1)
df
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
<pre><code>.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</code></pre>
id | name | a | b | aa | bb | aaa | bbb | merge | |
---|---|---|---|---|---|---|---|---|---|
0 | 0 | a | a0 | b0 | aa0 | bb0 | aaa0 | bbb0 | a0,b0#aa0,bb0#aaa0,bbb0 |
1 | 1 | b | a1 | b1 | aa1 | bb1 | aaa1 | bbb1 | a1,b1#aa1,bb1#aaa1,bbb1 |
2 | 2 | a | a2 | b2 | aa2 | bb2 | aaa2 | bbb2 | a2,b2#aa2,bb2#aaa2,bbb2 |
3 | 3 | b | a3 | b3 | aa3 | bb3 | aaa3 | bbb3 | a3,b3#aa3,bb3#aaa3,bbb3 |
4 | 4 | a | a4 | b4 | aa4 | bb4 | aaa4 | bbb4 | a4,b4#aa4,bb4#aaa4,bbb4 |
5 | 5 | b | a5 | b5 | aa5 | bb5 | aaa5 | bbb5 | a5,b5#aa5,bb5#aaa5,bbb5 |
6 | 6 | a | a6 | b6 | aa6 | bb6 | aaa6 | bbb6 | a6,b6#aa6,bb6#aaa6,bbb6 |
7 | 7 | b | a7 | b7 | aa7 | bb7 | aaa7 | bbb7 | a7,b7#aa7,bb7#aaa7,bbb7 |
8 | 8 | a | a8 | b8 | aa8 | bb8 | aaa8 | bbb8 | a8,b8#aa8,bb8#aaa8,bbb8 |
9 | 9 | b | a9 | b9 | aa9 | bb9 | aaa9 | bbb9 | a9,b9#aa9,bb9#aaa9,bbb9 |
# 3. 把不用的列删除掉
drop_names = list(df.loc[:, "a":"bbb"].columns.values)
df.drop(drop_names, axis=1, inplace=True)
# 进行merge列拆分,然后explode
df["merge"] = df["merge"].str.split("#")
df.explode("merge")
df
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
<pre><code>.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</code></pre>
id | name | merge | |
---|---|---|---|
0 | 0 | a | [a0,b0, aa0,bb0, aaa0,bbb0] |
1 | 1 | b | [a1,b1, aa1,bb1, aaa1,bbb1] |
2 | 2 | a | [a2,b2, aa2,bb2, aaa2,bbb2] |
3 | 3 | b | [a3,b3, aa3,bb3, aaa3,bbb3] |
4 | 4 | a | [a4,b4, aa4,bb4, aaa4,bbb4] |
5 | 5 | b | [a5,b5, aa5,bb5, aaa5,bbb5] |
6 | 6 | a | [a6,b6, aa6,bb6, aaa6,bbb6] |
7 | 7 | b | [a7,b7, aa7,bb7, aaa7,bbb7] |
8 | 8 | a | [a8,b8, aa8,bb8, aaa8,bbb8] |
9 | 9 | b | [a9,b9, aa9,bb9, aaa9,bbb9] |
# 4. 将数据explode还原成多列
df_explode = df.explode("merge")
df_explode["a"]=df_explode["merge"].str.split(",").str[0]
df_explode["b"]=df_explode["merge"].str.split(",").str[1]
df_explode.drop("merge", axis=1)
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
<pre><code>.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</code></pre>
id | name | a | b | |
---|---|---|---|---|
0 | 0 | a | a0 | b0 |
0 | 0 | a | aa0 | bb0 |
0 | 0 | a | aaa0 | bbb0 |
1 | 1 | b | a1 | b1 |
1 | 1 | b | aa1 | bb1 |
1 | 1 | b | aaa1 | bbb1 |
2 | 2 | a | a2 | b2 |
2 | 2 | a | aa2 | bb2 |
2 | 2 | a | aaa2 | bbb2 |
3 | 3 | b | a3 | b3 |
3 | 3 | b | aa3 | bb3 |
3 | 3 | b | aaa3 | bbb3 |
4 | 4 | a | a4 | b4 |
4 | 4 | a | aa4 | bb4 |
4 | 4 | a | aaa4 | bbb4 |
5 | 5 | b | a5 | b5 |
5 | 5 | b | aa5 | bb5 |
5 | 5 | b | aaa5 | bbb5 |
6 | 6 | a | a6 | b6 |
6 | 6 | a | aa6 | bb6 |
6 | 6 | a | aaa6 | bbb6 |
7 | 7 | b | a7 | b7 |
7 | 7 | b | aa7 | bb7 |
7 | 7 | b | aaa7 | bbb7 |
8 | 8 | a | a8 | b8 |
8 | 8 | a | aa8 | bb8 |
8 | 8 | a | aaa8 | bbb8 |
9 | 9 | b | a9 | b9 |
9 | 9 | b | aa9 | bb9 |
9 | 9 | b | aaa9 | bbb9 |
本文使用 文章同步助手 同步