# 使用seaborn包绘制小提琴图
# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# Set a grey background. The call this comment describes was missing from the
# script; sns.set() is used because it is also available before seaborn 0.11.0
# (on 0.11.0 or above sns.set_theme() is the preferred name for the same call).
sns.set(style="darkgrid")
# Load the example dataset
df = sns.load_dataset('iris')
# Preview the first rows (the value is only displayed in a notebook/REPL)
df.head()
|   | sepal_length | sepal_width | petal_length | petal_width | species |
|---|--------------|-------------|--------------|-------------|---------|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Basic violin plot
# Draw a single violin for one numeric column
sepal_length = df["sepal_length"]
sns.violinplot(y=sepal_length)
plt.show()
# Violin plot of several variables
# One violin per column, taken from the first two columns of the frame
first_two_columns = df.iloc[:, :2]
sns.violinplot(data=first_two_columns)
plt.show()
# Grouped violin plot
# One violin per species along the x axis
species = df["species"]
sepal_length = df["sepal_length"]
sns.violinplot(x=species, y=sepal_length)
plt.show()
# Horizontal violin plot
# Swapping the x and y arguments lays the violins out horizontally
sns.violinplot(x=sepal_length, y=species)
plt.show()
# The linewidth parameter changes the thickness of the violin outlines and
# the width parameter changes how wide each violin is drawn.
# Each variant below is rendered as its own figure, in this order.
style_variants = (
    {"linewidth": 5},  # change line width
    {"width": 0.3},    # change violin width
)
for variant in style_variants:
    sns.violinplot(x=df["species"], y=df["sepal_length"], **variant)
    plt.show()
# Custom violin colours
species, sepal_length = df["species"], df["sepal_length"]
# Colour the groups from a named palette
sns.violinplot(x=species, y=sepal_length, palette="Reds")
plt.show()
# Use one uniform colour for every violin
sns.violinplot(x=species, y=sepal_length, color="skyblue")
plt.show()
# Map each group name to its own specific colour
my_pal = dict(versicolor="g", setosa="b", virginica="m")
# Plot with the per-group mapping
sns.violinplot(x=species, y=sepal_length, palette=my_pal)
plt.show()
# Custom ordering of the groups
# Pass an explicit list of group names through the 'order' parameter
sns.violinplot(x='species', y='sepal_length', data=df, order=["versicolor", "virginica", "setosa"])
plt.show()
# Determine the order by decreasing median.
# The medians are sorted explicitly: groupby returns groups in sorted key
# order, so merely reversing that index would only reverse the alphabetical
# order and would match "decreasing median" by coincidence at best.
my_order = (
    df.groupby("species")["sepal_length"]
    .median()
    .sort_values(ascending=False)
    .index
)
# Pass the computed order through the 'order' parameter and plot
sns.violinplot(x='species', y='sepal_length', data=df, order=my_order)
plt.show()
# Add text annotations
# Compute the median (label height) and the number of observations (label
# text) per group from a single groupby, so both share the same group order.
# Taking the counts from value_counts() instead would order them by frequency
# and could attach a count to the wrong violin.
summary = df.groupby("species")["sepal_length"].agg(["median", "size"])
group_order = summary.index.tolist()
medians = summary["median"].values
nobs = [f"n: {count}" for count in summary["size"]]
# Basic violinplot stored in a matplotlib.axes object; the order is passed
# explicitly so that violin i is guaranteed to be group_order[i].
ax = sns.violinplot(x="species", y="sepal_length", data=df, order=group_order)
# Add text to the figure: violin i is drawn at x position i, and each label is
# placed slightly above that group's median.
for pos, (median, label) in enumerate(zip(medians, nobs)):
    ax.text(pos, median + 0.03, label,
            horizontalalignment='center',
            size='small',
            color='w',
            weight='semibold')
plt.show()