关键词:互作,弦图,python
摘要:本帖分享的是简单的弦图实现
#输入数据(制表符分隔,无表头)每列意义:"基因/蛋白","互作基因/互作蛋白(多个用英文逗号分隔)","第一列基因分类","pvalue(同时用作连线宽度和点的大小)","color(同一分类颜色一致)"
gene1 gene2 fam1 0.5 red
gene2 gene3 fam1 1 red
gene3 gene1 fam2 0.1 blue
gene4 gene5 fam2 0.3 blue
gene5 gene2 fam2 0.9 blue
gene6 gene3 fam3 1.8 green
gene7 gene1 fam3 1.771428571 green
gene8 gene5 fam3 1.742857143 green
gene9 gene2 fam3 1.714285714 green
gene10 gene3 fam5 1.685714286 gold
gene11 gene1 fam5 1.657142857 gold
gene12 gene3 fam5 1.628571429 gold
gene13 gene1 fam5 1.6 gold
gene14 gene5 fam5 1.571428571 gold
gene15 gene2 fam4 1.542857143 orange
gene16 gene3 fam4 1.514285714 orange
gene17 gene1 fam4 1.485714286 orange
gene18 gene5 fam4 1.457142857 orange
gene19 gene2 fam4 1.428571429 orange
gene20 gene3 fam4 1.4 orange
cell1 cell2 fam3 0.7 green
cell2 cell1 fam3 0.2 green
cell3 cell4 fam3 5 green
cell4 cell6 fam6 0.7 grey
cell5 cell6 fam6 0.1 grey
cell6 cell4 fam6 0.3 grey
cell7 cell4 fam6 2 grey
#############################################代码#############################################
#Usage:python gene_act.py 半径(10) gene_act.xls(上述输入文件) gene_act1.pdf(输出文件)
import sys
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import seaborn as sns
import pandas as pd
def getB(i, degree=None, t=None):
    """Return the i-th Bernstein basis polynomial sampled along a curve.

    The basis is ``C(degree, i) * t**i * (1 - t)**(degree - i)``, duplicated
    into two identical columns so it can be multiplied directly with an
    ``(x, y)`` control point.

    Args:
        i: Index of the basis polynomial, ``0 <= i <= degree``.
        degree: Degree of the Bezier curve. When omitted, the module-level
            global ``n`` is used (the original calling convention).
        t: Curve parameter samples. When omitted, the module-level global
            ``init_t`` is used (the original calling convention).

    Returns:
        Array of shape ``(len(t), 2)`` whose two columns both hold the basis
        values.
    """
    # ``np.math`` was only an alias of the stdlib module and no longer exists
    # in current NumPy releases, so use ``math`` directly.
    import math

    if degree is None:
        degree = n
    if t is None:
        t = init_t
    t = np.asarray(t, dtype=float)

    basis = (
        math.factorial(degree) * t**i * (1 - t) ** (degree - i)
        / (math.factorial(i) * math.factorial(degree - i))
    )
    return np.array([basis, basis]).T
# Chord-diagram script: reads an interaction table, places every gene on a
# circle (grouped by family with a one-slot gap after each family), draws a
# quadratic Bezier chord through the origin for every interaction, and saves
# the figure to a PDF.

# ---- Command-line arguments -------------------------------------------------
if len(sys.argv) < 4:
    sys.exit("Usage: python gene_act.py <radius> <input.tsv> <output.pdf>")
r = float(sys.argv[1])
inputfile = sys.argv[2]
outpdf = sys.argv[3]

# ---- Load data --------------------------------------------------------------
head_lst = ["gene", "act_gene", "family", "pvalue", "color"]
df = pd.read_csv(inputfile, names=head_lst, header=None, sep="\t")
df = df.sort_values(by=["family", "pvalue"], ascending=[True, True]).reset_index(drop=True)
famlist = df["family"].drop_duplicates().to_list()

# ---- Circular layout --------------------------------------------------------
# One angular slot per row plus one empty slot after each family.
sample_angle = np.linspace(0, 2 * np.pi, len(df) + len(famlist), endpoint=False)

# Rows are sorted by family, so group sizes come out in ``famlist`` order.
# The gap after the k-th family sits at (rows so far) + (gaps already placed).
fam_sizes = df.groupby("family", sort=False).size().to_numpy()
space_index = np.cumsum(fam_sizes) + np.arange(len(famlist))

occupied = np.ones(len(sample_angle), dtype=bool)
occupied[space_index] = False
gene_angle = sample_angle[occupied]

df["X"] = r * np.cos(gene_angle)
df["Y"] = r * np.sin(gene_angle)
df["angle"] = gene_angle * 180 / np.pi

# ---- Figure setup -----------------------------------------------------------
# Font type 42 embeds TrueType fonts in the PDF/PS output.
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["ps.fonttype"] = 42
fig, ax = plt.subplots()

# Family -> colour (first colour seen for each family), used for the dots.
fam_color = df.drop_duplicates("family").set_index("family")["color"].to_dict()

# Gene -> (x, y); the first occurrence wins when a gene is listed twice.
first_rows = df.drop_duplicates("gene")
gene_xy = dict(zip(first_rows["gene"], zip(first_rows["X"], first_rows["Y"])))

# ---- Chords -----------------------------------------------------------------
# Every chord is a quadratic Bezier (start, origin, end). ``n`` and ``init_t``
# are module-level on purpose: getB() reads them as globals.
n = 2
init_t = np.linspace(0, 1, 1000)

for row in df.itertuples(index=False):
    for act in str(row.act_gene).split(","):
        act = act.strip()
        if act not in gene_xy:
            # A partner without a position on the circle cannot be drawn.
            print(
                "warning: interaction partner %r of %r is not in the gene "
                "column; chord skipped" % (act, row.gene),
                file=sys.stderr,
            )
            continue
        e_x, e_y = gene_xy[act]
        # Edit the control points here to change the shape of the chord.
        points = np.array([[row.X, row.Y], [0, 0], [e_x, e_y]])
        P = np.zeros((len(init_t), 2))
        for k in range(n + 1):
            P += getB(k) * points[k]
        # The pvalue column doubles as the line width of the chord.
        ax.plot(P[:, 0], P[:, 1], marker="None", color=row.color, zorder=0, linewidth=row.pvalue)

# Dump the table with the computed coordinates.
df.to_csv("test.out",sep = "\t")

# ---- Nodes, labels and output -----------------------------------------------
ax.set_ylim(-1.5 * r, 1.5 * r)
ax.set_xlim(-1.5 * r, 1.5 * r)

# ``palette`` (not ``cmap``) is what seaborn uses to colour hue levels, so the
# dots get the same per-family colours as the chords and labels.
sns.scatterplot(
    data=df, x="X", y="Y", size="pvalue", hue="family", hue_order=famlist,
    palette=fam_color, alpha=1, zorder=3, ax=ax,
)

# Gene labels sit at 1.5x the circle radius, rotated to their angle.
for row in df.itertuples(index=False):
    ax.text(
        row.X * 1.5, row.Y * 1.5, row.gene,
        ha="center", va="center", rotation=row.angle,
        color=row.color, size=10, zorder=3,
    )

ax.axis('off')
ax.set_aspect('equal')
ax.margins(x=0.1, y=0.1)
ax.legend(loc='upper left', ncol=2, prop={'size': 5})
fig.savefig(outpdf)
plt.close()

结果图展示