Code reproduction:
1. Converting the results (repertoires recovered with TRUST4)
The results recovered by TRUST4 cannot be fed into vdjtools directly, so the column names need to be changed first.
Example: the samples are split into two groups, Control and Treatment, with 4 samples per group.
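For reference, the first eight columns of a TRUST4 report (*_report.tsv) should be #count, frequency, CDR3nt, CDR3aa, V, D, J and C (this is my reading of the TRUST4 simple-report format; check your own header); the conversion below simply keeps those eight columns and renames them to vdjtools-style names:
# assumed TRUST4 header (first 8 columns) -> vdjtools-style names used below
# #count -> count, frequency -> freq, CDR3nt -> cdr3nt, CDR3aa -> cdr3aa, V/D/J/C -> v/d/j/c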
name <- read.table("./name.txt")
n <- 4 # number of samples per group
# Split a converted TRUST4 table into the four chains by the V-gene prefix and recompute per-chain frequencies
TCR_split <- function(matrix,output_TRA,output_TRB,output_TRD,output_TRG){
trust4_results <- matrix
TRA_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRAV"),]
TRB_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRBV"),]
TRD_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRDV"),]
TRG_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRGV"),]
TRA_results$freq <- TRA_results$count/sum(TRA_results$count)
TRB_results$freq <- TRB_results$count/sum(TRB_results$count)
TRD_results$freq <- TRD_results$count/sum(TRD_results$count)
TRG_results$freq <- TRG_results$count/sum(TRG_results$count)
write.table(TRA_results,file = output_TRA,quote = F,sep = "\t",row.names = F)
write.table(TRB_results,file = output_TRB,quote = F,sep = "\t",row.names = F)
write.table(TRD_results,file = output_TRD,quote = F,sep = "\t",row.names = F)
write.table(TRG_results,file = output_TRG,quote = F,sep = "\t",row.names = F)
}
metadata_all <- data.frame(file_name="",sample_id="",
sample_type=c(rep("Control",n),rep("Treatment",n)),
tcr_type="")
metadata <- data.frame(file_name="",sample_id="",
sample_type=c(rep("Control",n*4),rep("Treatment",n*4)),
tcr_type="")
count_sum <- data.frame(name=name$V1,sum=rep(0,nrow(name)))
for (i in 1:nrow(name)){
input <- paste0("./trust4/",name[i,"V1"],"_report.tsv")
output <- paste0("./convert_results/",name[i,"V1"],".txt")
trust4_results <- read.csv(input,sep="\t")
trust4_results <- trust4_results[,1:8]
colnames(trust4_results) <- c("count","freq","cdr3nt","cdr3aa","v","d","j","c")
trust4_results <- trust4_results[trust4_results$cdr3aa!="out_of_frame",] # drop out-of-frame CDR3s (safe even when none are present)
row.names(trust4_results) <- 1:nrow(trust4_results)
count_sum[i,"sum"] <- sum(trust4_results$count)
write.table(trust4_results,file = output,quote = F,sep = "\t",row.names = F)
output_TRA <- paste0("./convert_results/",name[i,"V1"],"_TRA.txt")
output_TRB <- paste0("./convert_results/",name[i,"V1"],"_TRB.txt")
output_TRD <- paste0("./convert_results/",name[i,"V1"],"_TRD.txt")
output_TRG <- paste0("./convert_results/",name[i,"V1"],"_TRG.txt")
TCR_split(trust4_results,output_TRA,output_TRB,output_TRD,output_TRG)
}
for (i in 1:nrow(name)){
output <- paste0("./convert_results/",name[i,"V1"],".txt")
output_TRA <- paste0("./convert_results/",name[i,"V1"],"_TRA.txt")
output_TRB <- paste0("./convert_results/",name[i,"V1"],"_TRB.txt")
output_TRD <- paste0("./convert_results/",name[i,"V1"],"_TRD.txt")
output_TRG <- paste0("./convert_results/",name[i,"V1"],"_TRG.txt")
metadata_all[i,"file_name"] <- output
metadata_all[i,"sample_id"] <- name[i,"V1"]
metadata_all[i,"tcr_type"] <- "TCR"
metadata[4*i-3,"file_name"] <- output_TRA
metadata[4*i-3,"tcr_type"] <- "TRA"
metadata[4*i-2,"file_name"] <- output_TRB
metadata[4*i-2,"tcr_type"] <- "TRB"
metadata[4*i-1,"file_name"] <- output_TRD
metadata[4*i-1,"tcr_type"] <- "TRD"
metadata[4*i,"file_name"] <- output_TRG
metadata[4*i,"tcr_type"] <- "TRG"
metadata[(4*i-3):(4*i),"sample_id"] <- name[i,"V1"]
}
metadata_all$sample_id <- paste0(metadata_all$sample_id,"_",metadata_all$tcr_type)
metadata$sample_id <- paste0(metadata$sample_id,"_",metadata$tcr_type)
write.table(metadata_all,file = "./metadata_all.txt",sep="\t",row.names = F,quote = F)
write.table(metadata,file = "./metadata.txt",sep="\t",row.names = F,quote = F)
write.table(count_sum,file = "./count_sum.txt",sep="\t",row.names = F,quote = F)
metadata_all is the version that does not split the chains into TRA/TRB/TRD/TRG:
file_name | sample_id | sample_type | tcr_type |
---|---|---|---|
./convert_results/MSTCR_01.txt | MSTCR_01 | Control | TCR |
./convert_results/MSTCR_02.txt | MSTCR_02 | Control | TCR |
./convert_results/MSTCR_03.txt | MSTCR_03 | Control | TCR |
./convert_results/MSTCR_04.txt | MSTCR_04 | Control | TCR |
./convert_results/MSTCR_05.txt | MSTCR_05 | Treatment | TCR |
./convert_results/MSTCR_06.txt | MSTCR_06 | Treatment | TCR |
./convert_results/MSTCR_07.txt | MSTCR_07 | Treatment | TCR |
./convert_results/MSTCR_08.txt | MSTCR_08 | Treatment | TCR |
metadata is the version split into the four chains TRA, TRB, TRD and TRG:
file_name | sample_id | sample_type | tcr_type |
---|---|---|---|
./convert_results/MSTCR_01_TRA.txt | MSTCR_01_TRA | Control | TRA |
./convert_results/MSTCR_01_TRB.txt | MSTCR_01_TRB | Control | TRB |
./convert_results/MSTCR_01_TRD.txt | MSTCR_01_TRD | Control | TRD |
./convert_results/MSTCR_01_TRG.txt | MSTCR_01_TRG | Control | TRG |
./convert_results/MSTCR_02_TRA.txt | MSTCR_02_TRA | Control | TRA |
./convert_results/MSTCR_02_TRB.txt | MSTCR_02_TRB | Control | TRB |
./convert_results/MSTCR_02_TRD.txt | MSTCR_02_TRD | Control | TRD |
./convert_results/MSTCR_02_TRG.txt | MSTCR_02_TRG | Control | TRG |
./convert_results/MSTCR_03_TRA.txt | MSTCR_03_TRA | Control | TRA |
./convert_results/MSTCR_03_TRB.txt | MSTCR_03_TRB | Control | TRB |
./convert_results/MSTCR_03_TRD.txt | MSTCR_03_TRD | Control | TRD |
./convert_results/MSTCR_03_TRG.txt | MSTCR_03_TRG | Control | TRG |
./convert_results/MSTCR_04_TRA.txt | MSTCR_04_TRA | Control | TRA |
./convert_results/MSTCR_04_TRB.txt | MSTCR_04_TRB | Control | TRB |
./convert_results/MSTCR_04_TRD.txt | MSTCR_04_TRD | Control | TRD |
./convert_results/MSTCR_04_TRG.txt | MSTCR_04_TRG | Control | TRG |
./convert_results/MSTCR_05_TRA.txt | MSTCR_05_TRA | Treatment | TRA |
./convert_results/MSTCR_05_TRB.txt | MSTCR_05_TRB | Treatment | TRB |
./convert_results/MSTCR_05_TRD.txt | MSTCR_05_TRD | Treatment | TRD |
./convert_results/MSTCR_05_TRG.txt | MSTCR_05_TRG | Treatment | TRG |
./convert_results/MSTCR_06_TRA.txt | MSTCR_06_TRA | Treatment | TRA |
./convert_results/MSTCR_06_TRB.txt | MSTCR_06_TRB | Treatment | TRB |
./convert_results/MSTCR_06_TRD.txt | MSTCR_06_TRD | Treatment | TRD |
./convert_results/MSTCR_06_TRG.txt | MSTCR_06_TRG | Treatment | TRG |
./convert_results/MSTCR_07_TRA.txt | MSTCR_07_TRA | Treatment | TRA |
./convert_results/MSTCR_07_TRB.txt | MSTCR_07_TRB | Treatment | TRB |
./convert_results/MSTCR_07_TRD.txt | MSTCR_07_TRD | Treatment | TRD |
./convert_results/MSTCR_07_TRG.txt | MSTCR_07_TRG | Treatment | TRG |
./convert_results/MSTCR_08_TRA.txt | MSTCR_08_TRA | Treatment | TRA |
./convert_results/MSTCR_08_TRB.txt | MSTCR_08_TRB | Treatment | TRB |
./convert_results/MSTCR_08_TRD.txt | MSTCR_08_TRD | Treatment | TRD |
./convert_results/MSTCR_08_TRG.txt | MSTCR_08_TRG | Treatment | TRG |
count_sum records each sample's total counts and is used for result normalization.
name | sum |
---|---|
MSTCR_01 | 182672 |
MSTCR_02 | 159604 |
MSTCR_03 | 236935 |
MSTCR_04 | 169434 |
MSTCR_05 | 79721 |
MSTCR_06 | 34295 |
MSTCR_07 | 45231 |
MSTCR_08 | 40237 |
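Because the downsampling in step 2 is done without replacement, the sampling depth cannot exceed the smallest total count in this table. A minimal sketch for picking it from the count_sum.txt written above:
count_sum <- read.table("./count_sum.txt",sep = "\t",header = TRUE)
size <- min(count_sum$sum) # the largest depth that every sample can support without replacement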
2. Result normalization (optional)
size <- 500000 # downsampling depth: pick it from count_sum above; it must not exceed the smallest sample's total count, because sampling is done without replacement
name <- read.table("./name.txt")
TCR_split <- function(matrix,output_TRA,output_TRB,output_TRD,output_TRG){
trust4_results <- matrix
TRA_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRAV"),]
TRB_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRBV"),]
TRD_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRDV"),]
TRG_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRGV"),]
write.table(TRA_results,file = output_TRA,quote = F,sep = "\t",row.names = F)
write.table(TRB_results,file = output_TRB,quote = F,sep = "\t",row.names = F)
write.table(TRD_results,file = output_TRD,quote = F,sep = "\t",row.names = F)
write.table(TRG_results,file = output_TRG,quote = F,sep = "\t",row.names = F)
}
tcr_normalization <- function(trust4_results,size){
# expand each clonotype into "count" copies of its row index, then downsample without replacement
pool <- rep(1:nrow(trust4_results),trust4_results$count)
all <- sample(pool,size = size,replace = F)
count <- table(all)
# keep the rows in the same order as the table() names so the counts stay aligned with their clonotypes
trust4_results <- trust4_results[as.numeric(names(count)),]
trust4_results$count <- as.numeric(count)
trust4_results$freq <- trust4_results$count/size
trust4_results <- trust4_results[order(trust4_results$count,decreasing = T),]
rownames(trust4_results) <- 1:nrow(trust4_results)
return(trust4_results)
}
metadata_all_normal <- data.frame(file_name="",sample_id="",
sample_type=c(rep("Control",n),rep("Treatment",n)),
tcr_type="")
metadata_normal <- data.frame(file_name="",sample_id="",
sample_type=c(rep("Control",n*4),rep("Treatment",n*4)),
tcr_type="")
for (i in 1:nrow(name)){
input <- paste0("./trust4/",name[i,"V1"],"_report.tsv")
output <- paste0("./convert_results_normal/",name[i,"V1"],".txt")
trust4_results <- read.csv(input,sep="\t")
trust4_results <- trust4_results[,1:8]
colnames(trust4_results) <- c("count","freq","cdr3nt","cdr3aa","v","d","j","c")
trust4_results <- trust4_results[trust4_results$cdr3aa!="out_of_frame",] # drop out-of-frame CDR3s (safe even when none are present)
row.names(trust4_results) <- 1:nrow(trust4_results)
trust4_results <- tcr_normalization(trust4_results,size)
write.table(trust4_results,file = output,quote = F,sep = "\t",row.names = F)
output_TRA <- paste0("./convert_results_normal/",name[i,"V1"],"_TRA.txt")
output_TRB <- paste0("./convert_results_normal/",name[i,"V1"],"_TRB.txt")
output_TRD <- paste0("./convert_results_normal/",name[i,"V1"],"_TRD.txt")
output_TRG <- paste0("./convert_results_normal/",name[i,"V1"],"_TRG.txt")
TCR_split(trust4_results,output_TRA,output_TRB,output_TRD,output_TRG)
metadata_all_normal[i,"file_name"] <- output
metadata_all_normal[i,"sample_id"] <- strsplit(name[i,"V1"],"_")[[1]][1]
metadata_all_normal[i,"tcr_type"] <- "TCR"
metadata_normal[4*i-3,"file_name"] <- output_TRA
metadata_normal[4*i-3,"tcr_type"] <- "TRA"
metadata_normal[4*i-2,"file_name"] <- output_TRB
metadata_normal[4*i-2,"tcr_type"] <- "TRB"
metadata_normal[4*i-1,"file_name"] <- output_TRD
metadata_normal[4*i-1,"tcr_type"] <- "TRD"
metadata_normal[4*i,"file_name"] <- output_TRG
metadata_normal[4*i,"tcr_type"] <- "TRG"
metadata_normal[(4*i-3):(4*i),"sample_id"] <- name[i,"V1"]
}
metadata_all_normal$sample_id <- paste0(metadata_all_normal$sample_id,"_",metadata_all_normal$tcr_type)
metadata_normal$sample_id <- paste0(metadata_normal$sample_id,"_",metadata_normal$tcr_type)
write.table(metadata_all_normal,file = "./metadata_all_normal.txt",sep="\t",row.names = F,quote = F)
write.table(metadata_normal,file = "./metadata_normal.txt",sep="\t",row.names = F,quote = F)
3. Repertoire diversity (Richness / Diversity / Clonality): calculation and visualization
3.1 Calculating repertoire diversity
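The script below computes, per sample, richness $R$ (the number of distinct clonotypes), Shannon diversity and clonality from the clonotype frequencies $p_i$ (the freq column):
$$H = -\sum_i p_i \ln p_i,\qquad E = \frac{H}{\ln R},\qquad C = 1 - E$$
Shannon_index returns $H$, and clonality returns $C$, i.e. one minus Pielou's evenness $E$.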
metadata <- read.csv("./metadata.txt",sep = "\t")
Shannon_index <- function(list_p){
sum=0
for (p in list_p){
sum <- p*log(p) + sum
}
H <- -sum
return(H)
}
clonality <- function(diversity,richness){
E<- diversity/log(richness)
C <- 1-E
return(C)
}
file_path <- metadata$file_name
results <- data.frame(sample=metadata$sample_id)
for (i in 1:length(file_path)){
df <- read.csv(file = file_path[i],
stringsAsFactors = F,sep = "\t")
temp <- try(list_p <- df[,"freq"])
if ('try-error' %in% class(temp)){next}
results[i,"richness"] <- nrow(df)
results[i,"diversity"] <- Shannon_index(list_p)
results[i,"clonality"] <- clonality(Shannon_index(list_p),nrow(df))
}
write.table(results,"./results/basic.txt",sep = "\t",row.names = F,quote = F)
3.2 Calculating diversity with vdjtools
java -jar e:/vdjtools/vdjtools-1.2.1.jar CalcDiversityStats -m .\metadata.txt .\results\
For vdjtools usage, see: https://www.jianshu.com/p/ea7edd430a5a
inputpath <- "./results/"
outputpath <- "./results/"
df <- read.csv(paste0(inputpath,"diversity.strict.exact.txt"), sep = "\t",stringsAsFactors = FALSE)
df$real_diversity <- log(df$shannonWienerIndex_mean) # shannonWienerIndex_mean is exp(H), so log() recovers the Shannon index H
df$clonality <- 1 - df$normalizedShannonWienerIndex_mean # clonality = 1 - evenness
for (k in c("TRA","TRB","TRD","TRG")){
work_df <- df[which(df$tcr_type==k),]
sample_name <- work_df$sample_id
richness<- work_df$observedDiversity_mean
diversity <- work_df$real_diversity
clonality <- work_df$clonality
work_df <- data.frame(sample_name,richness,diversity,clonality)
write.table(work_df,file = paste0(outputpath,k,"-Basic.txt"),row.names = F,quote = F,sep="\t")
}
Example output, TRB-Basic.txt:
sample_name | richness | diversity | clonality |
---|---|---|---|
MSTCR_01_TRB | 35170 | 9.89085335938176 | 0.0551297471186711 |
MSTCR_02_TRB | 29431 | 9.68714727061554 | 0.0585683225389396 |
MSTCR_03_TRB | 50768 | 10.3132468709197 | 0.0481563088949662 |
MSTCR_04_TRB | 34260 | 9.89423352728728 | 0.0524338444699065 |
MSTCR_05_TRB | 7236 | 7.96452265160162 | 0.103782995009542 |
MSTCR_06_TRB | 3534 | 7.2072238519678 | 0.117862903222087 |
MSTCR_07_TRB | 3423 | 7.12183471831036 | 0.124896027125899 |
MSTCR_08_TRB | 4354 | 7.35997369287484 | 0.121600997692666 |
# Visualization
rm(list = ls())
inputpath <- "./results/"
outputpath <- "./output/"
library(ggplot2)
library(reshape2)
library(ggpubr)
df <- read.csv(paste0(inputpath,"diversity.strict.exact.txt"), sep = "\t",stringsAsFactors = FALSE)
df$real_diversity <- log(df$shannonWienerIndex_mean)
df$clonality <- 1 - df$normalizedShannonWienerIndex_mean
tcr_type <- "TRB"
work_df <- df[which(df$tcr_type==tcr_type),]
sample_name <- work_df$sample_id
Group <- work_df$sample_type
richness<- work_df$observedDiversity_mean
diversity <- work_df$real_diversity
clonality <- work_df$clonality
work_df <- data.frame(Group,richness,diversity,clonality)
work_df$Group <- factor(work_df$Group)
work_res<- melt(work_df, id.vars = "Group")
work_richness <- work_res[which(work_res$variable=="richness"),]
work_diversity <- work_res[which(work_res$variable=="diversity"),]
work_clonality <- work_res[which(work_res$variable=="clonality"),]
p1 <- ggboxplot(work_diversity, x = "Group", y = "value",
legend = "right",color = "Group", palette = "nejm",
add = "jitter",add.params=list(color = "Group",size=3),
title = "Diversity", xlab = "",size = 0.5,width =0.5,
ylab = paste0("Shannon Diversity Index of \n ",tcr_type," repertoire")) +
ylim(c(0,max(work_diversity$value)+min(work_diversity$value)))+
theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))
p1 + stat_compare_means(aes(group = work_df$Group),
label.x = 1.5,
label.y = max(work_diversity$value)+min(work_diversity$value),
label = "p.signif",
method = 'wilcox.test',hide.ns = TRUE,size=8)
ggsave(paste0(outputpath,paste0(tcr_type,"-Diversity.pdf")),dpi = 600,width = 8,height = 8)
dev.off()
p2 <- ggboxplot(work_richness, x = "Group", y = "value",
legend = "right",color = "Group", palette = "nejm",
add = "jitter",add.params=list(color = "Group",size=3),
title = "Richness", xlab = "",size = 0.5,width =0.5,
ylab = paste0("Richness of \n ",tcr_type," repertoire"))+
ylim(c(0,max(work_richness$value)+min(work_richness$value)))+
theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))
p2 + stat_compare_means(aes(group =work_df$Group),
label = "p.signif",label.x = 1.5,
label.y = max(work_richness$value)+min(work_richness$value),
method = 'wilcox.test',hide.ns = TRUE,size=8)
ggsave(paste0(outputpath,paste0(tcr_type,"-Richness.pdf")),dpi = 600,width = 8,height = 8)
dev.off()
p3 <- ggboxplot(work_clonality, x = "Group", y = "value",
legend = "right",color = "Group", palette = "nejm",
add = "jitter",add.params=list(color = "Group",size=3),
title = "Clonality", xlab = "",size = 0.5,width =0.5,
ylab = paste0("Clonality Index of \n ",tcr_type," repertoire"))+
ylim(c(0,max(work_clonality$value)+min(work_clonality$value)))+
theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))
p3 + stat_compare_means(aes(group =work_df$Group),
label = "p.signif",label.x = 1.5,
label.y = max(work_clonality$value)+min(work_clonality$value),
method = 'wilcox.test',hide.ns = TRUE,size=8)
ggsave(paste0(outputpath,paste0(tcr_type,"-Clonality.pdf")),dpi = 600,width = 8,height = 8)
dev.off()
4. CDR3 length distribution analysis
rm(list = ls())
library(ggplot2)
library(reshape2)
library(ggpubr)
lenth_distribution <- function(filepath,all_length,id){
df <- read.csv(filepath,sep="\t",stringsAsFactors = F)
for (i in 1:nrow(df)){
cdr3 <- df[i,"cdr3aa"]
all_length[as.character(nchar(cdr3)),id] <- all_length[as.character(nchar(cdr3)),id]+1
}
return(all_length)
}
all_lenth_distribution <- function(file_list,id_list,group_list){
all_length <- data.frame()
for (i in 1:length(file_list)){
df <- read.csv(file_list[i],sep="\t",stringsAsFactors = F)
for (k in 1:nrow(df)){
cdr3 <- df[k,"cdr3aa"]
all_length[as.character(nchar(cdr3)),id_list[i]] <- 0
}
}
all_length[is.na(all_length)] <- 0
for (i in 1:length(file_list)){
all_length <- lenth_distribution(file_list[i],all_length,id_list[i])
all_length["sample_type",id_list[i]]=group_list[i]
}
return(all_length)
}
name <- read.table("./metadata.txt",sep = "\t",header = 1)
work_tcr_type <- "TRA" #更改这里为所需要分析的链
file_list <- name$file_name[which(name$tcr_type==work_tcr_type)]
id_list <- name$sample_id[which(name$tcr_type==work_tcr_type)]
group_list <- c(rep("Control"4),rep("Treatment",4))
all_length <- all_lenth_distribution(file_list,id_list,group_list)
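For orientation, based on the functions above: all_length has one row per observed CDR3 amino-acid length plus a final sample_type row, and one column per sample, so it can be inspected like this:
head(all_length) # clonotype counts per CDR3 length for each sample
all_length["sample_type",] # group label of each sample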
#---------absolute----------------#
# plot absolute clonotype counts
if(F){
temp <- all_length[-nrow(all_length),]
temp <- temp[order(as.numeric(rownames(temp))),]
all_length <- rbind(temp,all_length[nrow(all_length),])}
#------------percent----------#
# plot percentages (fraction of clonotypes at each length)
if(T){
temp <- all_length[-nrow(all_length),]
temp=as.data.frame(lapply(temp,as.numeric),row.names = rownames(temp))
colSums(temp) # sanity check: total clonotypes per sample before scaling
temp <- data.frame(apply(temp,2,function(x) x/sum(x)))
temp <- temp[order(as.numeric(rownames(temp))),]
all_length <- rbind(temp,all_length[nrow(all_length),])}
####---check this and change--###
all_length <- all_length[c(6:14,nrow(all_length)),] # pick a suitable CDR3 length range for plotting (keep the last sample_type row)
#draw picture
df <- all_length
work_df <- data.frame(t(df))
df <- melt(work_df, id="sample_type", variable.name="length",value.name = "count")
df$length <- substring(df$length,2)
df$count <- as.numeric(df$count)
df$length <- as.numeric(df$length)
df <- df[order(df$length),]
df$length <- as.factor(df$length)
mean <- aggregate(df$count, by=list(df$length,df$sample_type), FUN=mean)
sd <- aggregate(df$count, by=list(df$length, df$sample_type), FUN=sd)
len <- aggregate(df$count, by=list(df$length, df$sample_type), FUN=length)
df_res <- data.frame(mean, sd=sd$x, len=len$x)
colnames(df_res) = c("CDR3_Length", "Group", "Mean", "Sd", "Count")
df_res$Se <- df_res$Sd/sqrt(df_res$Count)
ggplot(df_res, mapping = aes(x=CDR3_Length, y=Mean, fill=Group,group=Group,color=Group)) +
labs(x = "CDR3 Length",y = "Mean",title = paste0("Distribution of ",work_tcr_type," CDR3 Length"))+
geom_bar(stat="identity", position=position_dodge(),color="black", width=.8) +
geom_errorbar(aes(ymin=Mean-Se, ymax=Mean +Se),
position=position_dodge(.8), width=.2,color="black") +
theme_bw()+ # apply the complete theme first so the tweaks below are not overwritten
theme(axis.text.x = element_text(size = 14, color = "black"))+ # x-axis text size
theme(axis.text.y = element_text(size = 14, color = "black"))+ # y-axis text size
theme(title=element_text(size=13))+ # title text size
geom_line(size=0.5,position = position_dodge(0.8))+
scale_y_continuous(expand = c(0,0),limits = c(0,0.3)) # adjust the y-axis limits to fit your data
ggsave(paste0("./output/CDR3_lenth",work_tcr_type,".pdf"),width = 10,height = 8)
# stacked bar chart
library(ggsci)
ggplot(df_res,mapping = aes(x=Group,y=Mean,fill=CDR3_Length))+
geom_col(position = 'stack', width = 0.6)+
scale_fill_aaas()+ # fill is the mapped aesthetic, so use the fill scale
theme_bw()+scale_y_continuous(expand = c(0,0))
ggsave(paste0("./output/CDR3_lenth",work_tcr_type,"_stack.pdf"),width = 7,height = 7)
5. Gene usage analysis
java -jar e:/vdjtools/vdjtools-1.2.1.jar CalcSegmentUsage -m .\metadata.txt .\results\
rm(list=ls())
library(RColorBrewer)
library(pheatmap)
for (region in c("V","J")){
for (type in c("TRA","TRB","TRD","TRG")){
file_name <- paste0("./results/segments.wt.",region,".txt")
df <- read.csv(file_name,sep="\t",row.names = 1)
work_tcr_type <- type
work_df <- df[which(df$tcr_type==work_tcr_type),]
work_df <- work_df[,-1:-2]
work_df <- t(work_df)
work_df <- work_df[which(rowSums(work_df)>0),]
work_df <- work_df*100
#---------------------------------------------------------#
annotation_col <- data.frame(c(rep("Control",4),rep("Treatment",4)))
#---------------------------------------------------------#
rownames(annotation_col) <- colnames(work_df)
colnames(annotation_col) <- "Type"
#coul <- colorRampPalette(brewer.pal(9, "OrRd"))(50)
coul <- colorRampPalette(colors = c("blue","white","red"))(100)
pheatmap(work_df,
filename = paste0("./output/",work_tcr_type,"_GeneUsage_",region,".pdf"),
scale = "row",
annotation_col = annotation_col,cluster_cols = FALSE,
height = 15,width = 10,cutree_col =2,display_numbers = FALSE,labels_col = "",color = coul)
}
}
6. Amino acid frequency analysis
metadata <- read.table("./metadata.txt",sep="\t",header = 1)
chr_freq <- function(str,chr){
chars <- strsplit(str,"")[[1]]
# proportion of residue "chr" within one CDR3; returns NA when the residue is absent
# (those NAs are dropped later by mean(p,na.rm = T))
p <- table(chars)[chr]/nchar(str)
return(p)
}
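As a quick sanity check, chr_freq returns the within-sequence proportion of a single residue (the CDR3 here is made up for illustration):
chr_freq("CASSLAPGATNEKLFF","A") # 3 of the 16 residues are A, so this returns 0.1875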
aa_freq <- function(df,chr){
p <- c()
for (i in 1:nrow(df)){
cdr3 <- df[i,"cdr3aa"]
p[i] <- chr_freq(cdr3,chr)
}
p_mean <- mean(p,na.rm = T)
return(p_mean)
}
aa_name <- c("A","R","N","D","C","Q","E","G","H","I","L","K","M","F","P","S","T","W","Y","V")#要分析的氨基酸
file_list <- metadata$file_name
sample_id <- metadata$sample_id
p=data.frame()
for (i in 1:length(file_list)){
print(paste0(file_list[i],"---running!"))
df <- read.csv(file_list[i],sep="\t",stringsAsFactors = F)
for (aa in aa_name){
print(paste0(aa,"-------running!----------"))
p[sample_id[i],aa] <- aa_freq(df,aa)
}
}
p1 <- p*100
write.table(p1,file="./results/aa_frequency.txt",sep="\t",row.names = T,quote = F)
library(ggplot2)
library(reshape2)
library(ggpubr)
aa_fre <- read.csv("./results/aa_frequency.txt",sep="\t",row.names = 1)
aa_fre$Sample_type <- c(rep("Control",16),rep("Treatment",16))
aa_fre$tcr_type <- rep(c("TRA","TRB","TRD","TRG"),8) # 8 samples x 4 chains = 32 rows
aa_fre$Sample_type <- factor(aa_fre$Sample_type)
aa_fre$tcr_type <- factor(aa_fre$tcr_type)
for (tcr in c("TRA","TRB","TRD","TRG")){
aa_fre_work <- aa_fre[which(aa_fre$tcr_type==tcr),]
aa_fre_work <- aa_fre_work[,-ncol(aa_fre_work)]
work_res <- melt(aa_fre_work,id.vars = "Sample_type")
for (i in 1:(ncol(aa_fre_work)-1)){
temp <- work_res[which(work_res$variable==colnames(aa_fre)[i]),]
p1 <- ggpaired(temp, x = "Sample_type", y = "value",
line.color = "gray", line.size = 0.5,
legend = "right",color = "Sample_type", palette = "nejm",
add = "jitter",add.params=list(color = "Sample_type",size=3),
title = colnames(aa_fre)[i], xlab = "",size = 0.5,width =0.5,
ylab = paste0("Percentage of ",tcr," aa frequency")) +
theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))+
theme(legend.position = 'none')
p2 <- p1 + stat_compare_means(aes(group = Sample_type),
label.x = 1.5,label = "p.signif",
# comparisons = list(c("Control","Treatment")),
method = 'wilcox.test',hide.ns = TRUE,size=8)
# inside a loop the plot is never auto-printed, so pass it to ggsave explicitly
ggsave(paste0("./output/aa_fre/",tcr,"_",colnames(aa_fre_work)[i],".png"),plot = p2)
}
}