全网最全免疫组库分析流程+代码复现(二)

代码复现:

1.结果转换(使用TRUST4恢复)

因为TURST4恢复的结果不能直接适配vdjtools,所以需要进行列名更改。
示例:样品分为ControlTreatment两个组,每组有4个样品。

name <- read.table("./name.txt")
n <- 4 #numbers of sample each group
TCR_split <- function(matrix,output_TRA,output_TRB,output_TRD,output_TRG){
  trust4_results <- matrix
  TRA_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRAV"),]
  TRB_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRBV"),]
  TRD_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRDV"),]
  TRG_results <- trust4_results[which(substr(trust4_results$v,1,4)=="TRGV"),]
  TRA_results$freq <- unlist(lapply(TRA_results$count,function(x) x/sum(TRA_results$count)))
  TRB_results$freq <- unlist(lapply(TRB_results$count,function(x) x/sum(TRB_results$count)))
  TRD_results$freq <- unlist(lapply(TRD_results$count,function(x) x/sum(TRD_results$count)))
  TRG_results$freq <- unlist(lapply(TRG_results$count,function(x) x/sum(TRG_results$count)))
  write.table(TRA_results,file = output_TRA,quote = F,sep = "\t",row.names = F)
  write.table(TRB_results,file = output_TRB,quote = F,sep = "\t",row.names = F)
  write.table(TRD_results,file = output_TRD,quote = F,sep = "\t",row.names = F)
  write.table(TRG_results,file = output_TRG,quote = F,sep = "\t",row.names = F)
}
metadata_all <- data.frame(file_name="",sample_id="",
                           sample_type=c(rep("Control",n),rep("Treatment",n)),
                           tcr_type="")
metadata <- data.frame(file_name="",sample_id="",
                       sample_type=c(rep("Control",n*4),rep("Treatment",n*4)),
                       tcr_type="")
count_sum <- data.frame(name=name$V1,sum=rep(0,length(name)))
for (i in 1:nrow(name)){
  input <- paste0("./trust4/",name[i,"V1"],"_report.tsv")
  output <- paste0("./convert_results/",name[i,"V1"],".txt")
  trust4_results <- read.csv(input,sep="\t")
  trust4_results <- trust4_results[,1:8]
  colnames(trust4_results) <- c("count","freq","cdr3nt","cdr3aa","v","d","j","c")
  trust4_results <- trust4_results[-which(trust4_results$cdr3aa=="out_of_frame"),]
  row.names(trust4_results) <- 1:nrow(trust4_results)
  count_sum[i,"sum"] <- sum(trust4_results$count)
  write.table(trust4_results,file = output,quote = F,sep = "\t",row.names = F)
  output_TRA <- paste0("./convert_results/",name[i,"V1"],"_TRA.txt")
  output_TRB <- paste0("./convert_results/",name[i,"V1"],"_TRB.txt")
  output_TRD <- paste0("./convert_results/",name[i,"V1"],"_TRD.txt")
  output_TRG <- paste0("./convert_results/",name[i,"V1"],"_TRG.txt")
  TCR_split(trust4_results,output_TRA,output_TRB,output_TRD,output_TRG)
}
for (i in 1:nrow(name)){
  output <- paste0("./convert_results/",name[i,"V1"],".txt")
  metadata_all[i,"file_name"] <- output
  metadata_all[i,"sample_id"] <- strsplit(name[i,"V1"],"_")[[1]][1]
  metadata_all[i,"tcr_type"] <- "TCR"
  metadata[4*i-3,"file_name"] <- output_TRA
  metadata[4*i-3,"tcr_type"] <- "TRA"
  metadata[4*i-2,"file_name"] <- output_TRB
  metadata[4*i-2,"tcr_type"] <- "TRB"
  metadata[4*i-1,"file_name"] <- output_TRD
  metadata[4*i-1,"tcr_type"] <- "TRD"
  metadata[4*i,"file_name"] <- output_TRG
  metadata[4*i,"tcr_type"] <- "TRG"
  for (k in 3:0){
    metadata[4*i-k,"sample_id"] <- strsplit(name[i,"V1"],"_")[[1]][1]
  }

}
metadata_all$sample_id <- paste0(metadata_all$sample_id,"_",metadata_all$tcr_type)
metadata$sample_id <- paste0(metadata$sample_id,"_",metadata$tcr_type)
write.table(metadata_all,file = "./metadata_all.txt",sep="\t",row.names = F,quote = F)
write.table(metadata,file = "./metadata.txt",sep="\t",row.names = F,quote = F)
write.table(count_sum,file = "./count_sum.txt",sep="\t",row.names = F,quote = F)

metadata_all是没有拆分TRA TRB TRD TRG的

file_name sample_id sample_type tcr_type
./convert_results/MSTCR_01.txt MSTCR_01 Control TCR
./convert_results/MSTCR_02.txt MSTCR_02 Control TCR
./convert_results/MSTCR_03.txt MSTCR_03 Control TCR
./convert_results/MSTCR_04.txt MSTCR_04 Control TCR
./convert_results/MSTCR_05.txt MSTCR_05 Treatment TCR
./convert_results/MSTCR_06.txt MSTCR_06 Treatment TCR
./convert_results/MSTCR_07.txt MSTCR_07 Treatment TCR
./convert_results/MSTCR_08.txt MSTCR_08 Treatment TCR

metadata是拆分TRA TRB TRD TRG四条链的

file_name sample_id sample_type tcr_type
./convert_results/MSTCR_01_TRA.txt MSTCR_01_TRA Control TRA
./convert_results/MSTCR_01_TRB.txt MSTCR_01_TRB Control TRB
./convert_results/MSTCR_01_TRD.txt MSTCR_01_TRD Control TRD
./convert_results/MSTCR_01_TRG.txt MSTCR_01_TRG Control TRG
./convert_results/MSTCR_02_TRA.txt MSTCR_02_TRA Control TRA
./convert_results/MSTCR_02_TRB.txt MSTCR_02_TRB Control TRB
./convert_results/MSTCR_02_TRD.txt MSTCR_02_TRD Control TRD
./convert_results/MSTCR_02_TRG.txt MSTCR_02_TRG Control TRG
./convert_results/MSTCR_03_TRA.txt MSTCR_03_TRA Control TRA
./convert_results/MSTCR_03_TRB.txt MSTCR_03_TRB Control TRB
./convert_results/MSTCR_03_TRD.txt MSTCR_03_TRD Control TRD
./convert_results/MSTCR_03_TRG.txt MSTCR_03_TRG Control TRG
./convert_results/MSTCR_04_TRA.txt MSTCR_04_TRA Control TRA
./convert_results/MSTCR_04_TRB.txt MSTCR_04_TRB Control TRB
./convert_results/MSTCR_04_TRD.txt MSTCR_04_TRD Control TRD
./convert_results/MSTCR_04_TRG.txt MSTCR_04_TRG Control TRG
./convert_results/MSTCR_05_TRA.txt MSTCR_05_TRA Treatment TRA
./convert_results/MSTCR_05_TRB.txt MSTCR_05_TRB Treatment TRB
./convert_results/MSTCR_05_TRD.txt MSTCR_05_TRD Treatment TRD
./convert_results/MSTCR_05_TRG.txt MSTCR_05_TRG Treatment TRG
./convert_results/MSTCR_06_TRA.txt MSTCR_06_TRA Treatment TRA
./convert_results/MSTCR_06_TRB.txt MSTCR_06_TRB Treatment TRB
./convert_results/MSTCR_06_TRD.txt MSTCR_06_TRD Treatment TRD
./convert_results/MSTCR_06_TRG.txt MSTCR_06_TRG Treatment TRG
./convert_results/MSTCR_07_TRA.txt MSTCR_07_TRA Treatment TRA
./convert_results/MSTCR_07_TRB.txt MSTCR_07_TRB Treatment TRB
./convert_results/MSTCR_07_TRD.txt MSTCR_07_TRD Treatment TRD
./convert_results/MSTCR_07_TRG.txt MSTCR_07_TRG Treatment TRG
./convert_results/MSTCR_08_TRA.txt MSTCR_08_TRA Treatment TRA
./convert_results/MSTCR_08_TRB.txt MSTCR_08_TRB Treatment TRB
./convert_results/MSTCR_08_TRD.txt MSTCR_08_TRD Treatment TRD
./convert_results/MSTCR_08_TRG.txt MSTCR_08_TRG Treatment TRG

count_sum是每个sample的总counts数,用于结果标准化。

name sum
MSTCR_01 182672
MSTCR_02 159604
MSTCR_03 236935
MSTCR_04 169434
MSTCR_05 79721
MSTCR_06 34295
MSTCR_07 45231
MSTCR_08 40237

2.结果标准化(可选)

size=500000 #选择合适的counts_sum进行抽样
name <- read.table("./name.txt")
TCR_split <- function(matrix,output_TRA,output_TRB,output_TRD,output_TRG){
  trsut4_results <- matrix
  TRA_results <- trsut4_results[which(substr(trsut4_results$v,1,4)=="TRAV"),]
  TRB_results <- trsut4_results[which(substr(trsut4_results$v,1,4)=="TRBV"),]
  TRD_results <- trsut4_results[which(substr(trsut4_results$v,1,4)=="TRDV"),]
  TRG_results <- trsut4_results[which(substr(trsut4_results$v,1,4)=="TRGV"),]
  write.table(TRA_results,file = output_TRA,quote = F,sep = "\t",row.names = F)
  write.table(TRB_results,file = output_TRB,quote = F,sep = "\t",row.names = F)
  write.table(TRD_results,file = output_TRD,quote = F,sep = "\t",row.names = F)
  write.table(TRG_results,file = output_TRG,quote = F,sep = "\t",row.names = F)
}
tcr_normalization <- function(trust4_results,size){
  pool <- c()
  for (i in 1:nrow(trust4_results)){
    pool <- append(pool,rep(i,trust4_results[i,"count"]))
  }
  all <- as.numeric(sample(pool,size = size,replace = F))
  count <- table(all)
  trust4_results <- trust4_results[unique(all),]
  trust4_results$count <- count
  trust4_results <- trust4_results[order(trust4_results$count,decreasing = T),]
  rownames(trust4_results) <- 1:nrow(trust4_results)
  return(trust4_results)
}
metadata_all_normal <- data.frame(file_name="",sample_id="",
                                  sample_type=c(rep("Con",n),rep("Bgal",n),rep("AH1",n),rep("A5",n)),
                                  tcr_type="")
metadata_normal <- data.frame(file_name="",sample_id="",
                              sample_type=c(rep("Con",n*4),rep("Bgal",n*4),rep("AH1",n*4),rep("A5",n*4)),
                              tcr_type="")
for (i in 1:nrow(name)){
  input <- paste0("./trust4/",name[i,"V1"],"_report.tsv")
  output <- paste0("./convert_results_normal/",name[i,"V1"],".txt")
  trust4_results <- read.csv(input,sep="\t")
  trust4_results <- trust4_results[,1:8]
  colnames(trust4_results) <- c("count","freq","cdr3nt","cdr3aa","v","d","j","c")
  trust4_results <- trust4_results[-which(trust4_results$cdr3aa=="out_of_frame"),]
  row.names(trust4_results) <- 1:nrow(trust4_results)
  trust4_results <- tcr_normalization(trust4_results,size)
  write.table(trust4_results,file = output,quote = F,sep = "\t",row.names = F)
  output_TRA <- paste0("./convert_results_normal/",name[i,"V1"],"_TRA.txt")
  output_TRB <- paste0("./convert_results_normal/",name[i,"V1"],"_TRB.txt")
  output_TRD <- paste0("./convert_results_normal/",name[i,"V1"],"_TRD.txt")
  output_TRG <- paste0("./convert_results_normal/",name[i,"V1"],"_TRG.txt")
  TCR_split(trust4_results,output_TRA,output_TRB,output_TRD,output_TRG)
  metadata_all_normal[i,"file_name"] <- output
  metadata_all_normal[i,"sample_id"] <- strsplit(name[i,"V1"],"_")[[1]][1]
  metadata_all_normal[i,"tcr_type"] <- "TCR"
  metadata_normal[4*i-3,"file_name"] <- output_TRA
  metadata_normal[4*i-3,"tcr_type"] <- "TRA"
  metadata_normal[4*i-2,"file_name"] <- output_TRB
  metadata_normal[4*i-2,"tcr_type"] <- "TRB"
  metadata_normal[4*i-1,"file_name"] <- output_TRD
  metadata_normal[4*i-1,"tcr_type"] <- "TRD"
  metadata_normal[4*i,"file_name"] <- output_TRG
  metadata_normal[4*i,"tcr_type"] <- "TRG"
  for (k in 3:0){
    metadata_normal[4*i-k,"sample_id"] <- strsplit(name[i,"V1"],"_")[[1]][1]
  }
}
metadata_all_normal$sample_id <- paste0(metadata_all_normal$sample_id,"_",metadata_all_normal$tcr_type)
metadata_normal$sample_id <- paste0(metadata_normal$sample_id,"_",metadata_normal$tcr_type)
write.table(metadata_all_normal,file = "./metadata_all_normal.txt",sep="\t",row.names = F,quote = F)
write.table(metadata_normal,file = "./metadata_normal.txt",sep="\t",row.names = F,quote = F)

3.免疫组库多样性(Richness/Diversity/Clonality)的计算及可视化

3.1计算免疫组库多样性
metadata <- read.csv("./metadata.txt",sep = "\t")
Shannon_index <- function(list_p){
  sum=0
  for (p in list_p){
    sum <- p*log(p) + sum
  }
  H <- -sum
  return(H)
}
clonality <- function(diversity,richness){
  E<- diversity/log(richness)
  C <- 1-E
  return(C)
}
file_path <- metadata$file_name
results <- data.frame(sample=metadata$sample_id)
for (i in 1:length(file_path)){
  df <- read.csv(file = file_path[i],
                 stringsAsFactors = F,sep = "\t")
  temp <- try(list_p <- df[,"freq"])
  if ('try-error' %in% class(temp)){next}
  results[i,"richness"] <- nrow(df)
  results[i,"diversity"] <- Shannon_index(list_p)
  results[i,"clonality"] <- clonality(Shannon_index(list_p),nrow(df))
}
write.table(results,"./results/basic.txt",sep = "\t",row.names = F,quote = F)
3.2使用vdjtools计算多样性
java -jar e:/vdjtools/vdjtools-1.2.1.jar CalcDiversityStats -m .\metadata.txt .\results\

vdjtools的使用参考:https://www.jianshu.com/p/ea7edd430a5a

inputpath <- "./results/"
outputpath <- "./results/"
df <- read.csv(paste0(inputpath,"diversity.strict.exact.txt"), sep = "\t",stringsAsFactors = FALSE)
df$real_diversity <- log(df$shannonWienerIndex_mean)
df$clonality <- 1 - df$normalizedShannonWienerIndex_mean
for (k in c("TRA","TRB","TRD","TRG")){
  work_df <- df[which(df$tcr_type==k),]
  sample_name <- work_df$sample_id
  richness<- work_df$observedDiversity_mean
  diversity <- work_df$real_diversity
  clonality <- work_df$clonality
  work_df <- data.frame(sample_name,richness,diversity,clonality)
  write.table(work_df,file = paste0(outputpath,k,"-Basic.txt"),row.names = F,quote = F,sep="\t")
}

TRB-Basic.txt

sample_name richness diversity clonality
MSTCR_01_TRB 35170 9.89085335938176 0.0551297471186711
MSTCR_02_TRB 29431 9.68714727061554 0.0585683225389396
MSTCR_03_TRB 50768 10.3132468709197 0.0481563088949662
MSTCR_04_TRB 34260 9.89423352728728 0.0524338444699065
MSTCR_05_TRB 7236 7.96452265160162 0.103782995009542
MSTCR_06_TRB 3534 7.2072238519678 0.117862903222087
MSTCR_07_TRB 3423 7.12183471831036 0.124896027125899
MSTCR_08_TRB 4354 7.35997369287484 0.121600997692666
#可视化
rm(list = ls())
inputpath <- "./results/"
outputpath <- "./output/"
library(ggplot2)
library(reshape2)
library(ggpubr)
df <- read.csv(paste0(inputpath,"diversity.strict.exact.txt"), sep = "\t",stringsAsFactors = FALSE)
df$real_diversity <- log(df$shannonWienerIndex_mean)
df$clonality <- 1 - df$normalizedShannonWienerIndex_mean
tcr_type <- "TRB"
work_df <- df[which(df$tcr_type==tcr_type),]
sample_name <- work_df$sample_id
Group <- work_df$sample_type
richness<- work_df$observedDiversity_mean
diversity <- work_df$real_diversity
clonality <- work_df$clonality
work_df <- data.frame(Group,richness,diversity,clonality)
work_df$Group <- factor(work_df$Group)  
work_res<- melt(work_df, id.vars = "Group")
work_richness <- work_res[which(work_res$variable=="richness"),]
work_diversity <- work_res[which(work_res$variable=="diversity"),]
work_clonality <- work_res[which(work_res$variable=="clonality"),]

p1 <- ggboxplot(work_diversity, x = "Group", y = "value",
                legend = "right",color = "Group", palette = "nejm",
                add = "jitter",add.params=list(color = "Group",size=3),
                title = "Diversity",  xlab = "",size = 0.5,width =0.5,
                ylab = paste0("Shannon Diversity Index of \n ",tcr_type," repertoire")) +
  ylim(c(0,max(work_diversity$value)+min(work_diversity$value)))+
  theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))
p1 + stat_compare_means(aes(group = work_df$Group),
                        label.x = 1.5,
                        label.y = max(work_diversity$value)+min(work_diversity$value),
                        label = "p.signif",
                        method = 'wilcox.test',hide.ns = TRUE,size=8)  
ggsave(paste0(outputpath,paste0(tcr_type,"-Diversity.pdf")),dpi = 600,width = 8,height = 8)
dev.off()
p2 <- ggboxplot(work_richness, x = "Group", y = "value",
                legend = "right",color = "Group", palette = "nejm",
                add = "jitter",add.params=list(color = "Group",size=3),
                title = "Richness",  xlab = "",size = 0.5,width =0.5,
                ylab = paste0("Richness of \n ",tcr_type," repertoire"))+
  ylim(c(0,max(work_richness$value)+min(work_richness$value)))+
  theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))
  
p2 + stat_compare_means(aes(group =work_df$Group),
                        label = "p.signif",label.x = 1.5,
                        label.y = max(work_richness$value)+min(work_richness$value),
                        method = 'wilcox.test',hide.ns = TRUE,size=8) 
ggsave(paste0(outputpath,paste0(tcr_type,"-Richness.pdf")),dpi = 600,width = 8,height = 8)
dev.off()
p3 <- ggboxplot(work_clonality, x = "Group", y = "value",
                legend = "right",color = "Group", palette = "nejm",
                add = "jitter",add.params=list(color = "Group",size=3),
                title = "Clonality",  xlab = "",size = 0.5,width =0.5,
                ylab = paste0("Clonality Index of \n ",tcr_type," repertoire"))+
  ylim(c(0,max(work_clonality$value)+min(work_clonality$value)))+
  theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))
p3 + stat_compare_means(aes(group =work_df$Group),  
                        label = "p.signif",label.x = 1.5,
                        label.y = max(work_clonality$value)+min(work_clonality$value),
                        method = 'wilcox.test',hide.ns = TRUE,size=8) 
ggsave(paste0(outputpath,paste0(tcr_type,"-Clonality.pdf")),dpi = 600,width = 8,height = 8)
dev.off()

4.CDR3 length distribution analysis

rm(list = ls())
library(ggplot2)
library(reshape2)
library(ggpubr)
lenth_distribution <- function(filepath,all_length,id){
  df <- read.csv(filepath,sep="\t",stringsAsFactors = F)
  for (i in 1:nrow(df)){
    cdr3 <- df[i,"cdr3aa"]
    all_length[as.character(nchar(cdr3)),id] <- all_length[as.character(nchar(cdr3)),id]+1
  }
  return(all_length)
}
all_lenth_distribution <- function(file_list,id_list,group_list){
  all_length <- data.frame()
  for (i in 1:length(file_list)){
    df <- read.csv(file_list[i],sep="\t",stringsAsFactors = F)
    for (k in 1:nrow(df)){
      cdr3 <- df[k,"cdr3aa"]
      all_length[as.character(nchar(cdr3)),id_list[i]] <- 0
    }
  }
  all_length[is.na(all_length)] <- 0
  for (i in 1:length(file_list)){
    all_length <- lenth_distribution(file_list[i],all_length,id_list[i])
    all_length["sample_type",id_list[i]]=group_list[i]
  }
  return(all_length)
}
name <- read.table("./metadata.txt",sep = "\t",header = 1)

work_tcr_type <- "TRA" #更改这里为所需要分析的链

file_list <- name$file_name[which(name$tcr_type==work_tcr_type)]
id_list <- name$sample_id[which(name$tcr_type==work_tcr_type)]
group_list <- c(rep("Control"4),rep("Treatment",4))
all_length <- all_lenth_distribution(file_list,id_list,group_list)
#---------absolute----------------#
#使用绝对数值进行绘图
if(F){
temp <- all_length[-nrow(all_length),]
temp <- temp[order(as.numeric(rownames(temp))),]
all_length <- rbind(temp,all_length[nrow(all_length),])}

#------------percent----------#
#使用占比百分比进行绘图
if(T){
temp <- all_length[-nrow(all_length),]
temp=as.data.frame(lapply(temp,as.numeric),row.names = rownames(temp))
colSums(temp)
temp <- data.frame(apply(temp,2,function(x) x/sum(x)))
temp <- temp[order(as.numeric(rownames(temp))),]
all_length <- rbind(temp,all_length[nrow(all_length),])}


####---check this and change--###
all_length <- all_length[c(6:14,nrow(all_length)),]#挑选合适的长度区间进行绘图


#draw picture
df <- all_length
work_df <- data.frame(t(df))
df <- melt(work_df, id="sample_type", variable.name="length",value.name = "count")
df$length <- substring(df$length,2)
df$count <- as.numeric(df$count)
df$length <- as.numeric(df$length)
df <- df[order(df$length),]
df$length <- as.factor(df$length)
mean <- aggregate(df$count, by=list(df$length,df$sample_type), FUN=mean)
sd <- aggregate(df$count, by=list(df$length, df$sample_type), FUN=sd)
len <- aggregate(df$count, by=list(df$length, df$sample_type), FUN=length)
df_res <- data.frame(mean, sd=sd$x, len=len$x)
colnames(df_res) = c("CDR3_Length", "Group", "Mean", "Sd", "Count")
df_res$Se <- df_res$Sd/sqrt(df_res$Count)
ggplot(df_res, mapping = aes(x=CDR3_Length, y=Mean, fill=Group,group=Group,color=Group)) +
  labs(xlab = "CDR3 Length",ylab="Mean",title =paste0("Distribution of ",work_tcr_type," CDR3 Length"))+
  geom_bar(stat="identity", position=position_dodge(),color="black", width=.8) +
  geom_errorbar(aes(ymin=Mean-Se, ymax=Mean +Se),
                position=position_dodge(.8), width=.2,color="black") +
  theme(axis.text.x = element_text(size = 14, color = "black"))+#设置x轴字体大小
  theme(axis.text.y = element_text(size = 14, color = "black"))+#设置y轴字体大小
  theme(title=element_text(size=13))+#设置标题字体大小
  theme_bw()+
  geom_line(size=0.5,position = position_dodge(0.8))+
  scale_y_continuous(expand = c(0,0),limits = c(0,0.3)) #注意更改Y轴范围
ggsave(paste0("./output/CDR3_lenth",work_tcr_type,".pdf"),width = 10,height = 8)

#堆叠图
library(ggsci)
ggplot(df_res,mapping = aes(x=Group,y=Mean,fill=CDR3_Length))+
  geom_col(position = 'stack', width = 0.6)+
  scale_color_aaas()+
  theme_bw()+scale_y_continuous(expand = c(0,0))
ggsave(paste0("./output/CDR3_lenth",work_tcr_type,"_stack.pdf"),width = 7,height = 7) 

5.Gene usage analysis

java -jar e:/vdjtools/vdjtools-1.2.1.jar CalcSegmentUsage -m .\metadata.txt .\results\
rm(list=ls())
library(RColorBrewer)
library(pheatmap)
for (region in c("V","J")){
  for (type in c("TRA","TRB","TRD","TRG")){
    file_name <- paste0("./results/segments.wt.",region,".txt")
    df <- read.csv(file_name,sep="\t",row.names = 1)
    work_tcr_type <- type
    work_df <- df[which(df$tcr_type==work_tcr_type),]
    work_df <- work_df[,-1:-2]
    work_df <- t(work_df)
    work_df <- work_df[which(rowSums(work_df)>0),]
    work_df <- work_df*100
    #---------------------------------------------------------#
    annotation_col <- data.frame(c(rep("Control",4),rep("Treatment",4)))
    #---------------------------------------------------------#
    rownames(annotation_col) <- colnames(work_df)
    colnames(annotation_col) <- "Type"
    #coul <- colorRampPalette(brewer.pal(9, "OrRd"))(50)
    coul <-  colorRampPalette(colors = c("blue","white","red"))(100)
    pheatmap(work_df,
             filename = paste0("./output/",work_tcr_type,"_GeneUsage_",region,".pdf"),
             scale = "row",
             annotation_col = annotation_col,cluster_cols = FALSE,
             height = 15,width = 10,cutree_col =2,display_numbers = FALSE,labels_col = "",color = coul)
  }
}

6.氨基酸占比分析

metadata <- read.table("./metadata.txt",sep="\t",header = 1)
chr_freq <- function(str,chr){
  df <- data.frame()
  for (i in 1:nchar(str)){
    df[i,"cdr3"] <- substr(str,i,i)
  }
  p <- table(df)[chr]/nchar(str)
  return(p)
}
aa_freq <- function(df,chr){
  p <- c()
  for (i in 1:nrow(df)){
    cdr3 <- df[i,"cdr3aa"]
    p[i] <- chr_freq(cdr3,chr)
  }
  p_mean <- mean(p,na.rm = T)
  return(p_mean)
}
aa_name <- c("A","R","N","D","C","Q","E","G","H","I","L","K","M","F","P","S","T","W","Y","V")#要分析的氨基酸
file_list <- metadata$file_name
sample_id <- metadata$sample_id
p=data.frame()
for (i in 1:length(file_list)){
  print(paste0(file_list[i],"---running!"))
  df <- read.csv(file_list[i],sep="\t",stringsAsFactors = F)
  for (aa in aa_name){
    print(paste0(aa,"-------runnning!----------"))
    p[sample_id[i],aa] <- aa_freq(df,aa)
  }
}
p1 <- p*100
write.table(p1,file="./results/aa_frequency.txt",sep="\t",row.names = T,quote = F)
library(ggplot2)
library(reshape2)
library(ggpubr)
aa_fre <- read.csv("./results/aa_frequency.txt",sep="\t",row.names = 1)
aa_fre$Sample_type <- c(rep("Control",16),rep("Treatment",16))
aa_fre$tcr_type <- rep( c("TRA","TRB","TRD","TRG"),4)
aa_fre$Sample_type <- factor(aa_fre$Sample_type)
aa_fre$tcr_type <- factor(aa_fre$tcr_type)

for (tcr in c("TRA","TRB","TRD","TRG")){
  aa_fre_work <- aa_fre[which(aa_fre$tcr_type==tcr),]
  aa_fre_work <- aa_fre_work[,-ncol(aa_fre_work)]
  work_res <- melt(aa_fre_work,id.vars = "Sample_type")
  for (i in 1:(ncol(aa_fre_work)-1)){
    temp <- work_res[which(work_res$variable==colnames(aa_fre)[i]),]
    p1 <- ggpaired(temp, x = "Sample_type", y = "value",
                   line.color = "gray", line.size = 0.5,
                    legend = "right",color = "Sample_type", palette = "nejm",
                    add = "jitter",add.params=list(color = "Sample_type",size=3),
                    title = colnames(aa_fre)[i],  xlab = "",size = 0.5,width =0.5,
                    ylab = paste0("Percentage of ",tcr," aa frequency")) +
      theme(axis.text.x =element_text(size=13), axis.title.y=element_text(size=16,face="bold"))+
      theme(legend.position = 'none')
    p1 + stat_compare_means(aes(group = Sample_type),
                            label.x = 1.5,label = "p.signif",
                           # comparisons = list(c("Control","Treatment")),
                            method = 'wilcox.test',hide.ns = TRUE,size=8)
    ggsave(paste0("./output/aa_fre/",tcr,"_",colnames(aa_fre_work)[i],".png"))
  }
}
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 216,402评论 6 499
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 92,377评论 3 392
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 162,483评论 0 353
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 58,165评论 1 292
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 67,176评论 6 388
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 51,146评论 1 297
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 40,032评论 3 417
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,896评论 0 274
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 45,311评论 1 310
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,536评论 2 332
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,696评论 1 348
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 35,413评论 5 343
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 41,008评论 3 325
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,659评论 0 22
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,815评论 1 269
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 47,698评论 2 368
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,592评论 2 353

推荐阅读更多精彩内容