0.背景知识
TIDE评分越高越容易发生免疫逃逸,免疫治疗获益的可能性就越低,评分>0视为无响应,<0视为有反应。
只有网页工具和python版。网页工具需要注册登陆使用,普通邮箱注册就可以。
https://github.com/liulab-dfci/TIDEpy
1.亚型
可以是现成的:
TCGA的亚型数据下载链接: https://tcga-pancan-atlas-hub.s3.us-east-1.amazonaws.com/download/TCGASubtype.20170308.tsv.gz
也可以是自己聚类得到的,例如
实战-TCGA数据的NMF聚类和可视化
TCGA数据的一致性聚类实战和可视化
有人把亚型分析做成了一站式R包
多组学、多算法聚类神器-MOVICS
rm(list = ls())
library(stringr)
sub = rio::import("TCGASubtype.20170308.tsv.gz")
k = stringr::str_starts(sub$Subtype_Selected,"ACC");table(k)
## k
## FALSE TRUE
## 7643 91
sub = sub[k,]
table(sub$Subtype_Selected)
##
## ACC.CIMP-high ACC.CIMP-intermediate ACC.CIMP-low
## 20 27 32
## ACC.NA
## 12
k2 = sub$Subtype_Selected!="ACC.NA";table(k2)
## k2
## FALSE TRUE
## 12 79
sub = sub[k2,]
sub = data.frame(row.names = sub$sample,
subtype = str_remove_all(sub$Subtype_Selected,"ACC.CIMP-| "))
head(sub)
## subtype
## TCGA-OR-A5J1-01 high
## TCGA-OR-A5J2-01 low
## TCGA-OR-A5J3-01 intermediate
## TCGA-OR-A5J4-01 high
## TCGA-OR-A5J5-01 intermediate
## TCGA-OR-A5J6-01 low
搞成行名是样本名称,内容是亚型的格式即可。
2.表达矩阵
load("D:/TCGA_RNA_seq/count/acc_exp.Rdata")
acc[1:3,1:3]
## TCGA-OR-A5JD-01A-11R-A29S-07 TCGA-OR-A5L5-01A-11R-A29S-07
## ENSG00000000003.15 2086 3813
## ENSG00000000005.6 2 3
## ENSG00000000419.13 2086 2909
## TCGA-OR-A5KX-01A-11R-A29S-07
## ENSG00000000003.15 2145
## ENSG00000000005.6 2
## ENSG00000000419.13 2546
library(tinyarray)
exp = trans_exp_new(acc)
table(make_tcga_group(exp)) #都是tumor,不是的话要去除normal样本
##
## normal tumor
## 0 79
#exp = exp[,make_tcga_group(exp)=="tumor"]
3.匹配表达矩阵与亚型数据的样本顺序
head(colnames(exp))
## [1] "TCGA-OR-A5JD-01A-11R-A29S-07" "TCGA-OR-A5L5-01A-11R-A29S-07"
## [3] "TCGA-OR-A5KX-01A-11R-A29S-07" "TCGA-OR-A5JY-01A-31R-A29S-07"
## [5] "TCGA-OR-A5JV-01A-11R-A29S-07" "TCGA-PK-A5H8-01A-11R-A29S-07"
head(rownames(sub))
## [1] "TCGA-OR-A5J1-01" "TCGA-OR-A5J2-01" "TCGA-OR-A5J3-01" "TCGA-OR-A5J4-01"
## [5] "TCGA-OR-A5J5-01" "TCGA-OR-A5J6-01"
colnames(exp) = str_sub(colnames(exp),1,15)
s = intersect(colnames(exp),rownames(sub));length(s)
## [1] 78
exp = exp[,s]
sub = sub[s,,drop=F]
4.将表达矩阵进行标准化并导出
TIDE首页有明显的提示:
Note: The gene expression value should be normalized toward a control sample which could be either normal tissues related with a cancer type or mixture sample from diverse tumor samples. The log2(RPKM+1) values from a RNA-seq experiment may not be meaningful unless a good reference control is available to adjust the batch effect and cancer type difference. In our study, we used the all sample average in each study as the normalization control.
最后一句话说“使用每个研究中的所有样本平均值作为归一化对照” 代码是:
exp2 <- sweep(exp,1, apply(exp,1,mean,na.rm=T))
write.table(exp2,"TIDE.txt",sep = "\t",quote = F)
5.读取结果并作图
将这个文件上传的TIDE,得到的结果是tide.csv
res <- read.csv("tide.csv",row.names = 1,check.names = F)
res[1:4,1:4]
## No benefits Responder TIDE IFNG
## TCGA-OR-A5J9-01 False False 0.90 -1353.97
## TCGA-P6-A5OF-01 False False 0.68 -818.80
## TCGA-OR-A5KV-01 False False 0.66 -1885.47
## TCGA-OR-A5JF-01 False False 0.64 -1489.63
res = merge(res,sub,by = "row.names")
table(res$Responder,res$subtype)
##
## high intermediate low
## False 11 11 14
## True 8 16 18
f = fisher.test(table(res$subtype,res$Responder))
label = paste("fisher.test p value =",round(f$p.value,3))
label
## [1] "fisher.test p value = 0.525"
fisher.test用来检验subtype和Responder是否相关,p<0.05表示相关
很不幸这个例子是不相关滴。
5.画图
TIDE列是TIDE分数。Responder是免疫治疗是否响应
5.1 TIDE评分柱状图
library(ggplot2)
library(dplyr)
res = arrange(res,desc(TIDE))
p1 = ggplot(res, aes(x = 1:nrow(res),
y = TIDE,
fill = Responder)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("#f87669","#2fa1dd"))+
xlab("patient")+
annotate("text", x = 40, y = 1, label = label,size = 5) +
theme_minimal()
5.2.免疫反应与亚型
library(dplyr)
dat = count(res,subtype,Responder)
dat = dat %>% group_by(subtype) %>%
summarise(Responder = Responder,n = n/sum(n))
dat$Responder = factor(dat$Responder,levels = c("False","True"))
dat
## # A tibble: 6 × 3
## # Groups: subtype [3]
## subtype Responder n
## <chr> <fct> <dbl>
## 1 high False 0.579
## 2 high True 0.421
## 3 intermediate False 0.407
## 4 intermediate True 0.593
## 5 low False 0.438
## 6 low True 0.562
library(ggplot2)
p2 = ggplot(data = dat)+
geom_bar(aes(x = subtype,y = n,
fill = Responder),
stat = "identity")+
scale_fill_manual(values = c("#f87669","#2fa1dd"))+
geom_label(aes(x = subtype,y = n,
label = scales::percent(n),
fill = Responder),
color = "white",
size = 4,label.size = 0,
show.legend = F,
position = position_fill(vjust = 0.5))+
theme_minimal()+
guides(label = "none")
library(patchwork)
p1+p2+ plot_layout(widths = c(3,2),guides = "collect")
6.其他几个评分
colnames(res)
## [1] "Row.names" "No benefits" "Responder" "TIDE" "IFNG"
## [6] "MSI Expr Sig" "Merck18" "CD274" "CD8" "CTL.flag"
## [11] "Dysfunction" "Exclusion" "MDSC" "CAF" "TAM M2"
## [16] "subtype"
IFNG:Interferon-gamma,干扰素-γ是一种由免疫细胞,特别是T细胞和自然杀伤细胞产生的细胞因子。 Cytotoxic T Lymphocyte(CTL.flag,细胞毒性T淋巴细胞) T cell dysfunction score(Dysfunction,T细胞功能障碍评分) T cell exclusion score(Exclusion,T细胞排斥评分) cancer-associated fibroblasts (CAF,癌症相关成纤维细胞) myeloid-derived suppressor cells (MDSC,髓源性抑制细胞) M2 macrophages.
详细的分数计算原理在这里: https://liulab-dfci.github.io/RIMA/Response.html
可以作图比较他们
dat = t(res[,c(4:5,8:9,11:15)])
draw_boxplot(dat,res$Responder)+
facet_wrap(~rows,scales = "free")
draw_boxplot(dat,res$subtype)+
facet_wrap(~rows,scales = "free")