说在前面
Immugent在上一期推文中:ProjecTILs:一站式解决肿瘤和感染模型中T细胞的注释,大致介绍了ProjecTILs的背景知识和功能,从本期开始将通过实操对ProjecTILs的分析技巧进行演示。
在这期推文中,Immugent将通过代码演示建立一个整合scRNA-seq分析工作的流程,来解读MC38小鼠结肠癌模型中肿瘤浸润T细胞的转录和克隆分型情况。
要想复现下面代码需要安装下列三个R包:Seurat,ProjecTILs,scRepertoire。
代码实现
首先是需要准备输入的单细胞转录组数据,这里用的是示例数据,大家也可以直接用自己的数据。
library(gridExtra)
library(ggplot2)
library(plotly)
library(ProjecTILs)
library(scRepertoire)
files <- c("E-MTAB-7919.processed.1.zip", "E-MTAB-7919.processed.2.zip", "E-MTAB-7919.processed.3.zip")
matrix_dir <- "./input/Xiong_TIL/matrix"
system(sprintf("mkdir -p %s", matrix_dir))
for (i in 1:length(files)) {
data_path <- sprintf("https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-7919/%s",
files[i])
system(sprintf("curl %s --output %s/data.zip", data_path, matrix_dir))
system(sprintf("unzip -o %s/data.zip -d %s", matrix_dir, matrix_dir))
}
system(sprintf("rm %s/data.zip", matrix_dir))
projectID <- "Xiong_TIL"
libIDtoSampleID <- c("Mouse 1", "Mouse 2", "Mouse 3", "Mouse 4")
names(libIDtoSampleID) <- 4:7
exp_mat <- Read10X(matrix_dir)
querydata <- CreateSeuratObject(counts = exp_mat, project = projectID, min.cells = 3,
min.features = 50)
querydata$Sample <- substring(colnames(querydata), 18)
table(querydata$Sample)
querydata$SampleLabel <- factor(querydata$Sample, levels = c(4:7), labels = libIDtoSampleID)
table(querydata$SampleLabel)
#Mouse 1 Mouse 2 Mouse 3 Mouse 4
#1759 1746 1291 1386
接下来还需要相应的TCR数据。
data_path <- "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-7918/E-MTAB-7918.processed.1.zip"
tcr_dir <- "./input/Xiong_TIL/TCR"
system(sprintf("mkdir -p %s", tcr_dir))
system(sprintf("curl %s --output %s/data.zip", data_path, tcr_dir))
system(sprintf("unzip -o %s/data.zip -d %s", tcr_dir, tcr_dir))
libIDtoSampleID_VDJ <- 4:7
names(libIDtoSampleID_VDJ) <- 35:38
vdj.list <- list()
for (i in 1:length(libIDtoSampleID_VDJ)) {
s <- names(libIDtoSampleID_VDJ)[i]
vdj.list[[i]] <- read.csv(sprintf("%s/filtered_contig_annotations_%s.csv", tcr_dir,
s), as.is = T)
# Rename barcodes to match scRNA-seq suffixes
vdj.list[[i]]$barcode <- sub("\\d$", "", vdj.list[[i]]$barcode)
vdj.list[[i]]$barcode <- paste0(vdj.list[[i]]$barcode, libIDtoSampleID_VDJ[i])
vdj.list[[i]]$raw_clonotype_id <- paste0(vdj.list[[i]]$raw_clonotype_id, "-",
libIDtoSampleID_VDJ[i])
vdj.list[[i]]$SampleLabel <- libIDtoSampleID_VDJ[i]
}
# Using parameters removeNA=T and removeMulti=T will remove cells with multiple
# a-b combinations
combined <- combineTCR(vdj.list, samples = libIDtoSampleID_VDJ, ID = names(libIDtoSampleID_VDJ),
cells = "T-AB", removeNA = T, removeMulti = T)
for (i in seq_along(combined)) {
combined[[i]] <- stripBarcode(combined[[i]], column = 1, connector = "_", num_connects = 3)
}
querydata <- combineExpression(combined, querydata, cloneCall = "gene", groupBy = "none")
数据准备完成后,下面就开始运行ProjecTILs。
ref <- load.reference.map()
query.projected <- make.projection(querydata, ref = ref, ncores = 2)
p1 <- plot.projection(ref)
p2 <- plot.projection(ref, query.projected, linesize = 0.5, pointsize = 0.5)
grid.arrange(p1, p2, ncol = 2)
plots <- list()
sample_names <- unique(query.projected$SampleLabel)
for (sample_i in seq_along(sample_names)) {
sample <- sample_names[sample_i]
plots[[sample_i]] <- plot.projection(ref, query.projected[, query.projected$SampleLabel ==
sample]) + ggtitle(sample)
}
grid.arrange(grobs = plots, ncol = 2)
query.projected <- cellstate.predict(ref = ref, query = query.projected)
table(query.projected$functional.cluster) #Cell state assignment is stored in the 'functional.cluster' metadata field
plot.statepred.composition(ref, query.projected, metric = "Percent")
plot.states.radar(ref = ref, query.projected, min.cells = 30)
levs <- c("Hyperexpanded (100 < X <= 500)", "Large (20 < X <= 100)", "Medium (5 < X <= 20)",
"Small (1 < X <= 5)", "Single (0 < X <= 1)", NA)
palette <- colorRampPalette(c("#FF4B20", "#FFB433", "#C6FDEC", "#7AC5FF", "#0348A6"))
query.projected$cloneType <- factor(query.projected$cloneType, levels = levs)
DimPlot(query.projected, group.by = "cloneType") + scale_color_manual(values = c(palette(5)),
na.value = "grey")
clone_call = "CTaa" #select column/variable to use as clonotypes ID, in this case CTaa, the paired TCR CDR3 aminoacid sequences
cutoff <- 35 #Min cells for clonotype
clonotypeSizes <- sort(table(query.projected[[clone_call]])[table(query.projected[[clone_call]]) >
cutoff], decreasing = T)
bigClonotypes <- names(clonotypeSizes)
plots <- list()
for (i in 1:length(bigClonotypes)) {
ctype <- bigClonotypes[i]
plots[[i]] <- plot.projection(ref, query.projected[, which(query.projected[[clone_call]] ==
ctype)]) + ggtitle(sprintf("%s - size %i", ctype, clonotypeSizes[ctype]))
}
grid.arrange(grobs = plots, ncol = 2)
meta <- melt(table(query.projected@meta.data[!is.na(query.projected@meta.data$Frequency),
c("functional.cluster", "cloneType")]), varnames = c("functional.cluster", "cloneType"))
ggplot(data = meta, aes(x = functional.cluster, y = value, fill = cloneType)) + geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + scale_fill_manual(values = c(palette(5)),
na.value = "grey")
meta.list <- expression2List(query.projected, group = "functional.cluster")
clonalOverlap(meta.list, cloneCall = "gene", method = "morisita") + theme(axis.text.x = element_text(angle = 90,
hjust = 1, vjust = 0.5))
compareClonotypes(meta.list, numbers = 6, samples = c("CD8_EffectorMemory", "CD8_Tpex",
"CD8_Tex", "CD8_NaiveLike", "CD8_EarlyActiv"), cloneCall = "aa", graph = "alluvial") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
clone_call = "CTaa" #select column/variable to use as clonotypes ID, in this case CTaa, the paired TCR CDR3 aminoacid sequences
cutoff <- 35 #Min cells for clonotype
clonotypeSizes <- sort(table(query.projected[[clone_call]])[table(query.projected[[clone_call]]) >
cutoff], decreasing = T)
bigClonotypes <- names(clonotypeSizes)
plots <- list()
for (i in 1:length(bigClonotypes)) {
ctype <- bigClonotypes[i]
plots[[i]] <- plot.projection(ref, query.projected[, which(query.projected[[clone_call]] ==
ctype)]) + ggtitle(sprintf("%s - size %i", ctype, clonotypeSizes[ctype]))
}
grid.arrange(grobs = plots, ncol = 2)
小结
通过这个示例数据(MC38 TILs)在参考图谱上的投射显示,这些肿瘤浸润的T细胞主要由耗竭的(CD8_Tex)、耗竭前体的CD8 T细胞(CD8_Tpex)和典型的高免疫原性(热肿瘤)的Tregs组成。此外,大多数扩展克隆被发现跨越CD8_Tex和CD8_Tpex状态。这是预期的,因为在肿瘤中持续的抗原刺激驱动免疫主导的肿瘤反应性克隆走向衰竭分化路径。在TIL状态注释参考图谱的背景下,结合投射物和scRepertoire简化了单细胞表达数据和克隆型分析的联合分析。
好啦,ProjecTILs第一期的实操部分到这就结束啦,欢迎大家继续关注!