Xcell实战

xCell is a recently published method based on ssGSEA that estimates the abundance scores of 64 immune cell types, including adaptive and innate immune cells, hematopoietic progenitors, epithelial cells, and extracellular matrix cells

xcell 是基于ssGSEA(single-sample GSEA)
ssGSEA顾名思义是一种特殊的GSEA,它主要针对单样本无法做GSEA而提出的一种实现方法,原理上与GSEA是类似的,不同的是GSEA需要准备表达谱文件即gct,根据表达谱文件计算每个基因的rank值
参考网址https://shengxin.ren/article/403https://support.bioconductor.org/p/98463/

关于Xcell找对网址很重要,我一开始找错了地方

https://github.com/dviraran/xCell
首先看read.me 很开心是我要的东西

image.png

安装这个之前经常报错,要安装很多别的辅助包

install.packages('Rcpp')#########安装各类程序包
devtools::install_github('dviraran/xCell')
image.png

安装的时候还会有错误。


安装好的这一刻,还是很开心的。

image.png

使用方法

第一步 计算xCell

library(xCell)
exprMatrix = read.table(file = '/Users/chenyuqiao/Desktop/TCGA-LUAD.htseq_counts.tsv',header=TRUE,row.names=1, as.is=TRUE)
xCellAnalysis(exprMatrix)
data imput
library(xCell)
exprMatrix = read.table(file = '/Users/chenyuqiao/Desktop/TCGA-LUAD.htseq_counts.tsv',header=TRUE,row.names=1, as.is=TRUE)

###exprMatrix<- exprMatrix[1:10,1:10]
Ensemble_ID<- rownames(exprMatrix)
ID<- strsplit(Ensemble_ID, "[.]")
str(ID)
IDlast<- sapply(ID, "[", 1)
exprMatrix$Ensemble_ID<- IDlast
row.names(exprMatrix)<- exprMatrix$Ensemble_ID
save(exprMatrix, file = 'TCGA.Rdata')
load(file = 'TCGA.Rdata')


####library(clusterProfiler)
library(org.Hs.eg.db)
ls("package:org.Hs.eg.db")
g2s=toTable(org.Hs.egSYMBOL);head(g2s)
g2e=toTable(org.Hs.egENSEMBL);head(g2e)
tmp=merge(g2e,g2s,by='gene_id')
head(tmp)
colnames(exprMatrix)[ncol(exprMatrix)] <- c("ensembl_id")###################重命名Ensemble_ID 便于后面merge
exprMatrix[1:4,1:4]
exprMatrix<- merge(tmp,exprMatrix,by='ensembl_id')
exprMatrix[1:4,1:4]
exprMatrix<- exprMatrix[,- c(1,2)]
exprMatrix=exprMatrix[!duplicated(exprMatrix$symbol),]
row.names(exprMatrix)<- exprMatrix[,1]
exprMatrix<- exprMatrix[,-1]
exprMatrix[1:4,1:4]
xCellAnalysis(exprMatrix)####################一句话就分析完成了
##save(results,file = 'Xcell_result.Rdata')#############需要重新修改

第二步:批量生存分析

load(file = 'Xcell_result.Rdata')
result<- as.data.frame(result)
library(dplyr)
library(tidyverse)

TCGA.LUAD.GDC_phenotype <- read.delim("TCGA-LUAD.GDC_phenotype.tsv")

#colnames(TCGA.LUAD.GDC_phenotype)
#head(TCGA.LUAD.GDC_phenotype)

LUAD_Pheno<- select(TCGA.LUAD.GDC_phenotype, "submitter_id.samples", "vital_status.diagnoses", "days_to_death.diagnoses", "days_to_last_follow_up.diagnoses", "pathologic_N", "pathologic_M", "days_to_new_tumor_event_after_initial_treatment")
LUAD_Pheno<- LUAD_Pheno[grep('01A',LUAD_Pheno$submitter_id.samples),]  #####只筛选01A的   01A代表肿瘤
LUAD_Pheno[is.na(LUAD_Pheno)]<- 0
LUAD_Pheno$PFS_status<- ifelse((LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment == 0 & LUAD_Pheno$days_to_death.diagnoses == 0), 0,1)
##################################
LUAD_Pheno$OS<- ifelse(LUAD_Pheno$days_to_last_follow_up.diagnoses > LUAD_Pheno$days_to_death.diagnoses, LUAD_Pheno$days_to_last_follow_up.diagnoses,LUAD_Pheno$days_to_death.diagnoses)
LUAD_Pheno$PFS<- ifelse(LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment == 0, LUAD_Pheno$OS ,LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment)
LUAD_Pheno$OS_status<- as.factor(LUAD_Pheno$vital_status.diagnoses)
#############################设计好分组




#############################生存曲线

firstdata<- result  ###############expre
firstdata$ID<- rownames(firstdata)
gene<- row.names(firstdata)
#######select only gene to analysis
library(survminer)
library(survival)
library(ggplot2)
library(dplyr)
for (x in gene) {
  RNA_seq_data<-filter(firstdata, firstdata$ID == x)
  RNA_seq_data<- t(RNA_seq_data)
  RNA_seq_data<- as.data.frame(RNA_seq_data)
  # str(RNA_seq_data)
  # colnames(LUAD_Pheno)
  RNA_seq_data$submitter_id.samples<- row.names(RNA_seq_data)
  colnames(RNA_seq_data)<- c("Expressionvalue","submitter_id.samples")
  LUAD_Pheno$submitter_id.samples<- as.character(LUAD_Pheno$submitter_id.samples)
  LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
  LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
  LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
  LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
  finaldata<- inner_join(LUAD_Pheno,RNA_seq_data, by = "submitter_id.samples")
  finaldata$PFS_status<- as.character(finaldata$PFS_status)
  finaldata$PFS_status<- as.numeric(as.factor(finaldata$PFS_status))
  finaldata$Expressionvalue<- as.numeric(as.character(finaldata$Expressionvalue))
  finaldata$group<- ifelse(finaldata$Expressionvalue>median(finaldata$Expressionvalue),'high','low')
  library(survminer)
  library(survival)
  fit <- survfit(Surv(finaldata$PFS,finaldata$PFS_status)~finaldata$group, data=finaldata) 
  summary(fit)
  pp<- ggsurvplot(fit, data = finaldata, conf.int = F, pval = TRUE,
                  xlim = c(0,2000), # present narrower X axis, but not affect
                  # survival estimates. 
                  xlab = "Time in days", # customize X axis label. 
                  break.time.by = 200) # break X axis in time intervals by 500. 
  ggsave(filename = paste("plot_",x,".pdf",sep = ""))
  print(x)
}
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • 前些日子从@张鑫旭微博处得一份推荐(Front-end-tutorial),号称最全的资源教程-前端涉及的所有知识...
    谷子多阅读 4,277评论 0 44
  • 写作如同挠痒,首先要找对位置,比如左背痒你不能挠到右腰去。所以一定要抓住主题,不能跑偏。再个就是要拿捏好轻重,找准...
    群星咖啡馆阅读 694评论 0 0
  • 上篇提到了投资的第一性原理,即我们在投资时应该秉承的理念是什么? 本篇来谈,我们如何在投资时,去找到那些好的资产,...
    骏少的宅院阅读 488评论 0 2
  • 傍晚时分遇到了初中的同学,他目前在做宝健养生,开着医疗门诊,还是有国家营养师资格的讲师。好久不见,聊着聊着就发...
    王宇歌阅读 96评论 0 1
  • 一夜春风出新芽, 放眼望出遍地花。 哪是花来哪是叶? 近观细辨认识它! 红黄紫绿争妖艳, 微风轻抚媚人间。 春暖花...
    深知绿叶阅读 335评论 0 2