照着老大的这篇教程来的TCGA

主要照着老大的这篇教程来的TCGA

https://mp.weixin.qq.com/s/JB_329LCWqo5dY6MLawfEA

生存分析的统计学探究 (老大的)

https://mp.weixin.qq.com/s?__biz=MzAxMDkxODM1Ng==&mid=2247483699&idx=1&sn=56ec5ceef6c8496f2daeb7e4a2412396&scene=21#wechat_redirect

Surv:用于创建生存数据对象

survfit:创建KM生存曲线或是Cox调整生存曲线

survdiff:用于不同组的统计检验

###先把这些代码保存下来
library(survival)
my.surv <- Surv(OS_MONTHS,OS_STATUS==’LIVING’)  ##这个生存对象是看看病人的总生存期与死亡状态的关系
##这个Surv函数第一个参数必须是数值型的时间,第二个参数是逻辑向量,1,0表示死亡与否
kmfit1 <- survfit(my.surv~1) ## 直接对生存对象拟合生存函数
summary(kmfit1)
plot(kmfit1)   ##画出生存曲线
survdiff(my.surv~type, data=dat)     ### 根据生存对象再加上一个分组因子来拟合生存函数,并且比较不同因子分组的生存效果。


library(cgdsr)
mycgds <- CGDS(‘http://www.cbioportal.org/public-portal/‘)
test(mycgds)
all_TCGA_studies <- getCancerStudies(mycgds)
all_tables <- getCaseLists(mycgds, ‘ov_tcga_pub’)
all_dataset<- getGeneticProfiles(mycgds, ‘ov_tcga_pub’)
BRCA1 <- getProfileData(mycgds, my_gene, my_dataset, my_table)
ov_tcga_pub_meth1<- getClinicalData(mycgds, all_tables[8,1])
ov_tcga_pub_meth2<- getClinicalData(mycgds, all_tables[9,1])
ov_tcga_pub_meth3<- getClinicalData(mycgds, all_tables[10,1])
ov_tcga_pub_meth4<- getClinicalData(mycgds, all_tables[11,1])


library(survival)
attach(ov_tcga_pub_meth1)
## 估计KM生存曲线
my.surv <- Surv(OS_MONTHS,OS_STATUS==’LIVING’)
kmfit1 <- survfit(my.surv~1)
summary(kmfit1)
plot(kmfit1)



kmfit2 <- survfit(my.surv~TUMOR_STAGE_2009)
summary(kmfit2)
plot(kmfit2,col = rainbow(length(unique(ov_tcga_pub_meth1[,13]))))
detach(ov_tcga_pub_meth1)


ov_tcga_pub_meth1$sample<- rownames(ov_tcga_pub_meth1)
ov_tcga_pub_meth2$sample<- rownames(ov_tcga_pub_meth2)
ov_tcga_pub_meth3$sample<- rownames(ov_tcga_pub_meth3)
ov_tcga_pub_meth4$sample<- rownames(ov_tcga_pub_meth4)
ov_tcga_pub_meth1$type<- ‘meth1′
ov_tcga_pub_meth2$type<- ‘meth2′
ov_tcga_pub_meth3$type<- ‘meth3′
ov_tcga_pub_meth4$type<- ‘meth4′
dat=rbind(ov_tcga_pub_meth1,ov_tcga_pub_meth2,ov_tcga_pub_meth3,ov_tcga_pub_meth4)
attach(dat)
## 根据分组type估计KM生存曲线
my.surv <- Surv(OS_MONTHS,OS_STATUS==’LIVING’)
kmfit3 <- survfit(my.surv~type)
summary(kmfit3)
plot(kmfit3,col = rainbow(4))


##由图中可以看到甲基化数据分成的4个组,生存率差异还是蛮大的
##用图形方法检验PH假设
plot(kmfit3,fun=’cloglog’,col = rainbow(4))
# 检验显著性
survdiff(my.surv~type, data=dat)
# 用strata来控制协变量的影响
survdiff(my.surv~type+strata(TUMOR_STAGE_2009),data=dat)

2.文章中提到:使用R包cgdsr来下载TCGA的数据http://www.bio-info-trainee.com/1257.html

getCancerStudies,getCaseLists,getGeneticProfiles,getProfileData

TCGA的28篇教程- 使用R语言的RTCGA包获取TCGA数据

https://mp.weixin.qq.com/s/JB_329LCWqo5dY6MLawfEA

1.指定任意基因从任意癌症里面获取芯片表达数据

image-20190110192530175
image-20190110192559307
image-20190110192722155
compare_means(c(GATA3, PTEN, XBP1) ~ dataset, data = expr)
image-20190110180537874
image-20190110184549281
image-20190110192815685

更多可视化见:http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/77-facilitating-exploratory-data-visualization-application-to-tcga-genomic-data/STHDA网址

2.指定任意基因从任意癌症里面获取测序表达数据

image-20190110190332681
image-20190110193528686

3.用全部的rnaseq的表达数据来做主成分分析

image-20190110201803048

4.用5个基因在3个癌症的表达量做主成分分析

image-20190110203621191

5.用突变数据做生存分析

image-20190110210350353

6.多个基因在多种癌症的表达量热图

image-20190110211820768

==代码==

#1.指定任意基因从任意癌症里面获取芯片表达数据
options(BioC_mirror="https://mirrors.ustc.edu.cn/bioc/")
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
library(BiocManager)
BiocManager::install("RTCGA.clinical") 
BiocManager::install("RTCGA.mRNA")
BiocManager::install('RTCGA.mutations')
library(RTCGA.mutations)
expr <- expressionsTCGA(BRCA.mRNA, OV.mRNA, LUSC.mRNA,
                        extract.cols = c("GATA3", "PTEN", "XBP1","ESR1", "MUC1"))
expr
head <- head(expr)
nb_samples <- table(expr$dataset)
nb_samples
expr$dataset <- gsub(pattern = ".mRNA", replacement = "",  expr$dataset)
expr$dataset
expr$bcr_patient_barcode <- paste0(expr$dataset, c(1:590, 1:561, 1:154))
expr$bcr_patient_barcode

library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# GATA3
ggboxplot(expr, x = "dataset", y = "GATA3",
          title = "GATA3", ylab = "Expression",
          color = "dataset", palette = "jco")

ggboxplot(expr, x = "dataset", y = "PTEN",
          title = "PTEN", ylab = "Expression",
          color = "dataset", palette = "jco")


my_comparisons <- list(c("BRCA", "OV"), c("OV", "LUSC"))#加上Pvalues
ggboxplot(expr, x = "dataset", y = "GATA3",
          title = "GATA3", ylab = "Expression",
          color = "dataset", palette = "jco")+
  stat_compare_means(comparisons = my_comparisons)


compare_means(c(GATA3, PTEN, XBP1) ~ dataset, data = expr)

label.select.criteria <- list(criteria = "`y` > 10 & `x` %in% c('BRCA', 'OV')")
ggboxplot(expr, x = "dataset",
          y = c("GATA3", "PTEN", "XBP1"),
          combine = TRUE,
          color = "dataset", palette = "jco",
          ylab = "Expression", 
          label = "bcr_patient_barcode",              # column containing point labels
          label.select = label.select.criteria,       # Select some labels to display
          font.label = list(size = 9, face = "italic"), # label font
          repel = TRUE                                # Avoid label text overplotting
)


ggboxplot(expr, x = "dataset", y = "GATA3",
          title = "GATA3", ylab = "Expression",
          color = "dataset", palette = "jco",
          rotate = TRUE)

#2.指定任意基因从任意癌症里面获取测序表达数据
library(RTCGA)
library(RTCGA.rnaseq)
expr <- expressionsTCGA(BRCA.rnaseq, OV.rnaseq, LUSC.rnaseq,
                        extract.cols = c("GATA3|2625", "PTEN|5728", "XBP1|7494","ESR1|2099", "MUC1|4582"))
expr


nb_samples <- table(expr$dataset)
nb_samples


library(ggpubr)
# ESR1|2099
ggboxplot(expr, x = "dataset", y = "`PTEN|5728`",
          title = "PTEN|5728", ylab = "Expression",
          color = "dataset", palette = "jco")

#3.用全部的rnaseq的表达数据来做主成分分析
library(RTCGA.rnaseq)
library(dplyr)

expressionsTCGA(BRCA.rnaseq, OV.rnaseq, HNSC.rnaseq) %>%
  dplyr::rename(cohort = dataset) %>%  
  filter(substr(bcr_patient_barcode, 14, 15) == "01") -> BRCA.OV.HNSC.rnaseq.cancer
pcaTCGA(BRCA.OV.HNSC.rnaseq.cancer, "cohort") -> pca_plot
plot(pca_plot)


save(pca_plot,file = "pca-png")

#4.用5个基因在3个癌症的表达量做主成分分析
BiocManager::install('DT')
library(DT)

expr %>%  
  filter(substr(bcr_patient_barcode, 14, 15) == "01") -> rnaseq.5genes.3cancers
DT::datatable(rnaseq.5genes.3cancers)
pcaTCGA(rnaseq.5genes.3cancers, "dataset") -> pca_plot
plot(pca_plot)

#5.用突变数据做生存分析
library(RTCGA.mutations)
# library(dplyr) if did not load at start
library(survminer)
mutationsTCGA(BRCA.mutations, OV.mutations) %>%
  filter(Hugo_Symbol == 'TP53') %>%
  filter(substr(bcr_patient_barcode, 14, 15) ==
           "01") %>% # cancer tissue
  mutate(bcr_patient_barcode =
           substr(bcr_patient_barcode, 1, 12)) ->
  BRCA_OV.mutations
library(RTCGA.clinical)
survivalTCGA(
  BRCA.clinical,
  OV.clinical,
  extract.cols = "admin.disease_code"
) %>%
  dplyr::rename(disease = admin.disease_code) ->
  BRCA_OV.clinical
BRCA_OV.clinical %>%
  left_join(
    BRCA_OV.mutations,
    by = "bcr_patient_barcode"
  ) %>%
  mutate(TP53 =
           ifelse(!is.na(Variant_Classification), "Mut","WILDorNOINFO")) ->
  BRCA_OV.clinical_mutations
BRCA_OV.clinical_mutations %>%
  select(times, patient.vital_status, disease, TP53) -> BRCA_OV.2plot
kmTCGA(
  BRCA_OV.2plot,
  explanatory.names = c("TP53", "disease"),
  break.time.by = 400,
  xlim = c(0,2000),
  pval = TRUE) -> km_plot

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
## Scale for 'fill' is already present. Adding another scale for 'fill',
## which will replace the existing scale.
print(km_plot)

#6.多个基因在多种癌症的表达量热图
library(RTCGA.rnaseq)
# perfrom plot
# library(dplyr) if did not load at start
expressionsTCGA(
  ACC.rnaseq,
  BLCA.rnaseq,
  BRCA.rnaseq,
  OV.rnaseq,
  extract.cols = 
    c("MET|4233",
      "ZNF500|26048",
      "ZNF501|115560")
) %>%
  dplyr::rename(cohort = dataset,
                MET = `MET|4233`) %>%
  #cancer samples
  filter(substr(bcr_patient_barcode, 14, 15) ==
           "01") %>%
  mutate(MET = cut(MET,
                   round(quantile(MET, probs = seq(0,1,0.25)), -2),
                   include.lowest = TRUE,
                   dig.lab = 5)) -> ACC_BLCA_BRCA_OV.rnaseq
ACC_BLCA_BRCA_OV.rnaseq %>%
  select(-bcr_patient_barcode) %>%
  group_by(cohort, MET) %>%
  summarise_each(funs(median)) %>%
  mutate(ZNF500 = round(`ZNF500|26048`),
         ZNF501 = round(`ZNF501|115560`)) ->
  ACC_BLCA_BRCA_OV.rnaseq.medians
## `summarise_each()` is deprecated.
## Use `summarise_all()`, `summarise_at()` or `summarise_if()` instead.
## To map `funs` over all variables, use `summarise_all()`
heatmapTCGA(ACC_BLCA_BRCA_OV.rnaseq.medians,
            "cohort", "MET", "ZNF500",
            title = "Heatmap of ZNF500 expression")

©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 213,711评论 6 493
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 91,079评论 3 387
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 159,194评论 0 349
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 57,089评论 1 286
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 66,197评论 6 385
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 50,306评论 1 292
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 39,338评论 3 412
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,119评论 0 269
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 44,541评论 1 306
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 36,846评论 2 328
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,014评论 1 341
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 34,694评论 4 337
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 40,322评论 3 318
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,026评论 0 21
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,257评论 1 267
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 46,863评论 2 365
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 43,895评论 2 351

推荐阅读更多精彩内容

  • 最终还是没忍住,报名参加了舒月明的写作班,购买了她提供的阅读计划,她的两节课程…… 我的选择是对的,知道了什么样的...
    曼谷123阅读 203评论 0 0
  • 4月份是“全国爱国卫生运动月”,为了人人养成讲卫生的好习惯,各单位都在加大爱国卫生月卫生宣传的力度,下面是爱国卫生...
    TeacherGao_阅读 377评论 0 0
  • 凯恩斯的“节约贫穷论”的宏观经济学原理,着实让一般人难以理解。其实,我非常支持他的观点,GDP的计算方式决定了高G...
    飞翔雨季阅读 205评论 0 1