前言
在肿瘤的多组学分析过程中,我们常需要从TCGA下载相关数据。这里介绍使用TCGAbiolinks包下载数据的流程,最后将数据封装成SummarizedExperiment格式。
代码
library(TCGAbiolinks)
library(dplyr)
library(SummarizedExperiment)
# GDC project identifier used for the clinical query below.
cancer_type <- "TCGA-CESC"

# Fetch the clinical table for the project.
clinical <- GDCquery_clinic(project = cancer_type, type = "clinical")

# Write the table to the working directory without row names.
# The file name is derived from the project id so it follows `cancer_type`;
# the "clincal" spelling is kept as-is so anything already reading this file
# still finds it.
write.csv(clinical, paste0(cancer_type, "_clincal.csv"), row.names = FALSE)
#' Download and prepare one omics data type of a GDC project
#'
#' Builds a `GDCquery()` for the requested data type, downloads the matching
#' files with `GDCdownload()` into a directory named after the data type, and
#' loads them with `GDCprepare()`.
#'
#' @param project GDC project identifier forwarded to `GDCquery()`. Defaults
#'   to the global `cancer_type`.
#' @param Outdir Selects the data type to query and is also used as the
#'   download directory. Must be one of "mRNA", "miRNA", "CNV" or
#'   "DNA_Methylation".
#' @return The object returned by `GDCprepare()` for the query.
get_OmicsData <- function(project = cancer_type,
                          Outdir = "mRNA") {
  # Reject unknown data types up front: otherwise no query is built and the
  # failure only surfaces later as an unrelated "object not found" error.
  valid_types <- c("mRNA", "miRNA", "CNV", "DNA_Methylation")
  if (!is.character(Outdir) || length(Outdir) != 1L || is.na(Outdir) ||
      !Outdir %in% valid_types) {
    stop(
      sprintf(
        "Unsupported Outdir '%s'; expected one of: %s",
        toString(Outdir), toString(valid_types)
      ),
      call. = FALSE
    )
  }

  # NOTE(review): the "HTSeq - Counts" workflow and the `legacy` argument may
  # not be accepted by every TCGAbiolinks / GDC release -- confirm against the
  # installed version.
  query_Data <- switch(
    Outdir,
    mRNA = GDCquery(
      project = project,
      data.category = "Transcriptome Profiling",
      data.type = "Gene Expression Quantification",
      workflow.type = "HTSeq - Counts"
    ),
    miRNA = GDCquery(
      project = project,
      data.category = "Transcriptome Profiling",
      data.type = "miRNA Expression Quantification",
      workflow.type = "BCGSC miRNA Profiling"
    ),
    CNV = GDCquery(
      project = project,
      data.category = "Copy Number Variation",
      data.type = "Copy Number Segment"
    ),
    DNA_Methylation = GDCquery(
      project = project,
      data.category = "DNA methylation",
      legacy = TRUE
    )
  )

  # Download in chunks of 60 files through the GDC API into `Outdir`.
  GDCdownload(
    query = query_Data,
    method = "api",
    files.per.chunk = 60,
    directory = Outdir
  )

  # Read the downloaded files back from the same directory.
  expdat <- GDCprepare(
    query = query_Data,
    directory = Outdir
  )
  expdat
}
# Download each omics data type for the project and store the prepared object
# as an RDS file. File names are built from `cancer_type` so that changing the
# project cannot save another project's data under a "TCGA-CESC" name.

# Gene expression
dat_mRNA <- get_OmicsData(project = cancer_type, Outdir = "mRNA")
saveRDS(dat_mRNA, file = paste0(cancer_type, "_mRNA.RDS"))

# miRNA expression
dat_miRNA <- get_OmicsData(project = cancer_type, Outdir = "miRNA")
saveRDS(dat_miRNA, file = paste0(cancer_type, "_miRNA.RDS"))

# Copy number segments
dat_CNV <- get_OmicsData(project = cancer_type, Outdir = "CNV")
saveRDS(dat_CNV, file = paste0(cancer_type, "_CNV.RDS"))

# DNA methylation
dat_methy <- get_OmicsData(project = cancer_type, Outdir = "DNA_Methylation")
saveRDS(dat_methy, file = paste0(cancer_type, "_Methylation.RDS"))