# Step2 Grouping by special clinical information --------------------------
if (!file.exists( '.TCGA-BRCA.GDC_phenotype.tsv.gz' )) {
  gzfile <- "./TCGA-BRCA.GDC_phenotype.tsv.gz"
  download.file("https://gdc.xenahubs.net/download/TCGA-BRCA/Xena_Matrices/TCGA-BRCA.GDC_phenotype.tsv.gz", 
                destfile = gzfile)
  phenoData <- read.table( gzfile,
                           header = T,
                           sep = '  ',
                           quote = '' )
  save( phenoData, file = './TCGA-BRCA.GDC_phenotype.Rdata' )
}else{
  load('./TCGA-BRCA.GDC_phenotype.Rdata')
}
pheno_num <- c()
invisible(
  lapply(1:ncol(phenoData), 
         function(col_num){
           ## Assume that the classification project is between 2 and 4   #??
           if (1 < dim(table(phenoData[,4])) & 
               dim(table(phenoData[,col_num])) < 5) {
             pheno_num <<- append(pheno_num, col_num, after = length(pheno_num))
           }
         }
  )
)
View(phenoData[, pheno_num])
names(phenoData[, pheno_num])
## Category 3: TP53     
#SNV( single nucleotide variants)
if (!file.exists( './TCGA-BRCA.mutect2_snv.tsv.gz' )) {
  gzfile <- "./TCGA-BRCA.mutect2_snv.tsv.gz"
  download.file("https://gdc.xenahubs.net/download/TCGA-BRCA/Xena_Matrices/TCGA-BRCA.mutect2_snv.tsv.gz", 
                destfile = gzfile)
  mutype_file <- read.table( gzfile,
                             header = T,
                             sep = '    ',
                             quote = '' )
  save( mutype_file, file = './TCGA-BRCA.mutect2_snv.Rdata' )
}else{
  load('./TCGA-BRCA.mutect2_snv.Rdata')
}
raw_data[1:5, 1:15]
## Pick columns that contains 'tp53'
TP53 <- mutype_file[mutype_file$gene == 'tp53' | mutype_file$gene == 'TP53',]
TP53_sample <- unique( sort( TP53$Sample_ID ) )
tumor_sample <- colnames(raw_data)[substr( colnames(raw_data),14,15) < 10] #find 癌group="01" 
TP53_sample <- intersect(tumor_sample, TP53_sample)
noTP53_sample <- setdiff(tumor_sample, TP53_sample)
save(TP53_sample, noTP53_sample, file = './sample_by_TP53.Rdata')
# Step3 Filt sample ------------------------------------------------
load('./TCGA-BRCA.htseq_counts.Rdata')
tp53_sample <- c(TP53_sample, noTP53_sample)
AssayData <- raw_data[, tp53_sample]
dim(AssayData)
group_list <- c(rep('TP53', length(TP53_sample)),
                rep('NO_TP53', length(noTP53_sample)))
save(AssayData, group_list, file = './tnbc_tumor_TP53_AssayData.Rdata')
R-clinical data
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
推荐阅读更多精彩内容
- 作者编写的一个高效的多媒体支持操作开源库,可多方面的简单配置操作拍照、相册、录制、录音等功能。[https://w...
- #v课会·第3季·小学30天思维导图实战营# 打卡天数:27/30 打卡时间:2018.7.6 打卡主题:思维导图...