GEO数据库分析

保姆式GEO数据挖掘演示
 配对GEO数据分析
 GEO数据挖掘系列文
 TCGAbiolinks做差异分析
我的一个paired DEGs 的例子
rm(list=ls())
suppressMessages(library(limma))
suppressMessages(library("plyr"))
data_dir<-"your_path"
setwd(data_dir)
#####download data
if(T){
  suppressMessages(library(GEOquery))
  gset <- getGEO('GSE76161', destdir=".",
                 AnnotGPL = T,    
                 getGPL = T)      
  save(gset,file=paste0(human_dir,'/GSE76161_eSet.Rdata'))
}
load(paste0(human_dir,'/GSE76161_eSet.Rdata')) 

###Get gene expression and phenotype data
b = gset[[1]] 
exprSet=exprs(b) 
###get GPL information and clean the data 
GPL = b@featureData@data 
colnames(GPL)
ids = GPL[,c(1,3)] 
ids = ids[ids[,2] != '',] 
a<-strsplit(as.character(ids[,2]), "///")
tmp <- mapply(cbind, ids[,1],a)
df <- ldply (tmp, data.frame) 
ID2gene = df
colnames(ID2gene) = c("id", "gene")
exprSet = exprSet[rownames(exprSet) %in% ID2gene[,1],]
ID2gene = ID2gene[match(rownames(exprSet), ID2gene[,1]),]
dim(ID2gene)
dim(exprSet)
tail(sort(table(ID2gene[,2])), n = 12L)
{
  MAX = by(exprSet, ID2gene[,2], 
           function(x) rownames(x)[ which.max(rowMeans(x))])
  MAX = as.character(MAX)   
  exprSet = exprSet[rownames(exprSet) %in% MAX,]     
  rownames( exprSet ) = ID2gene[ match( rownames( exprSet ), ID2gene[ , 1 ] ), 2 ]
}
exprSet[1:5,1:5] 
pdata = pData(b) 
colnames(pdata)
save(exprSet, pdata, file = paste0(human_dir,'/finalSet.Rdata'))

###DEG analysis
#build the matrix 

SibShip <- factor(pdata$characteristics_ch1.3,levels=c("patient_id: 3","patient_id: 4","patient_id: 5"))
Treat<-factor(pdata[,47],levels=c("vehicle (0.1% DMSO)","1 uM OCA (INT-747)"))
design <- model.matrix(~0 + (SibShip+Treat))
rownames(design) = colnames(exprSet)  
fit1 <- lmFit(exprSet, design)
#fit2 <- contrasts.fit(fit1, design)
fit2 <- eBayes(fit1)
nrDEG = topTable( fit2, 
                  adjust.method="BH",
                  coef = "Treat1 uM OCA (INT-747)",
                  n = nrow(fit2)
                  )
nrDEG<-nrDEG[order(nrDEG$P.Value),]
write.csv(nrDEG,paste0(data_dir,"/GSE76161_human_DEGs_INT-747_vs_DMSO.csv"))
GEO数据库分析

推荐阅读更多精彩内容