参考:
library(edgeR)
#载入row count文件
raw_count<-read.csv(file.choose(),header = T,sep = ",")
head(raw_count)
#构建DGEList对象
group<-factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)),
levels = c("ASD","Healthy"))
genelist<-DGEList(counts = as.matrix(raw_count), group = group)
#过滤low counts数据:利用CPM标准化
keep<-rowSums(cpm(genelist) > 0.5 ) >=2
table(keep)
genelist.filted<-genelist[keep, ,keep.lib.sizes=FALSE]
#使用TMM算法对DGEList标准化
genelist.norm<-calcNormFactors(genelist.filted)
#实验设计矩阵
design<-model.matrix(~0+group)
colnames(design)<-levels(group)
design
#估计离散值(Dispersion)
genelist.Disp<-estimateDisp(genelist.norm, design, robust = TRUE)
plotBCV(genelist.Disp)
#quasi-likelihood (QL)拟合NB模型:用于解释生物学和技术性导致的基因特异性变异
fit<-glmQLFit(genelist.Disp, design, robust=TRUE)
head(fit$coefficients)
#差异表达检验:构建比较矩阵
cntr.vs.KD<-makeContrasts(control-ASD, levels = design)
res<-glmQLFTest(fit, contrast = cntr.vs.KD)
ig.edger<-res$table[p.adjust(res$table$PValue, method = "BH") < 0.05, ]
#提取显著性差异的基因
topTags(res,n=10)
is.de <- decideTestsDGE(res)
summary(is.de)
plotMD(res, status=is.de, values=c(1,-1), col=c("red","blue"),
legend="topright")