- DESeq2 用来寻找差异基因
2.代码及结果展示
setwd("Desktop/new/DEGenes")
library(tidyverse)
library(ashr)
library(DESeq2)
导入数据------------------------------
mycounts <- read_csv("countData_4.csv")
metadata <- read_csv("colData_4.csv")
mycounts <- as.data.frame(mycounts)
metadata <- as.data.frame(metadata)
mycounts
metadata
class(mycounts)
class(metadata)
检查数据匹配性-----------------------
names(mycounts)[-1]
metadataid
all(names(mycounts)[-1]==metadata$id)
构建dds------------------------------
dds <- DESeqDataSetFromMatrix(countData = mycounts,
colData = metadata,
design = ~ phase,
tidy=TRUE)
ddsphase,levels = c("control","first","first_R","second","second_R","fourth")) # 因子化
keep <- rowSums(counts(dds)) >= 10 # pre-filtering
dds <- dds[keep,]
差异分析--------------------------
dds <- DESeq(dds)
dds
sizeFactors(dds)
dispersions(dds)
resultsNames(dds)
差异分析结果展示/主成分分析------------
vsdata <- vst(dds, blind=FALSE)
p <- plotPCA(vsdata, intgroup="phase")
p
一张配色敲好看的主成分分析图~
显示全部的normalize后的表达量数据--------------
normalized.data <- counts(dds,normalized = TRUE)
write.csv(normalized.data,file = "normalized expression data_group4.csv")
差异分析结果展示/DEGenes identification---------
p-values and adjusted p-values------------------
res1-------------------------
res1 <- results(dds,contrast = c("phase","first","control"),alpha=0.05) # How many adjusted p-values were less than 0.05?
res1 <- res1[order(res1description # More information on results columns
plotMA(res1, ylim=c(-5,5),main = "first vs control(unshrinked)")
resLFC <- lfcShrink(dds,contrast = c("phase","first","control"), type="ashr")
plotMA(resLFC,ylim=c(-5,5),main = "first vs control")
summary(res1)
sum(res1$padj < 0.05, na.rm=TRUE)
write.csv(as.data.frame(res1), file="first vs control_all_DEgenes.csv") # Exporting results to CSV files
resSig1 <- subset(res1, padj < 0.05)
write.csv(as.data.frame(resSig1), file="first vs control_significant_DEgenes.csv") # Exporting results to CSV files