另外一个就是指定哪一组作为control组,在计算log2FD时 ,需要明确control组,默认会字符串顺序对分组的名字进行排序,排在前面的作为control组,这种默认行为选出的control可能与我们的实验设计不同,所以必须明确指定control组。
library(DESeq2)
setwd('F:/wgcna/差异表达/')
raw_count=read.csv('lung.csv',sep=",",header=T)
count_data<-raw_count[,2:7]
row.names(count_data) <- raw_count[,1]
condition <- factor(c("L.cow_lung","L.cow_lung","L.cow_lung" ,"H.yak_lung","H.yak_lung","H.yak_lung"))
col_data <- data.frame(row.names = colnames(count_data), condition)
colData
dds <- DESeqDataSetFromMatrix(countData = count_data,colData = col_data,design = ~ condition)
nrow(dds)
dds_filter <- dds[ rowSums(counts(dds))>1, ]
nrow(dds_filter)#过滤了多少
dds_out = DESeq(dds_filter)
res = results(dds_out, contrast=c("condition", "L.cow_lung", "H.yak_lung"))
res = res[order(res$pvalue),]
head(res)
summary(res)
#write.csv(res,file="lung_deseq.csv")
table(res$padj<0.01)
#提取基因
diff_gene_deseq2 <-subset(res, padj < 0.01 & abs(log2FoldChange) > 1)
dim(diff_gene_deseq2)
head(diff_gene_deseq2)
summary(diff_gene_deseq2)
#write.csv(diff_gene_deseq2,file= "lung_DEG_0.05_2.csv")
#---------------可视化-----------------
#PCA
vsdata <- rlog(dds, blind=FALSE)#vst和rlog都是归一化的方法
plotPCA(vsdata, intgroup="condition")
#beatifule pca plot
#pcaData <- plotPCA(vsdata, intgroup=c("condition"), returnData=TRUE)
#percentVar <- round(100 * attr(pcaData, "percentVar"))
pdf(file = 'lung_pca.pdf')
#ggplot(pcaData, aes(PC1, PC2, color=condition, shape=condition)) +
# geom_point(size=3) +
# xlab(paste0("PC1: ",percentVar[1],"% variance")) +
#ylab(paste0("PC2: ",percentVar[2],"% variance")) +
#coord_fixed()
#+ggtitle("A")+theme(plot.title = element_text(hjust = 0.5))
#想要显示名称的话
pcaData <- plotPCA(vsdata, intgroup=c("condition"), returnData=TRUE)
plot(pcaData[,1:2],pch=19,col=c("red","red","red","blue","blue","blue"))
text(pcaData[,1],pcaData[,2]+0.2,row.names(pcaData),cex=0.5)
dev.off()
pdf(file = '火山图_lung.pdf')
EnhancedVolcano(res,
lab = rownames(res),
x = 'log2FoldChange',
y = 'pvalue',
xlim = c(-8, 8),
)
dev.off
library(genefilter)
library(pheatmap)
rld <- rlogTransformation(dds_out,blind = F)
#write.csv(assay(rld),file="mm.DESeq2.pseudo.counts.csv")
topVarGene <- head(order(rowVars(assay(rld)),decreasing = TRUE),20)
mat <- assay(rld)[ topVarGene, ]
### mat <- mat - rowMeans(mat) 减去一个平均值,让数值更加集中。第二个图
anno <- as.data.frame(colData(rld)[,c("condition","sizeFactor")])
pdf(file = 'lung_heatmap.pdf')
pheatmap(mat, annotation_col = anno)## clustering_method参数设定不同聚类方法,默认为"complete",可以设定为'ward', 'ward.D', 'ward.D2', 'single', 'complete', 'average', 'mcquitty', 'median' or 'centroid'
dev.off()