hisat2+featurecounts+DESeq2

##从hisat2官网下载打包好的reference,然后比对##

hisat2 -p 16 -x ../reference/hg19/hisat2/grch37_snp_tran/genome_snp_tran -1 gjy2_1.fastq.gz-2 gjy2_2.fastq.gz-S gjy2.sam--summary-file gjy2.txt

samtools sort --threads 15 -o **.bam **.sam

##featurecounts计算rawcounts##

featureCounts -T 15 -p -t exon -g gene_id -a ../../reference/hg19/hisat2/ensemble_gtf/Homo_sapiens.GRCh37.75.gtf -o gjy1_featurecounts.txt**.bam

##取rawcounts列##

cut -f 1,7 gjy1_featurecounts.txt|grep -v '^#' > gjy1_rawcounts.txt

sed -i '1d' filename

##在Rsudio中分析差异基因###

setwd('xx_result')

library("DESeq2")

directory <-'xx/06_featurecounts'

directory

sampleFiles <- grep("wy",list.files(directory),value=TRUE)

sampleFiles

###注意control要放在前面####

sampleCondition <- c("control","control","KO","KO","KO")

sampleCondition

sampleTable <- data.frame(sampleName= sampleFiles,fileName = sampleFiles,condition = sampleCondition)

sampleTable

dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,directory = directory,design= ~ condition)

dds

dds <- dds [ rowSums(counts(dds)) > 1, ]

#PCA#

rld<-rlog(dds)

plotPCA(rld)

dds<-DESeq(dds)

res <- results(dds)

head(res)

summary(res)

resOrdered <- res[order(res$padj),]

resOrdered=as.data.frame(resOrdered)

head(resOrdered)

resOrdered=na.omit(resOrdered)

DEmiRNA=resOrdered[abs(resOrdered$log2FoldChange)>log2(1.5) & resOrdered$padj <0.01 ,]

head(resOrdered)

write.csv(resOrdered,"hisat2_samtools_htseq_DESeq2_output.csv")

##volcano.plot###

alpha <- 0.01 # Threshold on the adjusted p-value

cols <- densCols(res$log2FoldChange, -log10(res$pvalue))

plot(res$log2FoldChange, -log10(res$padj), col=cols, panel.first=grid(),

main="Volcano plot", xlab="Effect size: log2(fold-change)", ylab="-log10(adjusted p-value)",

pch=20, cex=0.6)

abline(v=0)

abline(v=c(-1,1), col="brown")

abline(h=-log10(alpha), col="brown")

gn.selected <- abs(res$log2FoldChange) > 2.5 & res$padj < alpha

text(res$log2FoldChange[gn.selected],

-log10(res$padj)[gn.selected],

lab=rownames(res)[gn.selected ], cex=0.7)

#MA图#

library("geneplotter")

plotMA(res,main="DESeq2",ylim=c(-2,2))

#heatmap#

select<-order(rowMeans(counts(dds,normalized=TRUE)),decreasing = TRUE)[1:666]

nt<-normTransform(dds)

log2.norm.counts<-assay(nt)[select,]

df<-as.data.frame(colData(dds))

library(pheatmap)

pheatmap(log2.norm.counts,cluster_rows = TRUE,show_rownames = FALSE,cluster_cols = TRUE,annotation_col = df)

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • library(pheatmap) data<- read.table("new 1.txt",header = ...
    Weiyx阅读 3,843评论 0 0
  • setwd('xx_result') library("DESeq2") directory <-'xx/06_f...
    njmujjc阅读 7,713评论 0 1
  • 一、读文章获取下载数据 1、读文章 一般我都从NCBI上面下载文章,找到数据号 2、下载数据 进入NCBI的GEO...
    黄思源_3a22阅读 11,399评论 0 2
  • need:表达矩阵 分组矩阵,值要是整数 DESeq2和EdgeR都可用于做基因差异表达分析,主要也是用于RNA-...
    白云梦_7阅读 46,434评论 1 24
  • 一、精力管理学习 今天精力管理进入身材的话题。 二、MOS考试准备 已经注册好账号两天了,我却还没有进入学习,准备...
    2队小果_苏州阅读 1,248评论 0 0