@(我的第一个笔记本)[马克飞象, 帮助, Markdown]
—bugu,2020.11.16
记录miRNA-seq数据分析方法 ,参考jm生信技能树的—— 构建miRNA-seq数据分析环境
step1:使用TBtools小工具sRNAseqAdaperRemover去除测序原始数据接头,参考——[小RNA数据分析-第一步(去除测序数据接头)](file:///F:/%E4%B8%8B%E8%BD%BD/%E7%BD%91%E9%A1%B5/%E5%B0%8FRNA%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90-%E7%AC%AC%E4%B8%80%E6%AD%A5.html)
TBtools=java -cp /home/test/software/TBtools_JRE1.6.jar
ls *.gz\
while read i\
do "$TBtools" biocjava.sRNA.Tools.sRNAseqAdaperRemover\
--inFxFile "$i"\
--outFaFile "${i%%_*}".filter.fasta\
done
Step2:利用seqkit过滤掉小于18bp,大于40bp的片段
ls *fasta |\
while read i;\
do seqkit seq -m 18 "$i" |\
seqkit seq -M 40 > "${i%%.*}"_filter.fasta ;\
done
Step3:利用bowtie比对到B73参考基因组
dir=/home/test/workspace/05.miRNA
reference=/home/test/data/genome/zeamays/index_bowtie
ls *fasta |\
while read i ; \
do
bowtie -f -S -p 24 -M 1 --best --strata "$reference"/Zea_mays "$dir"/1_q_adapter_data/"$i" | samtools view -Sb -o ../2_align/"${i%%.*}".bam 2>log
done
Step4:使用featureCounts进行定量
featureCounts -T 4 -F gff -M -t miRNA_primary_transcript -g Name -a zma.gff3 -o all.counts.hairpin.txt *.bam 1>counts.log 2>&1
Step5:使用DESeq2进行差异分析
library(DESeq2)
data = read.table("de_analysis/read_count_MIR_extract.txt", header=T, row.names=1, com='')
col_ordering = c(1,2,3,4,5,6)
rnaseqMatrix = data[,col_ordering]
rnaseqMatrix = round(rnaseqMatrix)
rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = data.frame(conditions=factor(c(rep("Chang72", 3), rep("Fan", 3))))
rownames(conditions) = colnames(rnaseqMatrix)
ddsFullCountTable <- DESeqDataSetFromMatrix(
countData = rnaseqMatrix,
colData = conditions,
design = ~ conditions)
dds = DESeq(ddsFullCountTable)
res = results(dds)
baseMeanA <- rowMeans(counts(dds, normalized=TRUE)[,colData(dds)$condition == "Chang72"])
baseMeanB <- rowMeans(counts(dds, normalized=TRUE)[,colData(dds)$condition == "Fan"])
res = cbind(baseMeanA, baseMeanB, as.data.frame(res))
res = cbind(id=rownames(res), as.data.frame(res))
res$padj[is.na(res$padj)] <- 1
write.table(as.data.frame(res[order(res$pvalue),]), file='Chang72_vs_Fan.txt', sep=' ', quote=FALSE, row.names=F)
#source("/disks/workin/zwm/miniconda3/opt/trinity-2.1.1/Analysis/DifferentialExpression/R/rnaseq_plot_funcs.R")
#pdf("read_count_extrat.txt.COI_vs_Col_Na.DESeq2.DE_results.MA_n_Volcano.pdf")
#plot_MA_and_Volcano(log2(res$baseMean+1), res$log2FoldChange, res$padj)
#dev.off()