miRNA-seq数据分析——学习笔记

@(我的第一个笔记本)[马克飞象, 帮助, Markdown]

—bugu，2020.11.16
记录miRNA-seq数据分析方法，参考jm生信技能树的—— 构建miRNA-seq数据分析环境

step1：使用TBtools小工具sRNAseqAdaperRemover去除测序原始数据接头，参考——[小RNA数据分析-第一步(去除测序数据接头)](file:///F:/%E4%B8%8B%E8%BD%BD/%E7%BD%91%E9%A1%B5/%E5%B0%8FRNA%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90-%E7%AC%AC%E4%B8%80%E6%AD%A5.html)

TBtools=java -cp /home/test/software/TBtools_JRE1.6.jar
ls *.gz\
while read i\
do "$TBtools" biocjava.sRNA.Tools.sRNAseqAdaperRemover\
        --inFxFile "$i"\
        --outFaFile "${i%%_*}".filter.fasta\
done

Step2：利用seqkit过滤掉小于18bp，大于40bp的片段

ls *fasta |\
while read i;\
do  seqkit seq -m 18 "$i" |\
    seqkit seq -M 40 > "${i%%.*}"_filter.fasta ;\
done

Step3：利用bowtie比对到B73参考基因组

dir=/home/test/workspace/05.miRNA
reference=/home/test/data/genome/zeamays/index_bowtie

ls *fasta |\
while read i ; \
do
bowtie -f -S -p 24 -M 1 --best --strata "$reference"/Zea_mays "$dir"/1_q_adapter_data/"$i" | samtools view -Sb -o ../2_align/"${i%%.*}".bam 2>log
done

Step4：使用featureCounts进行定量

featureCounts -T 4 -F gff  -M -t miRNA_primary_transcript  -g Name  -a zma.gff3 -o all.counts.hairpin.txt *.bam 1>counts.log 2>&1

Step5：使用DESeq2进行差异分析

library(DESeq2)

data = read.table("de_analysis/read_count_MIR_extract.txt", header=T, row.names=1, com='')
col_ordering = c(1,2,3,4,5,6)
rnaseqMatrix = data[,col_ordering]
rnaseqMatrix = round(rnaseqMatrix)
rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = data.frame(conditions=factor(c(rep("Chang72", 3), rep("Fan", 3))))
rownames(conditions) = colnames(rnaseqMatrix)
ddsFullCountTable <- DESeqDataSetFromMatrix(
    countData = rnaseqMatrix,
    colData = conditions,
    design = ~ conditions)
dds = DESeq(ddsFullCountTable)
res = results(dds)
baseMeanA <- rowMeans(counts(dds, normalized=TRUE)[,colData(dds)$condition == "Chang72"])
baseMeanB <- rowMeans(counts(dds, normalized=TRUE)[,colData(dds)$condition == "Fan"])
res = cbind(baseMeanA, baseMeanB, as.data.frame(res))
res = cbind(id=rownames(res), as.data.frame(res))
res$padj[is.na(res$padj)]  <- 1
write.table(as.data.frame(res[order(res$pvalue),]), file='Chang72_vs_Fan.txt', sep='    ', quote=FALSE, row.names=F)
#source("/disks/workin/zwm/miniconda3/opt/trinity-2.1.1/Analysis/DifferentialExpression/R/rnaseq_plot_funcs.R")
#pdf("read_count_extrat.txt.COI_vs_Col_Na.DESeq2.DE_results.MA_n_Volcano.pdf")
#plot_MA_and_Volcano(log2(res$baseMean+1), res$log2FoldChange, res$padj)
#dev.off()