转录组分析

一、文章数据下载

安装miniconda

sudo apt-get install wget

wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh

bash Miniconda3-latest-Linux-x86_64.sh

source ~/.bashrc

中科大源

conda config --add channels http://mirrors.ustc.edu.cn/anaconda/pkgs/main/

conda config --add channels http://mirrors.ustc.edu.cn/anaconda/pkgs/free/

conda config --add channels http://mirrors.ustc.edu.cn/anaconda/cloud/conda-forge/

conda config --add channels http://mirrors.ustc.edu.cn/anaconda/cloud/msys2/

conda config --add channels http://mirrors.ustc.edu.cn/anaconda/cloud/bioconda/

conda config --add channels http://mirrors.ustc.edu.cn/anaconda/cloud/menpo/

conda config --set show_channel_urls yes

清华源

conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/

conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/

conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/

conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/

conda config --set show_channel_urls yes

安装软件

conda create -n rnaseq python=3

安装软件前，请先激活rnaseq环境：conda activate rnaseq

安装sra-tools：conda install sra-tools

source activate rnaseq

质控软件fastqc multiqc trimmomatic cutadapt trim-galore

比对star hisat2 bowtie2 tophat bwa subread

计数：conda install htseq bedtools deeptools salmon

# Install the QC, trimming, alignment and counting tools used in this workflow.
# fastqc, multiqc, fastp and samtools are included because later steps call them.
conda install fastqc multiqc fastp samtools trimmomatic cutadapt trim-galore star hisat2 bowtie2 tophat bwa subread htseq bedtools deeptools salmon

下载数据

下载SRR_Acc_List.txt文件（每行一个SRR编号），然后用prefetch批量下载：

cat SRR_Acc_List.txt | while read id; do prefetch $id; done

挂在后台下载：

# Start one background prefetch job per accession listed in SRR_Acc_List.txt.
while read -r id; do (prefetch "$id" &); done < SRR_Acc_List.txt

转化为fastq文件

fastq-dump SRR10695769 --gzip --split-3 -O /mnt/f/project/HN/

批量转换sra到fq格式

# Convert every downloaded .sra file to gzipped FASTQ, one background job per file.
# --split-3 is a single option; writing it as "--split -3" is not accepted.
for sra in /mnt/f/project/N/sra/*.sra; do
  [[ -e "$sra" ]] || continue   # glob matched nothing
  ( nohup fastq-dump --gzip --split-3 -O /mnt/f/project/N/fastq "$sra" & )
done

二、质控

fastqc

ls /mnt/f/project/HN/fastq/*.gz | while read id; do ( nohup fastqc -q -t 4 -o /mnt/f/project/HN/fastqc ${id} & ); done

fastp

单端测序

fastp -i in.fq -o out.fq

# Run fastp on every single-end *.fastq.gz in the current directory, one at a time.
# Cleaned reads go to ./fastp_data, HTML/JSON reports to ./fastp_result.
mkdir -p ./fastp_data ./fastp_result
for fq in *.fastq.gz; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  i=${fq%.fastq.gz}
  fastp -w 4 -i "${i}.fastq.gz" -o "./fastp_data/${i}.fastq.gz" \
    -h "./fastp_result/${i}.html" -j "./fastp_result/${i}.json"
done

# Launch one background fastp job per single-end *.fastq.gz in the current directory.
# The sample id is the file name without its .fastq.gz suffix.
mkdir -p ./fastp_data ./fastp_result
for fq in *.fastq.gz; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  id=${fq%.fastq.gz}
  ( nohup fastp -w 4 -i "${id}.fastq.gz" -o "./fastp_data/${id}.fq.gz" \
      -h "./fastp_result/${id}.html" -j "./fastp_result/${id}.json" & )
done

双端测序

ls *1.fastq.gz |cut -d "_" -f 1 |sort -u | while read id; do ( nohup fastp -w 4 -i ${id}_1.fastq.gz -I ${id}_2.fastq.gz -o ./fastp_data/${id}_1.fastq.gz -O ./fastp_data/${id}_2.fastq.gz -h ./fastp_result/${id}.html -j ./fastp_result/${id}.json & ) ; done

#!/bin/bash
#
# Quality-trim paired-end RNA-seq reads with fastp.
#
# Sample naming: the sample id is derived from the mate-1 file name, so the glob
# and the suffixes below must match how the files are named:
#   (1) SRR10695753_1.fastq.gz -> glob *_1.fastq.gz, mates ${id}_1.fastq.gz / ${id}_2.fastq.gz
#   (2) Sample_Ck_1.R1.fq.gz   -> glob *.R1.fq.gz,   mates ${id}.R1.fq.gz / ${id}.R2.fq.gz
# This script uses scheme (2).

raw_dir=/mnt/g/project/N_batch/data/rawdata
out_root=/mnt/g/project/N_batch/data/cleandata/fastp

# Stop early rather than run fastp in the wrong directory.
cd "$raw_dir" || exit 1
mkdir -p "$out_root/fastp_data" "$out_root/fastp_result" || exit 1

# One background fastp job per sample.
for r1 in *.R1.fq.gz; do
  [[ -e "$r1" ]] || continue   # glob matched nothing
  id=${r1%.R1.fq.gz}
  ( nohup fastp -w 4 \
      -i "${id}.R1.fq.gz" -I "${id}.R2.fq.gz" \
      -o "$out_root/fastp_data/${id}_cleandata.R1.fq.gz" \
      -O "$out_root/fastp_data/${id}_cleandata.R2.fq.gz" \
      -h "$out_root/fastp_result/${id}.html" \
      -j "$out_root/fastp_result/${id}.json" & )
done

#trim_galore

#!/bin/bash
#
# Adapter/quality trimming of paired-end RNA-seq reads with trim_galore.
# Starts one background trim_galore job per _1/_2 FASTQ pair.

cd /mnt/g/project/N_batch/data/rawdata || exit 1

# Build a two-column sample sheet: <mate 1 file> TAB <mate 2 file>.
ls *_1.fastq.gz > 1
ls *_2.fastq.gz > 2
paste 1 2 > config

dir=/mnt/g/project/N_batch/data/cleandata/trim_galore
mkdir -p "$dir" || exit 1

# read splits each line of config into the two mate file names.
while read -r fq1 fq2; do
  nohup trim_galore -q 25 --phred33 --length 35 -e 0.1 --stringency 3 \
    --paired -o "$dir" "$fq1" "$fq2" &
done < config

比对

#!/bin/bash
#
# Align single-end reads with HISAT2, one sample at a time.
# Each <sample>.fq.gz in the current directory produces <sample>.sam.

for fq in *.fq.gz; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  # Strip only the .fq.gz suffix so sample names containing dots stay intact.
  id=${fq%.fq.gz}
  nohup hisat2 -p 4 -x /home/user/../genome_hist2/hisat2_css_index \
    -U "${id}.fq.gz" -S "${id}.sam"
done

双端测序

#!/bin/bash
#
# Align paired-end reads with HISAT2, one sample at a time.
# Mates are expected as <sample>_1.fastq.gz / <sample>_2.fastq.gz in the
# current directory.

for r1 in *_1.fastq.gz; do
  [[ -e "$r1" ]] || continue   # glob matched nothing
  id=${r1%_1.fastq.gz}
  # List both mate files first; skip the sample if either one is missing.
  ls -lh "${id}_1.fastq.gz" "${id}_2.fastq.gz" || continue
  nohup hisat2 -p 4 -x /home/user/../genome_hist2/hisat2_css_index \
    -1 "${id}_1.fastq.gz" -2 "${id}_2.fastq.gz" \
    -S "/home/user/../HN/sam/${id}.sam"
done

转bam排序

#!/bin/bash
#
# For every SAM file in the current directory: write a sorted BAM, then index
# each BAM and save its flagstat summary as <name>.flagstat.

for sam in *.sam; do
  [[ -e "$sam" ]] || continue   # glob matched nothing
  samtools sort -O bam -@ 5 -o "$(basename "$sam" .sam).bam" "$sam"
done

for bam in *.bam; do
  [[ -e "$bam" ]] || continue   # glob matched nothing
  samtools index "$bam"
  samtools flagstat -@ 10 "$bam" > "$(basename "$bam" .bam).flagstat"
done

计数

conda install subread -y

# Count reads for all BAM files in the current directory (-f counts at the
# feature level, i.e. per exon, since -t exon is given).
# Replace ****.gtf with the path to the annotation file.
featureCounts -T 4 -f -t exon -g gene_id -a ****.gtf -o all_id.count ./*.bam

转录组分析

转录组分析

相关阅读更多精彩内容

友情链接更多精彩内容