转录组分析

一、文章数据下载

安装miniconda

# Install wget, then fetch and run the Miniconda installer for 64-bit Linux.
sudo apt-get install wget
installer=Miniconda3-latest-Linux-x86_64.sh
wget "https://repo.anaconda.com/miniconda/${installer}"
bash "${installer}"
# Re-read ~/.bashrc in the current shell (presumably to pick up the
# installer's conda initialisation -- confirm the installer was allowed to
# modify it).
. ~/.bashrc

中科大源

# Register the USTC mirror as conda channels.
# `conda config --add` prepends to the channel list, so the channel added
# last ends up with the highest priority; the original order is preserved.
# HTTPS replaces the original plain-HTTP URLs so packages are not fetched
# over an unencrypted connection.
ustc=https://mirrors.ustc.edu.cn/anaconda
for channel in pkgs/main pkgs/free cloud/conda-forge cloud/msys2 cloud/bioconda cloud/menpo; do
  conda config --add channels "${ustc}/${channel}/"
done
# Show the source channel URL next to each package in conda's output.
conda config --set show_channel_urls yes

清华源

# Register the Tsinghua (TUNA) mirror as conda channels, in the same order
# as before (each --add prepends, so the last one gets top priority).
tuna=https://mirrors.tuna.tsinghua.edu.cn/anaconda
for channel in pkgs/free pkgs/main cloud/conda-forge cloud/bioconda; do
  conda config --add channels "${tuna}/${channel}/"
done
# Show the source channel URL next to each package in conda's output.
conda config --set show_channel_urls yes

安装软件

# Create a dedicated Python 3 environment named "rnaseq".
conda create --name rnaseq python=3

安装软件前,请先激活rnaseq环境,保证软件都装在该环境中:conda activate rnaseq

安装下载工具:conda install sra-tools

# Activate the rnaseq environment before installing anything into it.
# `conda activate` is the supported form since conda 4.4; `source activate`
# is the legacy spelling.
conda activate rnaseq

质控软件:fastqc multiqc trimmomatic cutadapt trim-galore

比对软件:star hisat2 bowtie2 tophat bwa subread

计数软件:conda install htseq bedtools deeptools salmon

# Install every tool used in this walkthrough in one transaction.
# Compared with the original list this adds fastqc and multiqc (listed as QC
# tools but never installed) plus sra-tools, fastp and samtools, which later
# steps call (prefetch/fastq-dump, fastp, samtools sort/index/flagstat).
# NOTE(review): tophat has historically required Python 2 -- confirm it
# resolves inside this Python 3 environment, or drop it.
conda install sra-tools fastqc multiqc fastp trimmomatic cutadapt trim-galore \
  star hisat2 bowtie2 tophat bwa subread samtools \
  htseq bedtools deeptools salmon

下载数据

下载SRR_Acc_List.txt文件(每行一个SRR编号),然后用prefetch逐个下载:

# Download every accession listed in SRR_Acc_List.txt, one after another.
# `read -r` keeps backslashes literal, the `|| [[ -n ... ]]` clause handles a
# final line without a trailing newline, and blank lines are skipped so
# prefetch is never invoked without an accession.
while IFS= read -r id || [[ -n "$id" ]]; do
  [[ -n "$id" ]] || continue
  prefetch "$id"
done < SRR_Acc_List.txt

挂到后台并行下载:

# Start one background prefetch per accession in SRR_Acc_List.txt (the
# original read from a file called "id", which does not match the accession
# list used elsewhere in this walkthrough).
# The ( ... & ) form launches each job from a subshell so it is detached from
# this shell's job list; stdin is taken from /dev/null so a background job
# can never consume the accession list feeding the loop.
# All downloads start at once -- keep the list short or throttle manually.
while IFS= read -r id || [[ -n "$id" ]]; do
  [[ -n "$id" ]] || continue
  ( prefetch "$id" < /dev/null & )
done < SRR_Acc_List.txt

转化为fastq文件

# Convert one run to gzipped FASTQ; --split-3 writes paired reads to
# _1/_2 files and any unpaired reads to a separate file.
fastq-dump --split-3 --gzip --outdir /mnt/f/project/HN/ SRR10695769

批量转换sra到fq格式

# Convert every downloaded .sra file to gzipped FASTQ, one background job per
# file. Fixes the original "--split -3" typo (the option is "--split-3") and
# iterates over the glob directly instead of parsing `ls` output.
out_dir=/mnt/f/project/N/fastq
mkdir -p "$out_dir"
for sra in /mnt/f/project/N/sra/*.sra; do
  [[ -e "$sra" ]] || continue   # glob matched nothing
  ( nohup fastq-dump --gzip --split-3 -O "$out_dir" "$sra" & )
done


二、质控

fastqc

# Run FastQC on every gzipped FASTQ file, one background job (4 threads,
# quiet mode) per file.
# FastQC does not create the -o directory itself, so it is created up front.
out_dir=/mnt/f/project/HN/fastqc
mkdir -p "$out_dir"
for fq in /mnt/f/project/HN/fastq/*.gz; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  ( nohup fastqc -q -t 4 -o "$out_dir" "$fq" & )
done

fastp

单端测序

# Minimal single-end fastp call: read in.fq, write the filtered reads to out.fq.
fastp --in1 in.fq --out1 out.fq

# Run fastp (4 worker threads) on every single-end *.fastq.gz file in the
# current directory, one file at a time.
# The sample name is derived with parameter expansion instead of
# `ls | sed s/.fastq.gz//g`, whose unescaped, unanchored pattern could strip
# the wrong part of a name. Output directories are created first.
mkdir -p ./fastp_data ./fastp_result
for fq in *.fastq.gz; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  i=${fq%.fastq.gz}
  fastp -w 4 -i "$fq" -o "./fastp_data/${i}.fastq.gz" \
    -h "./fastp_result/${i}.html" -j "./fastp_result/${i}.json"
done


# Background variant of the single-end fastp loop: one detached job per file.
# Fixes relative to the original one-liner:
#   * "-o  /fastp_data/ ${id}.fq.gz" contained a stray space, so fastp got a
#     directory as its output and the file name as an extra argument;
#   * outputs pointed at /fastp_data and /fastp_result in the filesystem root
#     instead of ./fastp_data and ./fastp_result as in the other examples;
#   * `cut -d "_" -f 1` returns the whole file name for single-end files that
#     contain no "_", producing "<name>.fastq.gz.fastq.gz" as the input.
mkdir -p ./fastp_data ./fastp_result
for fq in *.fastq.gz; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  id=${fq%.fastq.gz}
  ( nohup fastp -w 4 -i "$fq" -o "./fastp_data/${id}.fq.gz" \
      -h "./fastp_result/${id}.html" -j "./fastp_result/${id}.json" & )
done

双端测序

# Paired-end fastp: one detached background job per <id>_1/<id>_2 pair.
# The id is obtained by stripping the "_1.fastq.gz" suffix, so sample names
# that themselves contain "_" are kept intact (the original
# `cut -d "_" -f 1` truncated them at the first underscore).
mkdir -p ./fastp_data ./fastp_result
for r1 in *_1.fastq.gz; do
  [[ -e "$r1" ]] || continue   # glob matched nothing
  id=${r1%_1.fastq.gz}
  ( nohup fastp -w 4 \
      -i "${id}_1.fastq.gz" -I "${id}_2.fastq.gz" \
      -o "./fastp_data/${id}_1.fastq.gz" -O "./fastp_data/${id}_2.fastq.gz" \
      -h "./fastp_result/${id}.html" -j "./fastp_result/${id}.json" & )
done

#!/bin/bash
# Run fastp on paired-end RNA-seq reads, one detached background job per sample.
#
# Mind the sample naming; it decides how the sample id is derived:
#   (1) SRR10695753_1.fastq.gz -> strip "_1.fastq.gz";
#       mates are ${id}_1.fastq.gz and ${id}_2.fastq.gz
#   (2) Sample_Ck_1.R1.fq.gz   -> strip ".R1.fq.gz";
#       mates are ${id}.R1.fq.gz and ${id}.R2.fq.gz
# This script handles layout (2).

raw_dir=/mnt/g/project/N_batch/data/rawdata
fastp_dir=/mnt/g/project/N_batch/data/cleandata/fastp

# Stop if the raw-data directory is missing, rather than globbing in the wrong place.
cd "$raw_dir" || exit 1
mkdir -p "${fastp_dir}/fastp_data" "${fastp_dir}/fastp_result"

for r1 in *.R1.fq.gz; do
  [[ -e "$r1" ]] || continue   # glob matched nothing
  id=${r1%.R1.fq.gz}
  ( nohup fastp -w 4 \
      -i "${id}.R1.fq.gz" -I "${id}.R2.fq.gz" \
      -o "${fastp_dir}/fastp_data/${id}_cleandata.R1.fq.gz" \
      -O "${fastp_dir}/fastp_data/${id}_cleandata.R2.fq.gz" \
      -h "${fastp_dir}/fastp_result/${id}.html" \
      -j "${fastp_dir}/fastp_result/${id}.json" & )
done

#trim_galore

#!/bin/bash
# Trim paired-end RNA-seq reads with trim_galore, one background job per pair.
#
# Fixes relative to the original listing:
#   * stray spaces split the raw-data path and the `dir=` assignment (the
#     latter ran a bare `trim_galore` with dir set only in its environment);
#   * `while read id do` lacked the `;` before `do`;
#   * `arr=${id}` is a plain string, so ${arr[1]} (fq2) was always empty;
#     the two columns are now read straight into fq1 and fq2.

raw_dir=/mnt/g/project/N_batch/data/rawdata
dir=/mnt/g/project/N_batch/data/cleandata/trim_galore

cd "$raw_dir" || exit 1
mkdir -p "$dir"

# Build a two-column (tab-separated) table: mate-1 file, mate-2 file.
# Both listings are sorted by the shell, so row N of each belongs to the same sample.
ls -- *_1.fastq.gz > 1
ls -- *_2.fastq.gz > 2
paste 1 2 > config

while read -r fq1 fq2; do
  [[ -n "$fq1" && -n "$fq2" ]] || continue   # skip incomplete rows
  # stdin comes from /dev/null so the background job cannot consume `config`.
  nohup trim_galore -q 25 --phred33 --length 35 -e 0.1 --stringency 3 \
    --paired -o "$dir" "$fq1" "$fq2" < /dev/null &
done < config


比对


#!/bin/bash
# Align single-end reads with HISAT2 (4 threads), one sample after another,
# writing <sample>.sam next to the input.
# The sample id is the file name minus ".fq.gz"; the original
# `ls | cut -d "." -f 1` broke on names containing additional dots.
# NOTE(review): the ".." in the index path looks like an elided placeholder --
# replace it with the real HISAT2 index prefix.
index=/home/user/../genome_hist2/hisat2_css_index

for fq in *.fq.gz; do
  [[ -e "$fq" ]] || continue   # glob matched nothing
  id=${fq%.fq.gz}
  nohup hisat2 -p 4 -x "$index" -U "$fq" -S "${id}.sam"
done

双端测序

#!/bin/bash
# Align paired-end reads with HISAT2 (4 threads), one sample after another.
# The original one-liner lacked a `;` between `ls -lh ...` and `nohup hisat2`,
# so the whole hisat2 command line was passed to ls as arguments and no
# alignment ever ran.
# NOTE(review): the ".." in both paths looks like an elided placeholder --
# replace with the real index prefix and an existing SAM output directory.
index=/home/user/../genome_hist2/hisat2_css_index
sam_dir=/home/user/../HN/sam

for r1 in *_1.fastq.gz; do
  [[ -e "$r1" ]] || continue   # glob matched nothing
  id=${r1%_1.fastq.gz}
  # List both mates first; a missing mate shows up here as an ls error.
  ls -lh "${id}_1.fastq.gz" "${id}_2.fastq.gz"
  nohup hisat2 -p 4 -x "$index" \
    -1 "${id}_1.fastq.gz" -2 "${id}_2.fastq.gz" \
    -S "${sam_dir}/${id}.sam"
done


转bam排序

#!/bin/bash
# Convert every SAM in the current directory to a coordinate-sorted BAM, then
# index each BAM and write its alignment summary to <name>.flagstat.
# Globs replace `ls | while read` / `ls | xargs -i` (xargs -i is deprecated
# in favour of -I), and all expansions are quoted.

for sam in *.sam; do
  [[ -e "$sam" ]] || continue   # glob matched nothing
  samtools sort -O bam -@ 5 -o "${sam%.sam}.bam" "$sam"
done

for bam in *.bam; do
  [[ -e "$bam" ]] || continue   # glob matched nothing
  samtools index "$bam"
  samtools flagstat -@ 10 "$bam" > "${bam%.bam}.flagstat"
done

计数

# Install Subread, the package that ships featureCounts.
conda install -y subread

# Count reads over exon features, labelled by gene_id, using 4 threads.
# The original used the literal placeholders "****.gtf" and "/*.bam", which
# glob any GTF in the current directory and BAM files in the filesystem root.
# Set these before running:
#   gtf      path to the annotation GTF (required; the command aborts if unset)
#   bam_dir  directory holding the sorted BAMs (defaults to the current directory)
# NOTE(review): -f counts at feature (exon) level rather than summarising per
# gene -- confirm that is intended. Paired-end libraries normally also need -p.
featureCounts -T 4 -f -t exon -g gene_id \
  -a "${gtf:?set gtf to the annotation GTF path}" \
  -o all_id.count "${bam_dir:-.}"/*.bam

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容