一、文章数据下载
安装miniconda
# Install wget, download the latest Miniconda3 Linux x86_64 installer, run it,
# then re-read ~/.bashrc in the current shell.
sudo apt-get install wget
installer=Miniconda3-latest-Linux-x86_64.sh
wget "https://repo.anaconda.com/miniconda/${installer}"
bash "${installer}"
source ~/.bashrc
中科大源
# Register the USTC mirror channels with conda (added in the order listed),
# then turn on display of channel URLs.
for channel in pkgs/main pkgs/free cloud/conda-forge cloud/msys2 cloud/bioconda cloud/menpo; do
  conda config --add channels "http://mirrors.ustc.edu.cn/anaconda/${channel}/"
done
conda config --set show_channel_urls yes
清华源
# Register the Tsinghua (TUNA) mirror channels with conda (added in the order
# listed), then turn on display of channel URLs.
for channel in pkgs/free pkgs/main cloud/conda-forge cloud/bioconda; do
  conda config --add channels "https://mirrors.tuna.tsinghua.edu.cn/anaconda/${channel}/"
done
conda config --set show_channel_urls yes
安装软件
# Create a conda environment named "rnaseq" with Python 3.
conda create --name rnaseq python=3
安装软件前,先确保已激活rnaseq环境:conda activate rnaseq
安装sra-tools:conda install sra-tools
# Activate the rnaseq environment. `conda activate` is the spelling these notes
# already use in the preceding instructions; `source activate` is the legacy form.
conda activate rnaseq
质控软件:fastqc multiqc trimmomatic cutadapt trim-galore
比对软件:star hisat2 bowtie2 tophat bwa subread
计数软件:conda install htseq bedtools deeptools salmon
# Install the QC, alignment and counting tools in one go. fastqc and multiqc are
# part of the QC tool list in these notes (and fastqc is run in the QC section),
# so they are installed here together with the rest.
conda install fastqc multiqc trimmomatic cutadapt trim-galore star hisat2 bowtie2 tophat bwa subread htseq bedtools deeptools salmon
下载数据
先下载SRR_Acc_List.txt文件,然后用prefetch逐个下载:
# Download each accession listed in SRR_Acc_List.txt (one per line) with
# prefetch, one after another. Blank lines are skipped so prefetch is never
# called without an accession.
while read -r id; do
  [ -n "$id" ] || continue
  prefetch "$id"
done < SRR_Acc_List.txt
挂在后台下载:
# Same download loop, but every prefetch is launched as a background job from
# its own subshell, so all downloads start at once.
# NOTE(review): the accession list is read from a file literally named "id";
# the sequential variant reads SRR_Acc_List.txt - confirm which file is meant.
while read -r id; do
  [ -n "$id" ] || continue
  ( prefetch "$id" & )
done < id
转化为fastq文件
# Convert one accession to gzipped FASTQ (--split-3) into the HN project directory.
fastq-dump --gzip --split-3 --outdir /mnt/f/project/HN/ SRR10695769
批量转换sra到fq格式
# Convert every .sra file in the sra directory to gzipped FASTQ, one detached
# background fastq-dump per file. The option is spelled --split-3 (one token);
# "--split -3" is not a valid fastq-dump option.
for sra_file in /mnt/f/project/N/sra/*.sra; do
  [ -e "$sra_file" ] || continue   # glob matched nothing
  ( nohup fastq-dump --gzip --split-3 -O /mnt/f/project/N/fastq "$sra_file" & )
done
二、质控
fastqc
# Run FastQC (quiet, 4 threads) on every .gz file in the fastq directory, one
# detached background job per file. The report directory is created up front so
# that -o always points at an existing directory.
mkdir -p /mnt/f/project/HN/fastqc
for fq in /mnt/f/project/HN/fastq/*.gz; do
  [ -e "$fq" ] || continue   # glob matched nothing
  ( nohup fastqc -q -t 4 -o /mnt/f/project/HN/fastqc "$fq" & )
done
fastp
单端测序
# Minimal single-end fastp call: read in.fq, write the filtered reads to out.fq.
fastp --in1 in.fq --out1 out.fq
# Run fastp sequentially on every single-end *.fastq.gz file in the current
# directory. The sample name is the file name minus the literal ".fastq.gz"
# suffix (the previous unquoted sed pattern treated "." as "any character" and
# replaced every match, not just the suffix). Output directories are created
# before the first run.
mkdir -p ./fastp_data ./fastp_result
for fq in *.fastq.gz; do
  [ -e "$fq" ] || continue   # glob matched nothing
  i=${fq%.fastq.gz}
  fastp -w 4 -i "$fq" -o "./fastp_data/${i}.fastq.gz" -h "./fastp_result/${i}.html" -j "./fastp_result/${i}.json"
done
# Background variant of the single-end fastp run: one detached job per file.
# Fixes relative to the earlier one-liner:
#   - the -o path no longer contains a stray space (it used to split into a
#     directory argument plus an extra positional argument);
#   - outputs go to ./fastp_data and ./fastp_result like the other fastp
#     commands in these notes, not to directories under the filesystem root;
#   - the sample id is the file name minus ".fastq.gz", so "${id}.fastq.gz"
#     is always the file that was actually found.
mkdir -p ./fastp_data ./fastp_result
for fq in *.fastq.gz; do
  [ -e "$fq" ] || continue   # glob matched nothing
  id=${fq%.fastq.gz}
  ( nohup fastp -w 4 -i "$fq" -o "./fastp_data/${id}.fq.gz" -h "./fastp_result/${id}.html" -j "./fastp_result/${id}.json" & )
done
双端测序
# Paired-end fastp run: for every <id>_1.fastq.gz in the current directory,
# process it together with <id>_2.fastq.gz in a detached background job.
# The id is derived by stripping the "_1.fastq.gz" suffix, so sample names that
# themselves contain "_" are handled; samples without a mate file are skipped.
mkdir -p ./fastp_data ./fastp_result
for r1 in *_1.fastq.gz; do
  [ -e "$r1" ] || continue   # glob matched nothing
  id=${r1%_1.fastq.gz}
  r2=${id}_2.fastq.gz
  if [ ! -e "$r2" ]; then
    printf 'skipping %s: mate file %s not found\n' "$id" "$r2" >&2
    continue
  fi
  ( nohup fastp -w 4 -i "$r1" -I "$r2" -o "./fastp_data/${id}_1.fastq.gz" -O "./fastp_data/${id}_2.fastq.gz" -h "./fastp_result/${id}.html" -j "./fastp_result/${id}.json" & )
done
#!/bin/bash
# Used for rnaseq data by Fastp
# Mind the sample naming scheme when adapting this script:
#   (1) SRR10695753_1.fastq.gz : the id is the part before "_",
#       mates are ${id}_1.fastq.gz / ${id}_2.fastq.gz
#   (2) Sample_Ck_1.R1.fq.gz   : the id is the part before ".R1.fq.gz",
#       mates are ${id}.R1.fq.gz / ${id}.R2.fq.gz   <- scheme handled below
raw_dir=/mnt/g/project/N_batch/data/rawdata
out_root=/mnt/g/project/N_batch/data/cleandata/fastp

# Stop if the raw-data directory is missing, rather than globbing in whatever
# directory the script happened to be started from.
cd "$raw_dir" || exit 1
mkdir -p "${out_root}/fastp_data" "${out_root}/fastp_result"

for r1 in *.R1.fq.gz; do
  [ -e "$r1" ] || continue   # glob matched nothing
  id=${r1%.R1.fq.gz}
  r2=${id}.R2.fq.gz
  if [ ! -e "$r2" ]; then
    printf 'skipping %s: mate file %s not found\n' "$id" "$r2" >&2
    continue
  fi
  # One detached background fastp job per sample.
  ( nohup fastp -w 4 -i "$r1" -I "$r2" \
      -o "${out_root}/fastp_data/${id}_cleandata.R1.fq.gz" \
      -O "${out_root}/fastp_data/${id}_cleandata.R2.fq.gz" \
      -h "${out_root}/fastp_result/${id}.html" \
      -j "${out_root}/fastp_result/${id}.json" & )
done
#trim_galore
#!/bin/bash
# Used for rnaseq data by trim_galore
# Pairs up *_1.fastq.gz / *_2.fastq.gz files from the raw-data directory and
# starts one background trim_galore job per pair.
#
# Fixes relative to the earlier version:
#   - no space inside the cd path or inside the dir= assignment (the latter
#     used to run trim_galore with no arguments instead of setting dir);
#   - "while read ...; do" now has its separator, so the script parses;
#   - the two columns of each config line are read straight into fq1/fq2
#     (arr=${id} was a plain string, so ${arr[1]} was always empty).
cd /mnt/g/project/N_batch/data/rawdata || exit 1

# Build the pairing table: column 1 = read-1 files, column 2 = read-2 files.
ls *_1.fastq.gz > 1
ls *_2.fastq.gz > 2
paste 1 2 > config

dir=/mnt/g/project/N_batch/data/cleandata/trim_galore

while read -r fq1 fq2; do
  # Skip incomplete lines (e.g. when the two file lists differ in length).
  if [ -z "$fq1" ] || [ -z "$fq2" ]; then
    continue
  fi
  nohup trim_galore -q 25 --phred33 --length 35 -e 0.1 --stringency 3 --paired -o "$dir" "$fq1" "$fq2" &
done < config
三、比对
单端测序
#!/bin/bash
# Align every *.fq.gz file in the current directory with hisat2 (-U input),
# one file after another, writing <sample>.sam into the current directory.
# The sample name is the file name minus ".fq.gz", so names that contain
# additional dots still map back to the file that was found.
for fq in *.fq.gz; do
  [ -e "$fq" ] || continue   # glob matched nothing
  id=${fq%.fq.gz}
  nohup hisat2 -p 4 -x /home/user/../genome_hist2/hisat2_css_index -U "$fq" -S "${id}.sam"
done
双端测序
#!/bin/bash
# Paired-end alignment: for every <id>_1.fastq.gz in the current directory,
# list the mate pair and align it with hisat2, one sample after another.
# "ls -lh" and "nohup hisat2" are now separate commands; previously they ran
# together as a single ls invocation, so hisat2 was never executed.
for r1 in *_1.fastq.gz; do
  [ -e "$r1" ] || continue   # glob matched nothing
  id=${r1%_1.fastq.gz}
  r2=${id}_2.fastq.gz
  # Show both input files; skip the sample if either one is missing.
  ls -lh "$r1" "$r2" || continue
  nohup hisat2 -p 4 -x /home/user/../genome_hist2/hisat2_css_index -1 "$r1" -2 "$r2" -S "/home/user/../HN/sam/${id}.sam"
done
转bam排序
#!/bin/bash
# Step 1: sort every *.sam in the current directory into <name>.bam.
for sam in *.sam; do
  [ -e "$sam" ] || continue   # glob matched nothing
  samtools sort -O bam -@ 5 -o "${sam%.sam}.bam" "$sam"
done

# Step 2: index every *.bam in the current directory and write its flagstat
# summary to <name>.flagstat. Plain globs replace "ls | xargs -i" (a deprecated
# xargs option) and "ls | while read".
for bam in *.bam; do
  [ -e "$bam" ] || continue   # glob matched nothing
  samtools index "$bam"
  samtools flagstat -@ 10 "$bam" > "${bam%.bam}.flagstat"
done
四、计数
# featureCounts is provided by the subread package.
conda install subread -y
# Count reads over exon features, grouped by gene_id, for all BAM files in the
# current directory (where the sort step writes them); "/*.bam" would have
# looked in the filesystem root instead.
# NOTE(review): "****.gtf" looks like a placeholder for the annotation file -
# as written the shell treats it as the glob *.gtf; replace with the real path.
featureCounts -T 4 -f -t exon -g gene_id -a ****.gtf -o all_id.count ./*.bam