转录组数据:
SRR8002919.0_PE.1.clean.dup.fq.gz SRR8002928.0_PE.1.clean.dup.fq.gz SRR8002944.0_PE.1.clean.dup.fq.gz SRR8002959.0_PE.1.clean.dup.fq.gz SRR8002963.0_PE.1.clean.dup.fq.gz
SRR8002919.0_PE.2.clean.dup.fq.gz SRR8002928.0_PE.2.clean.dup.fq.gz SRR8002944.0_PE.2.clean.dup.fq.gz SRR8002959.0_PE.2.clean.dup.fq.gz SRR8002963.0_PE.2.clean.dup.fq.gz
SRR8002920.0_PE.1.clean.dup.fq.gz SRR8002929.0_PE.1.clean.dup.fq.gz SRR8002945.0_PE.1.clean.dup.fq.gz SRR8002960.0_PE.1.clean.dup.fq.gz SRR8002964.0_PE.1.clean.dup.fq.gz
SRR8002920.0_PE.2.clean.dup.fq.gz SRR8002929.0_PE.2.clean.dup.fq.gz SRR8002945.0_PE.2.clean.dup.fq.gz SRR8002960.0_PE.2.clean.dup.fq.gz SRR8002964.0_PE.2.clean.dup.fq.gz
SRR8002921.0_PE.1.clean.dup.fq.gz SRR8002942.0_PE.1.clean.dup.fq.gz SRR8002956.0_PE.1.clean.dup.fq.gz SRR8002961.0_PE.1.clean.dup.fq.gz SRR8002965.0_PE.1.clean.dup.fq.gz
SRR8002921.0_PE.2.clean.dup.fq.gz SRR8002942.0_PE.2.clean.dup.fq.gz SRR8002956.0_PE.2.clean.dup.fq.gz SRR8002961.0_PE.2.clean.dup.fq.gz SRR8002965.0_PE.2.clean.dup.fq.gz
SRR8002926.0_PE.1.clean.dup.fq.gz SRR8002943.0_PE.1.clean.dup.fq.gz SRR8002957.0_PE.1.clean.dup.fq.gz SRR8002962.0_PE.1.clean.dup.fq.gz SRR8002966.0_PE.1.clean.dup.fq.gz
SRR8002926.0_PE.2.clean.dup.fq.gz SRR8002943.0_PE.2.clean.dup.fq.gz SRR8002957.0_PE.2.clean.dup.fq.gz SRR8002962.0_PE.2.clean.dup.fq.gz SRR8002966.0_PE.2.clean.dup.fq.gz
基因组数据:
mhl.fa mhl.gff
1.使用hisat2将转录组数据与基因组比对,得到sam文件
01.hisat2.py 代码如下:
```
import os, sys
#cwd = os.getcwd()
#d=os.chdir("./")

# Align every paired-end RNA-seq sample to the genome with HISAT2.
# For each sample the command reads <sample>.1/.2.clean.dup.fq.gz from the
# transcriptome directory and writes <sample>.sam and <sample>.summary.txt
# using relative paths (i.e. into the current working directory).
for f in ["SRR8002919.0_PE","SRR8002920.0_PE","SRR8002921.0_PE","SRR8002926.0_PE","SRR8002928.0_PE","SRR8002929.0_PE","SRR8002942.0_PE","SRR8002943.0_PE","SRR8002944.0_PE","SRR8002945.0_PE","SRR8002956.0_PE","SRR8002957.0_PE","SRR8002959.0_PE","SRR8002960.0_PE","SRR8002961.0_PE","SRR8002962.0_PE","SRR8002963.0_PE","SRR8002964.0_PE","SRR8002965.0_PE","SRR8002966.0_PE"]:
    # Common path prefix of the two mate files of this sample.
    reads = "/home/qiuqiang/qintao/projects/01.sika_transcriptome/" + f
    # NOTE(review): hisat2 -x takes an index basename; this assumes the index
    # was built with the basename ".../mhl.fa" -- confirm.
    # The trailing "&" backgrounds the job in the shell, so os.system() returns
    # immediately and all samples are started at once, each with -p 8.
    cmd = (
        "/home/qiuqiang/zhangyijiuling/software/hisat2-2.1.0/hisat2 -p 8 --dta --no-mixed --no-discordant"
        + " --summary-file " + f + ".summary.txt"
        + " -x /home/qiuqiang/qintao/projects/00.genome/mhl.fa"
        + " -1 " + reads + ".1.clean.dup.fq.gz"
        + " -2 " + reads + ".2.clean.dup.fq.gz"
        + " -S " + f + ".sam & "
    )
    # print() with one argument works under both Python 2 and Python 3.
    print(cmd)
    os.system(cmd)
```
2.将sam文件转换为bam,并排序
此处使用Perl脚本 02.convert_sam2bam.pl
```
#!/usr/bin/perl
# Generate "<this script>.sh" with one SAM -> sorted BAM conversion command
# per *.sam file found in the current directory. The commands are only
# written to the shell script; nothing is executed here.
use strict;
use warnings;

my @sam_files = <*.sam>;
my $script    = "$0.sh";

# Three-argument open with a lexical handle; fail loudly if the script
# cannot be created instead of silently printing to a closed handle.
open(my $out, '>', $script) or die "Cannot open $script for writing: $!\n";

for my $sam (@sam_files) {
    # Anchored match with an escaped dot: strip only a real ".sam" suffix,
    # and skip the file rather than reuse a stale $1 if it does not match.
    next unless $sam =~ /\A(\S+)\.sam\z/;
    my $prefix = $1;
    print {$out} "samtools view -bS $sam | samtools sort > $prefix.sort.bam\n";
}

# Buffered write errors surface at close, so check it.
close($out) or die "Cannot close $script: $!\n";
```
得到 02.convert_sam2bam.pl.sh
samtools view -bS SRR8002929.0_PE.sam | samtools sort > SRR8002929.0_PE.sort.bam
samtools view -bS SRR8002945.0_PE.sam | samtools sort > SRR8002945.0_PE.sort.bam
samtools view -bS SRR8002956.0_PE.sam | samtools sort > SRR8002956.0_PE.sort.bam
samtools view -bS SRR8002957.0_PE.sam | samtools sort > SRR8002957.0_PE.sort.bam
samtools view -bS SRR8002959.0_PE.sam | samtools sort > SRR8002959.0_PE.sort.bam
samtools view -bS SRR8002963.0_PE.sam | samtools sort > SRR8002963.0_PE.sort.bam
samtools view -bS SRR8002964.0_PE.sam | samtools sort > SRR8002964.0_PE.sort.bam
samtools view -bS SRR8002965.0_PE.sam | samtools sort > SRR8002965.0_PE.sort.bam
samtools view -bS SRR8002966.0_PE.sam | samtools sort > SRR8002966.0_PE.sort.bam
3.使用stringtie根据sort.bam文件组装转录本,得到gtf文件
03.stringtie.py
```
import os, sys
#cwd = os.getcwd()
#d=os.chdir("./")

# Launch one StringTie run per sample on its sorted BAM file.
samples = ["SRR8002919.0_PE", "SRR8002920.0_PE","SRR8002921.0_PE","SRR8002926.0_PE","SRR8002928.0_PE","SRR8002929.0_PE","SRR8002942.0_PE","SRR8002943.0_PE","SRR8002944.0_PE","SRR8002945.0_PE","SRR8002956.0_PE","SRR8002957.0_PE","SRR8002959.0_PE","SRR8002960.0_PE","SRR8002961.0_PE","SRR8002962.0_PE","SRR8002963.0_PE","SRR8002964.0_PE","SRR8002965.0_PE","SRR8002966.0_PE"]

for sample in samples:
    # Earlier variant, kept for reference:
    #cmd="stringtie -p 3 -o " + sample + ".gtf -l " + sample + " " + sample + ".bam & "

    # Output file is "<sample>.<sample without its last character>.gtf",
    # e.g. SRR8002919.0_PE.SRR8002919.0_P.gtf.
    # NOTE(review): the [:-1] slice looks accidental, but later steps use
    # these file names -- confirm before changing.
    out_gtf = sample + "." + sample[:-1] + ".gtf"
    sorted_bam = sample + ".sort.bam"

    # The trailing "&" backgrounds each job, so os.system() does not wait.
    cmd = "stringtie -p 3 -o " + out_gtf + " -l " + sample + " " + sorted_bam + " & "
    print(cmd)
    os.system(cmd)
```
得到gtf文件:
SRR8002919.0_PE.SRR8002919.0_P.gtf SRR8002928.0_PE.SRR8002928.0_P.gtf SRR8002944.0_PE.SRR8002944.0_P.gtf SRR8002959.0_PE.SRR8002959.0_P.gtf SRR8002963.0_PE.SRR8002963.0_P.gtf SRR8002920.0_PE.SRR8002920.0_P.gtf SRR8002929.0_PE.SRR8002929.0_P.gtf SRR8002945.0_PE.SRR8002945.0_P.gtf SRR8002960.0_PE.SRR8002960.0_P.gtf SRR8002964.0_PE.SRR8002964.0_P.gtf SRR8002921.0_PE.SRR8002921.0_P.gtf SRR8002942.0_PE.SRR8002942.0_P.gtf SRR8002956.0_PE.SRR8002956.0_P.gtf SRR8002961.0_PE.SRR8002961.0_P.gtf SRR8002965.0_PE.SRR8002965.0_P.gtf SRR8002926.0_PE.SRR8002926.0_P.gtf SRR8002943.0_PE.SRR8002943.0_P.gtf SRR8002957.0_PE.SRR8002957.0_P.gtf SRR8002962.0_PE.SRR8002962.0_P.gtf SRR8002966.0_PE.SRR8002966.0_P.gtf
4.利用gffread工具将gtf文件转为fa文件
利用04.gtfTOfa.pl
```
#! /usr/bin/perl
# Generate "<this script>.sh" with one gffread command per *.gtf file in the
# current directory. Each command writes the transcript sequences of
# <file>.gtf to <file>.gtf.fa. The commands are only written, not executed.
use strict;
use warnings;

my @gtf_files = <*.gtf>;
my $script    = "$0.sh";

# Three-argument open with a lexical handle, checked so a failure to create
# the script is reported instead of being silently ignored.
open(my $out, '>', $script) or die "Cannot open $script for writing: $!\n";

for my $gtf (@gtf_files) {
    # $gtf already carries the ".gtf" extension from the glob, so it is passed
    # to gffread as-is; appending ".gtf" again would name a missing file.
    # NOTE(review): confirm new_ref.fa is the same assembly the reads were
    # aligned to.
    print {$out} "gffread -w $gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa $gtf\n";
}

# Buffered write errors surface at close, so check it.
close($out) or die "Cannot close $script: $!\n";
```
得到 04.gtfTOfa.pl.sh
gffread -w SRR8002919.0_PE.SRR8002919.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002919.0_PE.SRR8002919.0_P.gtf.gtf
gffread -w SRR8002920.0_PE.SRR8002920.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002920.0_PE.SRR8002920.0_P.gtf.gtf
gffread -w SRR8002921.0_PE.SRR8002921.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002921.0_PE.SRR8002921.0_P.gtf.gtf
gffread -w SRR8002926.0_PE.SRR8002926.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002926.0_PE.SRR8002926.0_P.gtf.gtf
gffread -w SRR8002928.0_PE.SRR8002928.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002928.0_PE.SRR8002928.0_P.gtf.gtf
gffread -w SRR8002929.0_PE.SRR8002929.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002929.0_PE.SRR8002929.0_P.gtf.gtf
gffread -w SRR8002942.0_PE.SRR8002942.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002942.0_PE.SRR8002942.0_P.gtf.gtf
gffread -w SRR8002943.0_PE.SRR8002943.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002943.0_PE.SRR8002943.0_P.gtf.gtf
gffread -w SRR8002944.0_PE.SRR8002944.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002944.0_PE.SRR8002944.0_P.gtf.gtf
gffread -w SRR8002945.0_PE.SRR8002945.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002945.0_PE.SRR8002945.0_P.gtf.gtf
gffread -w SRR8002956.0_PE.SRR8002956.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002956.0_PE.SRR8002956.0_P.gtf.gtf
gffread -w SRR8002957.0_PE.SRR8002957.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002957.0_PE.SRR8002957.0_P.gtf.gtf
gffread -w SRR8002959.0_PE.SRR8002959.0_P.gtf.fa -g /home/qiuqiang/qintao/projects/00.genome/new_ref.fa SRR8002959.0_PE.SRR8002959.0_P.gtf.gtf
最后就得到了fa文件