服务器上已经安装好了seqtk;如果需要在自己的机器上安装,可以运行:
# Install the seqtk package with apt-get (run as root via sudo).
sudo apt-get install seqtk
seqtk的运行
比如从一个fastq文件中随机抽取500条reads(-s 60 指定随机种子;每条read占4行,所以输出共2000行)
(base) 202031107010173@xiaoming-HP:~$ gunzip -c /disk1/shares/Seqs/Akle_TTAGGC_L004_R1_001.fastq.gz |seqtk sample -s 60 - 500 >test500.fq
(base) 202031107010173@xiaoming-HP:~$ wc -l test500.fq
2000 test500.fq
(base) 202031107010173@xiaoming-HP:~$ less test500.fq
# Convert the sampled FASTQ file to FASTA; -a makes seqtk emit FASTA, so the
# quality lines are not carried over into test500.fa.
seqtk seq -a test500.fq > test500.fa
提取DNA序列
# Build a 7-column BED file from the GFF annotation:
#   1. keep GFF lines containing "<TAB>Protein" and drop any line that
#      mentions "pseudogene";
#   2. keep GFF fields 1,4,5,7,9 (seqid, start, end, strand, attributes);
#   3. trim the attribute field down to "<locus_tag><TAB><protein_id>"
#      (the three sed expressions are greedy and must run in this order);
#   4. print seqid, start-1, end, locus_tag, "0", strand, protein_id
#      (start-1 turns the 1-based GFF start into a 0-based BED start).
grep $'\tProtein' GCA_000817325.1_ASM81732v1_genomic.gff \
  | grep -v "pseudogene" \
  | awk -v FS="\t" -v OFS="\t" '{print $1,$4,$5,$7,$9}' \
  | sed -e 's/\tID.*;locus_tag=/\t/g' \
        -e 's/;.*;protein_id=/\t/g' \
        -e 's/;.*$//g' \
  | awk -v FS='\t' -v OFS='\t' '{print $1,$2-1,$3,$5,"0",$4,$6}' \
  > genome.bed

# Cut every BED interval out of the genome FASTA.
# NOTE(review): no strand option is passed here, so minus-strand features are
# presumably returned as forward-strand sequence -- confirm against the
# installed seqtk version before treating the output as coding sequences.
seqtk subseq /disk1/shares/Seqs/GCA_000817325.1_ASM81732v1_genomic.fna genome.bed \
  > cds_per_gene.fna

# Page through the per-sequence composition table of the extracted regions.
seqtk comp cds_per_gene.fna | less
提取前30个蛋白质氨基酸序列
# Column 7 of genome.bed holds the protein_id written by the BED-building
# pipeline; take it from the first 30 rows to form a name list.
head -n 30 genome.bed | cut -f 7 > pro_name.list

# Fetch the protein records whose names appear in the list.
seqtk subseq /disk1/shares/Seqs/GCA_000817325.1_ASM81732v1_protein.faa pro_name.list \
  > selected_pro.faa