首先制作一个 BED 格式的文件,包含基因组中全部外显子区域的坐标,步骤如下:
# Fetch the rice (o_sativa) version_7.0 GFF3 annotation, saved as all.gff3.
# --continue resumes a partially downloaded file instead of starting over.
wget --continue \
  http://rice.plantbiology.msu.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3
# Build the exon interval list (chromosome, start, end) from the annotation.
# GFF3 columns: 1 seqid, 2 source, 3 type, 4 start, 5 end -- so the
# coordinates are columns 4 and 5; column 3 is the feature type we filter on.
# The result is written to exon.pos, the file name that the `head` preview
# and the SnpSift step read.
# NOTE(review): GFF3 starts are 1-based, BED starts are 0-based. If the
# consumer interprets this file as strict BED, print $4 - 1 instead -- confirm.
awk -F '\t' -v OFS='\t' '$3 == "exon" { print $1, $4, $5 }' all.gff3 > exon.pos
$ head exon.pos
Chr1 2903 3268
Chr1 3354 3616
Chr1 4357 4455
Chr1 5457 5560
Chr1 7136 7944
Chr1 8028 8150
Chr1 8232 8320
Chr1 8408 8608
Chr1 9210 9617
Chr1 10104 10187
从 VCF 文件中提取位于外显子(exon)区域内的变异位点:
# Keep only the variants from sample.vcf that fall inside the intervals listed
# in exon.pos, using SnpSift's `intervals` command; the VCF is fed on stdin
# and the filtered VCF is written to exon.vcf.
java -jar ~/biosoft/new-snpEff/snpEff/SnpSift.jar \
  intervals /disk3/name/MSU/gff/exon.pos \
  < sample.vcf > exon.vcf