#!/usr/bin/env python
import re
import os
import sys
import argparse
import gzip
parser = argparse.ArgumentParser(description="pipeline")
parser.add_argument('-i', '--input', help = 'the pathway of the input vcf file ', required = True)
parser.add_argument('-b', '--bedfile', help = 'the pathway of the bed file ', required = True)
parser.add_argument('-o', '--output', help = 'the pathway of the output vcf file,filter vcf', required = True)
argv = vars(parser.parse_args())
ifile = os.path.abspath(argv['input'].strip())
ofile = os.path.abspath(argv['output'].strip())
bfile = os.path.abspath(argv['bedfile'].strip())
def bedchr(bfile):
chrID=[]
with open(bfile,'r') as b:
for bl in b:
bli=bl.strip().split('\t')
chrb=str(bli[0])
if chrb not in chrID:
chrID.append(chrb)
return chrID
# GQ >10, indicates a 90% pro
def filtervcf(inputfile,chrid,ofile): # chrID type: list, include the reference chromode ID
oc=open(ofile,'w')
with gzip.open(inputfile,'rb') as v:
for vi in v:
vic = vi.strip().split('\t')
if str(vic[0]).startswith('#'):
if str(vic[0]).startswith('##contig'):
vi1=vi.strip().split('=')[2]
vi2=vi1.strip().split(',')[0]
if str(vi2) in chrid:
oc.write(str(vi))
else:
oc.write(str(vi))
else:
samfinfoGQ = int(vic[-1].strip().split(':')[1])
if str(vic[0]) in chrid and 'RefCall' not in str(vic) and samfinfoGQ >=20:
oc.write(str(vi))
if __name__ == "__main__":
bedchrid=bedchr(bfile)
filtervcf(ifile,bedchrid,ofile)
优化设置阈值
#!/usr/bin/env python
import re
import os
import sys
import argparse
import gzip
parser = argparse.ArgumentParser(description="pipeline")
parser.add_argument('-i', '--input', help = 'the pathway of the input vcf file ', required = True)
parser.add_argument('-b', '--bedfile', help = 'the pathway of the bed file ', required = True)
parser.add_argument('-o', '--output', help = 'the pathway of the output vcf file,filter vcf', required = True)
parser.add_argument('-g', '--GQnumber', help = 'the value of GQ,Conditional genotype quality ', required = True)
argv = vars(parser.parse_args())
ifile = os.path.abspath(argv['input'].strip())
ofile = os.path.abspath(argv['output'].strip())
bfile = os.path.abspath(argv['bedfile'].strip())
gnum=int(argv['GQnumber'].strip())
def bedchr(bfile):
chrID=[]
with open(bfile,'r') as b:
for bl in b:
bli=bl.strip().split('\t')
chrb=str(bli[0])
if chrb not in chrID:
chrID.append(chrb)
return chrID
# GQ >10, indicates a 90% pro
def filtervcf(inputfile,chrid,ofile,gnum): # chrID type: list, include the reference chromode ID
oc=open(ofile,'w')
with gzip.open(inputfile,'rb') as v:
for vi in v:
vic = vi.strip().split('\t')
if str(vic[0]).startswith('#'):
if str(vic[0]).startswith('##contig'):
vi1=vi.strip().split('=')[2]
vi2=vi1.strip().split(',')[0]
if str(vi2) in chrid:
oc.write(str(vi))
else:
oc.write(str(vi))
else:
samfinfoGQ = int(vic[-1].strip().split(':')[1])
if str(vic[0]) in chrid and 'RefCall' not in str(vic) and samfinfoGQ >=int(gnum):
oc.write(str(vi))
if __name__ == "__main__":
bedchrid=bedchr(bfile)
filtervcf(ifile,bedchrid,ofile,gnum)