python34

import re
import os
import sys
import argparse
import gzip

# Contigs to keep: the autosomes, the sex chromosomes and the mitochondrion.
chrID = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13',
         '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']

# Captures the ID of a "##contig=<ID=...>" header, stopping at the first
# ',' or '>' so that headers without extra fields are parsed correctly too.
_CONTIG_ID_RE = re.compile(r'contig=<ID=([^,>]+)')


def _keep_line(line, wanted):
    """Return True if the decoded VCF *line* belongs in the filtered output.

    Header lines (starting with '#') are kept, except contig declarations
    whose ID is not in *wanted*.  Record lines are kept when their first
    tab-separated column is in *wanted* and the text 'RefCall' does not
    occur anywhere in the line.
    """
    if line.startswith('#'):
        found = _CONTIG_ID_RE.search(line)
        return found is None or found.group(1) in wanted
    chrom = line.strip().split('\t', 1)[0]
    return chrom in wanted and 'RefCall' not in line


def filter_vcf(ifile, ofile, wanted=None):
    """Filter the gzipped VCF *ifile* into the plain-text file *ofile*.

    Args:
        ifile: path of the gzip-compressed input VCF.
        ofile: path of the uncompressed output VCF (overwritten).
        wanted: iterable of contig names to keep; defaults to ``chrID``.
    """
    wanted = frozenset(chrID if wanted is None else wanted)
    # Iterate the handle instead of readlines() so the whole VCF is never
    # held in memory; ``with`` closes both files even if a line fails.
    with gzip.open(ifile, 'rb') as vfile, open(ofile, 'w') as oc:
        for raw in vfile:
            # Decode once and work on text only: writing str(bytes) would
            # emit the b'...' repr instead of the VCF line itself.
            line = raw.decode()
            if _keep_line(line, wanted):
                oc.write(line)


def main():
    """Parse the command line and run the filter."""
    parser = argparse.ArgumentParser(description="pipeline")
    parser.add_argument('-i', '--input', help = 'the pathway of the input vcf file ', required = True)
    parser.add_argument('-o', '--output', help = 'the pathway of the output vcf file,filter vcf', required = True)
    argv = vars(parser.parse_args())
    ifile = os.path.abspath(argv['input'].strip())
    ofile = os.path.abspath(argv['output'].strip())
    filter_vcf(ifile, ofile)


if __name__ == '__main__':
    main()

每一次写脚本都是在细节中不断优化：下面的版本改为直接输出 gzip 压缩的 vcf 文件，并在处理结束后显式关闭输入和输出文件。

import re
import os
import sys
import argparse
import gzip

# Contigs to keep: the autosomes, the sex chromosomes and the mitochondrion.
chrID = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13',
         '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']

# Captures the ID of a "##contig=<ID=...>" header, stopping at the first
# ',' or '>' so that headers without extra fields are parsed correctly too.
_CONTIG_ID_RE = re.compile(r'contig=<ID=([^,>]+)')


def _keep_line(line, wanted):
    """Return True if the decoded VCF *line* belongs in the filtered output.

    Header lines (starting with '#') are kept, except contig declarations
    whose ID is not in *wanted*.  Record lines are kept when their first
    tab-separated column is in *wanted* and the text 'RefCall' does not
    occur anywhere in the line.
    """
    if line.startswith('#'):
        found = _CONTIG_ID_RE.search(line)
        return found is None or found.group(1) in wanted
    chrom = line.strip().split('\t', 1)[0]
    return chrom in wanted and 'RefCall' not in line


def filter_vcf(ifile, ofile, wanted=None):
    """Filter the gzipped VCF *ifile* into the gzipped VCF *ofile*.

    Args:
        ifile: path of the gzip-compressed input VCF.
        ofile: path of the gzip-compressed output VCF (overwritten).
        wanted: iterable of contig names to keep; defaults to ``chrID``.
    """
    wanted = frozenset(chrID if wanted is None else wanted)
    # Iterate the handle instead of readlines() so the whole VCF is never
    # held in memory; ``with`` closes both files even if a line fails.
    with gzip.open(ifile, 'rb') as vfile, gzip.open(ofile, 'wb') as oc:
        for raw in vfile:
            # The decision is made on the decoded text, but the original
            # bytes are written: a binary gzip handle rejects str, and
            # passing the bytes through keeps kept lines byte-identical.
            if _keep_line(raw.decode(), wanted):
                oc.write(raw)


def main():
    """Parse the command line and run the filter."""
    parser = argparse.ArgumentParser(description="pipeline")
    parser.add_argument('-i', '--input', help = 'the pathway of the input vcf file ', required = True)
    parser.add_argument('-o', '--output', help = 'the pathway of the output vcf file,filter vcf', required = True)
    argv = vars(parser.parse_args())
    ifile = os.path.abspath(argv['input'].strip())
    ofile = os.path.abspath(argv['output'].strip())
    filter_vcf(ifile, ofile)


if __name__ == '__main__':
    main()
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容