查看 VCF 基因型
import argparse
def extract_genotypes(input_vcf, output_file):
    """Write the genotype of the first sample of every VCF record, one per line.

    Lines starting with '#' (header/comment lines) and blank lines are
    skipped. The genotype is taken to be the first colon-separated sub-field
    of the 10th tab-separated column (index 9); i.e. the genotype is assumed
    to be the first entry of the sample column and only the first sample is
    read.

    Args:
        input_vcf: Path of the plain-text VCF file to read.
        output_file: Path of the text file to create or overwrite; it
            receives one genotype string per data line.

    Raises:
        IndexError: If a non-blank data line has fewer than 10
            tab-separated columns.
    """
    with open(input_vcf, 'r') as infile, open(output_file, 'w') as outfile:
        for line in infile:
            if line.startswith('#'):
                continue  # skip header/comment lines
            if not line.strip():
                continue  # tolerate blank lines (e.g. at end of file)
            # Strip the line terminator BEFORE splitting: when the sample
            # column is the last column and has no ':' in it, the newline
            # would otherwise become part of the genotype string.
            columns = line.rstrip('\r\n').split('\t')
            genotype = columns[9].split(':')[0]
            outfile.write(genotype + '\n')
if __name__ == "__main__":
    # Command-line entry point: both the input and the output path are
    # mandatory options.
    cli = argparse.ArgumentParser(
        description='Extract genotypes from VCF file',
    )
    cli.add_argument(
        '-i', '--input_vcf',
        required=True,
        help='Input VCF file',
    )
    cli.add_argument(
        '-o', '--output_file',
        required=True,
        help='Output file to save genotypes',
    )
    options = cli.parse_args()
    extract_genotypes(options.input_vcf, options.output_file)
过滤基因型
import argparse
def extract_genotypes(input_vcf, output_file):
    """Copy a VCF file, dropping records whose first-sample genotype is './.' or '0/0'.

    Every line starting with '#' (header/comment line) is copied to the
    output unchanged. For each remaining non-blank line the genotype is taken
    to be the first colon-separated sub-field of the 10th tab-separated
    column (index 9); the line is written to the output unchanged unless that
    genotype is exactly './.' or '0/0'. Blank lines are not copied.

    NOTE(review): only the unphased spellings './.' and '0/0' are filtered;
    phased ('0|0', '.|.') or haploid ('.', '0') spellings are kept, as in
    the original comparison list. Confirm whether those should be dropped too.

    Args:
        input_vcf: Path of the plain-text VCF file to read.
        output_file: Path of the VCF file to create or overwrite.

    Raises:
        IndexError: If a non-blank data line has fewer than 10
            tab-separated columns.
    """
    excluded_genotypes = {'./.', '0/0'}
    with open(input_vcf, 'r') as infile, open(output_file, 'w') as outfile:
        # A single pass over the file: a separate "header loop" that breaks
        # on the first data line would already have consumed that line from
        # the iterator, so the first record would never be examined.
        for line in infile:
            if line.startswith('#'):
                outfile.write(line)
                continue
            if not line.strip():
                continue  # tolerate blank lines (e.g. at end of file)
            # Strip the line terminator before splitting so that a
            # genotype-only last column compares as '0/0', not '0/0\n'.
            columns = line.rstrip('\r\n').split('\t')
            genotype = columns[9].split(':')[0]
            if genotype not in excluded_genotypes:
                outfile.write(line)
if __name__ == "__main__":
    # Command-line entry point for the filtering script. The description and
    # help texts describe what this script actually writes: a filtered VCF,
    # not a list of genotypes.
    parser = argparse.ArgumentParser(
        description="Filter a VCF file, dropping records whose first-sample "
                    "genotype is ./. or 0/0"
    )
    parser.add_argument('-i', '--input_vcf', required=True, help='Input VCF file')
    parser.add_argument('-o', '--output_file', required=True,
                        help='Output VCF file to save the remaining records')
    args = parser.parse_args()
    extract_genotypes(args.input_vcf, args.output_file)