# Maps a diploid, biallelic GT call to its alternate-allele count.
# Phased ("|") and unphased ("/") calls are treated identically.
_GENOTYPE_CODES = {
    "0/0": "0", "0|0": "0",
    "0/1": "1", "1/0": "1", "0|1": "1", "1|0": "1",
    "1/1": "2", "1|1": "2",
}


def vcf_to_012_without_pysam(vcf_filename, output_filename):
    """Convert a VCF file into a tab-separated sample-by-variant 0/1/2 matrix.

    The input is parsed as plain text (no pysam). Sample names are taken from
    columns 10 onward of the ``#CHROM`` header line. For every variant record
    the third column (ID) becomes a column label, and the first
    colon-separated subfield of each sample column is encoded as:

    * ``0`` for ``0/0`` or ``0|0``
    * ``1`` for ``0/1``, ``1/0``, ``0|1`` or ``1|0``
    * ``2`` for ``1/1`` or ``1|1``
    * ``NA`` for anything else (e.g. ``./.`` or calls with other alleles)

    The output has a header line ``Sample<TAB>id1<TAB>id2...`` followed by one
    line per sample. If the input has no variant records, each sample line
    holds only the sample name. If no ``#CHROM`` line is found, only the
    header line ``Sample`` is written.

    Args:
        vcf_filename: Path of the uncompressed VCF text file to read.
        output_filename: Path of the tab-separated file to write
            (overwritten if it exists).

    Raises:
        OSError: If either file cannot be opened.
        IndexError: If a record has fewer than three columns, or fewer
            genotype columns than there are samples in the header.
    """
    samples = []
    rows = []  # one [variant_id, code, code, ...] list per variant record

    with open(vcf_filename, "r") as vcf_file:
        # Consume the header up to and including the "#CHROM" line, which
        # carries the sample names.
        for line in vcf_file:
            if line.startswith("#CHROM"):
                samples = line.strip().split("\t")[9:]
                break

        # The same file iterator continues right after the "#CHROM" line.
        for line in vcf_file:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            fields = line.split("\t")
            codes = [
                _GENOTYPE_CODES.get(call.split(":")[0], "NA")
                for call in fields[9:]
            ]
            rows.append([fields[2]] + codes)

    # Build the whole transposed table (samples as rows) before touching the
    # output file, so a malformed record cannot leave a half-written file.
    out_lines = ["\t".join(["Sample"] + [row[0] for row in rows]) + "\n"]
    for index, sample in enumerate(samples):
        # Column index + 1 because position 0 of each row is the variant ID.
        calls = [row[index + 1] for row in rows]
        out_lines.append("\t".join([sample] + calls) + "\n")

    with open(output_filename, "w") as out_file:
        out_file.writelines(out_lines)


if __name__ == "__main__":
    # Usage example.
    vcf_to_012_without_pysam("Rht2.vcf", "Rht2.txt")