2023-09-18

def vcf_to_012_without_pysam(vcf_filename, output_filename):
    """Convert a VCF file into a tab-separated sample-by-variant 0/1/2 table.

    The VCF is parsed as plain text (no pysam required). Sample names are
    taken from columns 10+ of the ``#CHROM`` header line. For every variant
    line, the first ``:``-separated subfield of each sample column is read as
    the genotype and encoded as:

    * ``'0'``  -- ``0/0`` or ``0|0``
    * ``'1'``  -- ``0/1``, ``1/0``, ``0|1`` or ``1|0``
    * ``'2'``  -- ``1/1`` or ``1|1``
    * ``'NA'`` -- anything else (e.g. ``./.`` or multi-allelic calls)

    The output file has a header row ``Sample`` followed by the variant IDs
    (VCF column 3), then one row per sample with its code for each variant.

    Args:
        vcf_filename: Path of the uncompressed, tab-delimited VCF to read.
        output_filename: Path of the text file to write (overwritten).

    Returns:
        None. The result is written to ``output_filename``.

    Raises:
        OSError: If either file cannot be opened.
        IndexError: If a variant line has fewer sample columns than the
            ``#CHROM`` header declares.
    """
    genotype_codes = {
        "0/0": '0', "0|0": '0',
        "0/1": '1', "1/0": '1', "0|1": '1', "1|0": '1',
        "1/1": '2', "1|1": '2',
    }

    samples = []
    data = []  # one row per variant: [variant ID, code for sample 1, ...]

    with open(vcf_filename, 'r') as vcf_file:
        # Skip the header and pick up the sample names from the #CHROM line.
        for line in vcf_file:
            if line.startswith("#CHROM"):
                samples = line.strip().split("\t")[9:]
                break

        # The same file iterator continues right after the #CHROM line, so
        # everything left is variant records (blank/comment lines skipped).
        for line in vcf_file:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            fields = line.split("\t")
            transformed = [
                genotype_codes.get(genotype.split(":")[0], 'NA')
                for genotype in fields[9:]
            ]
            data.append([fields[2]] + transformed)

    with open(output_filename, 'w') as out_file:
        out_file.write("\t".join(['Sample'] + [row[0] for row in data]) + "\n")

        # Transpose on the fly: column i of every variant row belongs to the
        # i-th sample (column 0 is the variant ID). Indexing the rows directly
        # also works when there are no variants at all, where zip(*data)
        # would yield nothing and make the per-sample lookup fail.
        for column, sample in enumerate(samples, start=1):
            codes = [row[column] for row in data]
            out_file.write("\t".join([sample] + codes) + "\n")


# Usage: vcf_to_012_without_pysam("Rht2.vcf", "Rht2.txt")

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容