二代测序:Phred33和Phred64格式判断

原理:取10000行,求平均碱基质量,来判断格式

import gzip
from pickle import bytes_types
from statistics import mean
def line_chr_ascii(aline):
    #print(f'line_chr_ascii:{aline}')
    nline = []
    for i in aline:
        nline.append(ord(i))
    return nline
def main():        
    inputfile = "/data3/Pugionium_cornutum_population/pc-y101-1/3_1_GCCAAT_L008_R1_001.fastq.gz"
    readnum = 0
    qual_list = []
    f = gzip.open(inputfile,'r')
    i = 0
    while i<10001:#取10000行
        line = f.readline().rstrip()
        line = bytes.decode(line)
        #print(line)
        if '@' in line:
                readnum = readnum+1
                i = i+1
        elif '+' in line:
            line = f.readline().rstrip()
            line = bytes.decode(line)
            line = line_chr_ascii(line)
            qual_list = qual_list+line
            i = i+2
        else:
            i=i+1
        
    f.close()
    qual_mean = mean(qual_list)
    print(f'read numbers total:{readnum}')
    print(f'qual mean:{qual_mean}')
    print(f'Phred 33:{qual_mean-33}')
    print(f'Phred 64:{qual_mean-64}')
    readnum = None
    qual_list = None
image.png

很明显是phred33格式的

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容