2022-01-24

def s10_mergeSample(self, depth=1):  ###!!!!!!!!!!!!digital depth 2 by Lyn
        for query in self.l_query:
            make_dir([ self.dir_StatInfo, self.s_idx, query ])
            mergeRatio = "%s/%s/%s/RatioMatrix.xls" % (self.dir_StatInfo, self.s_idx, query)
            f_mergeRatio = open(mergeRatio, "w")
            l_file = [ "%s/%s/singleC/all.%s.bed" %
                (self.dir_singleC, sam, query)
                for sam in self.samInfo_pd_RNA['brief_name']
            ]

            print >>f_mergeRatio, "chrpos\t%s" % ("\t".join(self.samInfo_pd_RNA['brief_name']))
            shell_info = " paste %s " % (" ".join(l_file))
            p=subprocess.Popen(shell_info,stdout=subprocess.PIPE,shell=True)
            for line in p.stdout:
                line = line.strip('\n')
                f   = line.split()

上面是原代码，目的是逐行读取第六行%s/%s/singleC/all.%s.bed这个文件。我需要把bed改为bed.gz做读取，其他不变。下面是我改的代码,改动1:第六行把bed改为bed.gz
2、第十四行的p.stdout改为gzip.open(p.stdout,"rb") 3、加入import gzip

import gzip
def s10_mergeSample(self, depth=1):  ###!!!!!!!!!!!!digital depth 2 by Lyn
        for query in self.l_query:
            make_dir([ self.dir_StatInfo, self.s_idx, query ])
            mergeRatio = "%s/%s/%s/RatioMatrix.xls" % (self.dir_StatInfo, self.s_idx, query)
            f_mergeRatio = open(mergeRatio, "w")
            l_file = [ "%s/%s/singleC/all.%s.bed.gz" %
                (self.dir_singleC, sam, query)
                for sam in self.samInfo_pd_RNA['brief_name']
            ]

            print >>f_mergeRatio, "chrpos\t%s" % ("\t".join(self.samInfo_pd_RNA['brief_name']))
            shell_info = " paste %s " % (" ".join(l_file))
            p=subprocess.Popen(shell_info,stdout=subprocess.PIPE,shell=True)
            for line in gzip.open(p.stdout,"rb"):
                line = line.strip('\n')
                f   = line.split()

2022-01-24

推荐阅读更多精彩内容