def s10_mergeSample(self, depth=1): ###!!!!!!!!!!!!digital depth 2 by Lyn
for query in self.l_query:
make_dir([ self.dir_StatInfo, self.s_idx, query ])
mergeRatio = "%s/%s/%s/RatioMatrix.xls" % (self.dir_StatInfo, self.s_idx, query)
f_mergeRatio = open(mergeRatio, "w")
l_file = [ "%s/%s/singleC/all.%s.bed" %
(self.dir_singleC, sam, query)
for sam in self.samInfo_pd_RNA['brief_name']
]
print >>f_mergeRatio, "chrpos\t%s" % ("\t".join(self.samInfo_pd_RNA['brief_name']))
shell_info = " paste %s " % (" ".join(l_file))
p=subprocess.Popen(shell_info,stdout=subprocess.PIPE,shell=True)
for line in p.stdout:
line = line.strip('\n')
f = line.split()
上面是原代码,目的是逐行读取第六行%s/%s/singleC/all.%s.bed这个文件。我需要把bed改为bed.gz做读取,其他不变。下面是我改的代码,改动1:第六行把bed改为bed.gz
2、第十四行的p.stdout改为gzip.open(p.stdout,"rb") 3、加入import gzip
import gzip
def s10_mergeSample(self, depth=1): ###!!!!!!!!!!!!digital depth 2 by Lyn
for query in self.l_query:
make_dir([ self.dir_StatInfo, self.s_idx, query ])
mergeRatio = "%s/%s/%s/RatioMatrix.xls" % (self.dir_StatInfo, self.s_idx, query)
f_mergeRatio = open(mergeRatio, "w")
l_file = [ "%s/%s/singleC/all.%s.bed.gz" %
(self.dir_singleC, sam, query)
for sam in self.samInfo_pd_RNA['brief_name']
]
print >>f_mergeRatio, "chrpos\t%s" % ("\t".join(self.samInfo_pd_RNA['brief_name']))
shell_info = " paste %s " % (" ".join(l_file))
p=subprocess.Popen(shell_info,stdout=subprocess.PIPE,shell=True)
for line in gzip.open(p.stdout,"rb"):
line = line.strip('\n')
f = line.split()