这次需要一点原料了——水稻ID对照表
相关数据库见 Oryzabase(https://shigen.nig.ac.jp/rice/oryzabase/)
就是这个:
#转换基因注释
import pandas as pd
from collections import OrderedDict
# Build the lookup table used for ID conversion from the mapping file.
# The first 12 characters of each row become the key (presumably the RAP ID —
# confirm against the file format); the remainder of the row, once stripped,
# is split on commas into the list of mapped IDs.
name = OrderedDict()
with open("RAP-MSU_2018-03-29.txt", "r") as mapping_file:
    for row in mapping_file:
        # Reading stops at the first row that begins with "N".
        if row.startswith("N"):
            break
        key, remainder = row[:12], row[12:]
        name[key] = remainder.strip().split(",")
def CC(a):
    """Expand the IDs listed in a file into ``[id, mapped_id]`` pairs.

    Each non-blank line of the file is stripped and looked up in the
    module-level ``name`` mapping; one pair is produced for every entry
    mapped to that ID.

    Args:
        a: Path of a text file containing one ID per line.

    Returns:
        A list of two-element lists ``[id, mapped_id]``, in file order.

    Raises:
        KeyError: If a non-blank line is not a key of ``name``.
    """
    pairs = []
    # The context manager closes the file even when a lookup raises.
    with open(a, "r") as id_file:
        for raw_line in id_file:
            query_id = raw_line.strip()
            if not query_id:
                # Blank lines (e.g. a trailing newline) carry no ID.
                continue
            pairs.extend([query_id, mapped] for mapped in name[query_id])
    return pairs
# Convert the IDs listed in t.txt and save the resulting pairs as a CSV table.
w = CC("t.txt")
column = ["Rap_id", "Msu_id"]
test = pd.DataFrame(w, columns=column)
test.to_csv("test.csv")  # write the table to CSV