根据映射改变fasta id
fasta_change_id.py
#!/usr/bin/env python3
'''
NAME:
fasta_change_id.py -- Change fasta id by given mapping file
SYNOPSIS:
fasta_change_id.py <fasta[.gz]-file> <mapping-file> <outfasta-file> [-g]
'''
import os
import sys
import gzip
import argparse
import fileinput
def parse_args(argv=None):
    '''Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            ``parse_args()`` callers behave as before.

    Returns:
        argparse.Namespace with the string attributes ``input``, ``map`` and
        ``output`` and the boolean attribute ``gzip``.
    '''
    parser = argparse.ArgumentParser(
        # The module docstring doubles as the help description; the raw
        # formatter keeps its line breaks intact.
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'input', metavar='<fasta[.gz]-file>', type=str,
        help='input file in FASTA format (.gz allowed)',
    )
    parser.add_argument(
        'map', metavar='<mapping-file>', type=str,
        help='each line in mapping file contains original ID and '
             'corresponding new ID, which are separated by TAB',
    )
    parser.add_argument(
        'output', metavar='<outfasta-file>', type=str,
        help='output file name',
    )
    parser.add_argument(
        '-g', '--gzip', action='store_true',
        help='if -g|--gzip, the out file will be compressed with gzip '
             'and suffixed with ".gz"',
    )
    return parser.parse_args(argv)
def read_map_2_dict(arg_map):
    '''Load the ID mapping file into a dictionary.

    Every non-blank line must consist of exactly two TAB-separated fields:
    the original ID followed by the new ID. Blank (whitespace-only) lines
    are skipped. When an original ID occurs more than once, the last
    occurrence wins.

    Args:
        arg_map: Name of the mapping file, handed to ``fileinput.input``.

    Returns:
        dict mapping each original ID to its new ID.

    Raises:
        SystemExit: if the file cannot be read, or if a non-blank line does
            not contain exactly two TAB-separated fields.
    '''
    map_dict = {}
    try:
        with fileinput.input(files=arg_map) as map_fh:
            for line_no, raw_line in enumerate(map_fh, start=1):
                line = raw_line.rstrip('\n')
                if not line.strip():
                    # Tolerate empty / trailing blank lines instead of
                    # crashing on the tuple unpacking below.
                    continue
                fields = line.split('\t')
                if len(fields) != 2:
                    sys.exit(
                        f'[ERROR] invalid mapping-file: line {line_no} must '
                        'contain exactly two TAB-separated fields.'
                    )
                original_id, new_id = fields
                map_dict[original_id] = new_id
    except OSError:
        # IOError is an alias of OSError in Python 3.
        sys.exit('[ERROR] failed to open mapping-file or invalid mapping-file.')
    return map_dict
def out_fasta(arg_in, arg_out, map_dict, arg_gzip):
    '''Copy a FASTA file, replacing header IDs according to ``map_dict``.

    For every header line (starting with '>'), the ID is the first
    whitespace-delimited token after the '>' marker. The header is rewritten
    as '>' plus the mapped ID, or plus the original ID when there is no
    (non-empty) mapping for it. Anything after the ID on the header line is
    not written to the output. All other lines are copied unchanged.

    Args:
        arg_in: Input FASTA path; read through gzip when it ends in '.gz'.
        arg_out: Output path; '.gz' is appended when ``arg_gzip`` is true.
        map_dict: dict mapping original IDs to new IDs.
        arg_gzip: When true, write gzip-compressed output.
    '''
    open_in = gzip.open if arg_in.endswith('.gz') else open
    if arg_gzip:
        open_out, out_path = gzip.open, arg_out + '.gz'
    else:
        open_out, out_path = open, arg_out

    # Context managers guarantee both handles are closed (and the gzip
    # stream is finalized) even if reading or writing fails part-way.
    with open_in(arg_in, 'rt') as in_fh, open_out(out_path, 'wt') as out_fh:
        for line in in_fh:
            if not line.startswith('>'):
                out_fh.write(line)
                continue
            # Drop only the single leading '>' marker; lstrip('>') would
            # also eat '>' characters that belong to the ID itself.
            original_id = line.split()[0][1:]
            # Missing or empty mappings fall back to the original ID.
            new_id = map_dict.get(original_id) or original_id
            out_fh.write('>' + new_id + '\n')
if __name__ == '__main__':
    # Script entry point: parse the CLI, load the ID mapping, then rewrite
    # the FASTA headers into the requested output file.
    args = parse_args()
    map_dict = read_map_2_dict(args.map)
    out_fasta(
        arg_in=args.input,
        arg_out=args.output,
        map_dict=map_dict,
        arg_gzip=args.gzip,
    )