# -*- coding: utf-8 -*-
# 导入必要的模块
import os
# Wild-type protein sequence as one continuous string (no line breaks).
# The "x" characters are placeholders standing in for amino-acid residues.
original_sequence = "".join(
    [
        "Mxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
    ]
)
# Point mutations to generate, one tuple per mutant:
# (original residue, 1-based position, replacement residue).
mutations = [
    ("K", 452, "G"),
    ("A", 311, "G"),
    ("F", 182, "Y"),
    ("G", 211, "A"),
    ("I", 314, "F"),
]
# Directory that receives the individual per-mutant FASTA files.
output_dir = "mutated_sequences"
# exist_ok=True makes this a no-op when the directory is already present and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)
# Wrap a sequence into fixed-width lines (used for the per-mutant files).
def format_fasta_sequence(sequence: str, line_length: int = 60) -> str:
    """Return *sequence* split into lines of at most *line_length* characters.

    Args:
        sequence: Residue string to wrap. An empty string yields an empty
            string.
        line_length: Maximum number of characters per output line.

    Returns:
        The wrapped sequence: chunks joined by newline characters, with no
        trailing newline.

    Raises:
        ValueError: If *line_length* is zero or negative. Without this check
            a negative width would silently produce an empty result and drop
            the whole sequence.
    """
    if line_length <= 0:
        raise ValueError(f"line_length must be positive, got {line_length}")
    return "\n".join(
        sequence[start:start + line_length]
        for start in range(0, len(sequence), line_length)
    )
# FASTA records (header line + single-line sequence) for the combined file.
all_fasta_content = []
# Build and save one mutant sequence per entry in `mutations`.
for orig_aa, pos, new_aa in mutations:
    # Positions are 1-based. Reject anything outside the sequence up front:
    # a position past the end would raise IndexError, and pos <= 0 would
    # silently index from the end of the string.
    if not 1 <= pos <= len(original_sequence):
        print(f"警告:位置 {pos} 超出序列范围(序列长度为 {len(original_sequence)}),已跳过 {orig_aa}{pos}{new_aa}")
        continue
    pos_index = pos - 1  # convert to a 0-based index
    # Skip the mutation when the residue found at that position is not the
    # one the mutation expects.
    if original_sequence[pos_index] != orig_aa:
        print(f"警告:位置 {pos} 的原始氨基酸应为 {orig_aa},但实际为 {original_sequence[pos_index]}")
        continue
    # Substitute the single residue at pos_index.
    mutated_sequence = original_sequence[:pos_index] + new_aa + original_sequence[pos_index + 1:]
    # Header shared by both outputs, e.g. ">K452G".
    fasta_id = f">{orig_aa}{pos}{new_aa}"
    # Per-mutant file content: sequence wrapped at the default line width.
    fasta_content_single = f"{fasta_id}\n{format_fasta_sequence(mutated_sequence)}"
    # Write the individual file, terminated by a newline so that files can be
    # concatenated without the next header fusing onto the last sequence line.
    filename = os.path.join(output_dir, f"{orig_aa}{pos}{new_aa}.fasta")
    with open(filename, "w", encoding="utf-8") as f:
        f.write(fasta_content_single + "\n")
    print(f"已生成单独文件:{filename}")
    # Queue the record for the combined file (whole sequence on one line).
    fasta_content_combined = f"{fasta_id}\n{mutated_sequence}"
    all_fasta_content.append(fasta_content_combined)
# Write the combined FASTA file: every record has its sequence on one line.
combined_filename = "all_mutated_sequences.fasta"
with open(combined_filename, "w", encoding="utf-8") as f:
    f.writelines(f"{record}\n" for record in all_fasta_content)
# Report the number of files actually written rather than the number of
# requested mutations: entries that failed validation above were skipped.
print(f"完成!共生成 {len(all_fasta_content)} 个突变序列文件,保存在 '{output_dir}' 目录下。")
print(f"合并文件已生成:{combined_filename}(每条序列单行)")
# 将上述代码复制到本地,保存为 generate_mutated_sequences.py,然后在本地运行:
#     python generate_mutated_sequences.py
# 即可得到每个突变体蛋白质的序列文件,以及包含所有突变体序列的合并文件;每条序列的 ID 采用“原始氨基酸+位置+突变后氨基酸”的格式(例如 K452G)。