# convert_to_utf8_interactive.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import chardet
def detect_encoding(file_path):
    """Guess the text encoding of a file from its raw bytes.

    The whole file is read in binary mode and handed to ``chardet.detect``;
    the ``'encoding'`` entry of the detection result is returned unchanged.
    """
    with open(file_path, 'rb') as handle:
        detection = chardet.detect(handle.read())
    return detection['encoding']
def convert_file_inplace(file_path):
    """Convert a file to UTF-8 (no BOM) in place, overwriting the original.

    Args:
        file_path: Path of the file to convert.

    Returns:
        True if the file was decoded with the detected encoding and rewritten
        as UTF-8; False if it was skipped (already UTF-8 compatible) or the
        conversion failed. Failures are reported on stdout, never raised, so
        a batch run can continue with the next file.
    """
    try:
        encoding = detect_encoding(file_path)
    except OSError as e:
        # An unreadable file must not abort the whole batch run.
        print(f"❌ 转换失败 {file_path}: {e}")
        return False
    print(f"正在处理: {file_path} (检测编码: {encoding})")

    if not encoding:
        # Passing encoding=None to open() would silently fall back to the
        # locale's default encoding and could corrupt the file on rewrite.
        print(f"❌ 转换失败 {file_path}: 无法检测文件编码")
        return False

    # The detector may report names in upper case (e.g. "UTF-8-SIG"), so
    # normalize before comparing. Pure ASCII is already valid UTF-8, so such
    # files need no rewrite either.
    normalized = encoding.lower().replace('_', '-')
    if normalized in ('utf-8', 'utf-8-sig', 'ascii'):
        print(f"✅ 已是 UTF-8 编码,跳过: {file_path}")
        return False

    try:
        # newline='' disables newline translation on both sides so the
        # file's original line endings (LF / CRLF) are preserved.
        # The file is fully decoded before it is reopened for writing, so a
        # decoding error leaves the original content untouched.
        with open(file_path, 'r', encoding=encoding, newline='') as f:
            content = f.read()
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            f.write(content)
    except (OSError, UnicodeError, LookupError) as e:
        # OSError: I/O or permission problem; UnicodeError: bytes do not
        # match the detected encoding; LookupError: codec name unknown to
        # Python.
        print(f"❌ 转换失败 {file_path}: {e}")
        return False

    print(f"✅ 已覆盖保存为 UTF-8: {file_path}")
    return True
def process_directory(root_dir):
    """Recursively convert every supported file under *root_dir* in place.

    Files are selected by (case-insensitive) extension; names starting with
    ``.`` or ``~`` are ignored. Each selected file is passed to
    ``convert_file_inplace`` and a summary of the outcome counts is printed
    at the end.
    """
    convertible_suffixes = {
        '.c', '.cpp', '.h', '.hpp', '.txt', '.py', '.js', '.html', '.xml', '.json',
        '.rc', '.def', '.inc', '.s', '.asm', '.m', '.mm',  # common C/C++/resource files
    }
    converted_count = 0
    untouched_count = 0

    for folder, _subdirs, names in os.walk(root_dir):
        for name in names:
            # Hidden files and temporary files are never touched.
            if name.startswith(('.', '~')):
                continue
            suffix = os.path.splitext(name)[1].lower()
            if suffix not in convertible_suffixes:
                continue

            changed = convert_file_inplace(os.path.join(folder, name))
            if changed:
                converted_count += 1
            else:
                untouched_count += 1

    print(f"\n🎉 处理完成!共处理 {converted_count + untouched_count} 个文件")
    print(f"✅ 成功转换(覆盖): {converted_count} 个文件")
    print(f"❌ 跳过或失败: {untouched_count} 个文件")
def main():
    """Interactively ask for a project root directory and convert its files.

    Prints a banner, then keeps prompting until a non-empty path that exists
    on disk is entered. The path is then handed to ``process_directory``.

    Raises:
        SystemExit: With exit code 1 when the entered path exists but is not
            a directory.
    """
    print("🚀 UTF-8 文件编码转换工具(原地覆盖)")
    print("📌 本工具将自动检测并转换非 UTF-8 编码文件为 UTF-8 格式")
    print("📌 中文内容将保持不乱码")
    print("-" * 60)

    # Ask for the root directory until an existing path is supplied.
    while True:
        path = input("📁 请输入要处理的项目根目录路径(支持中文): ").strip()
        if not path:
            print("❌ 路径不能为空,请重新输入。")
            continue
        if os.path.exists(path):
            break
        print(f"❌ 路径不存在: {path}")

    # An existing path may still be a regular file; only directories are valid.
    if not os.path.isdir(path):
        print(f"❌ 输入的不是目录: {path}")
        # The bare exit() builtin is injected by the site module and can be
        # missing (python -S, frozen apps); raising SystemExit always works.
        raise SystemExit(1)

    print(f"\n🔍 开始扫描并转换目录: {path}")
    process_directory(path)
    print("\n✨ 所有文件已处理完毕,原文件已覆盖为 UTF-8 格式。")
# Start the interactive converter only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()