在网上下载了一份代码,发现其中的中文全部乱码,因为文件的编码格式是 GB2312,所以写了个脚本批量修改编码格式。
import os
import sys
import chardet
import codecs
def get_encoding_type(fileName):
    """Guess the text encoding of a file from its raw bytes.

    The whole file is read in binary mode and handed to ``chardet.detect``;
    whatever that call returns is passed back unchanged. Callers in this file
    read the ``'encoding'`` entry of the result.
    """
    with open(fileName, 'rb') as stream:
        raw_bytes = stream.read()
    return chardet.detect(raw_bytes)
def findAllFile(base):
    """Return a list with the path of every file found under ``base``.

    The directory tree is traversed with ``os.walk``; each entry is the
    walked directory joined with the file name. Directories themselves are
    not included in the result.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base)
        for name in names
    ]
def convert_encoding_type(filename_in, filename_out, encode_in="gbk", encode_out="utf-8"):
    """Re-encode a text file from ``encode_in`` to ``encode_out``.

    The input is read and decoded completely before the output file is
    opened, so ``filename_in`` and ``filename_out`` may be the same path
    (in-place conversion).

    Args:
        filename_in: Path of the file to read.
        filename_out: Path of the file to write. Missing parent directories
            are created.
        encode_in: Codec used to decode the input file.
        encode_out: Codec used to encode the output file.

    Raises:
        UnicodeDecodeError: If the input is not valid ``encode_in`` data.
        UnicodeEncodeError: If the text cannot be represented in
            ``encode_out``.
        OSError: If a file cannot be opened or the output directory cannot
            be created.
    """
    # newline="" turns off newline translation, so the file's line endings
    # are read exactly as stored.
    with open(filename_in, mode="r", encoding=encode_in, newline="") as fi:
        data = fi.read()

    # A bare file name has an empty dirname; only create real directories.
    outdir = os.path.dirname(filename_out)
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    # newline="" again: without it, text mode rewrites "\n" as os.linesep,
    # which turns an existing "\r\n" into "\r\r\n" on Windows.
    with open(filename_out, mode="w", encoding=encode_out, newline="") as fo:
        fo.write(data)
def main(base='./123/'):
    """Convert every file under ``base`` detected as GB2312, in place.

    Each file found by ``findAllFile`` is probed with ``get_encoding_type``.
    Files whose detected encoding is exactly ``'GB2312'`` are printed together
    with the detection result and then rewritten over themselves by
    ``convert_encoding_type`` using its default codecs. All other files are
    left untouched.

    Args:
        base: Root directory to scan. Defaults to ``'./123/'``, the
            directory this script originally had hard-coded.
    """
    for fileName in findAllFile(base):
        encoding_type = get_encoding_type(fileName)
        # Skip anything not reported as GB2312 (including a None encoding).
        if encoding_type['encoding'] != 'GB2312':
            continue
        print(f"{fileName} {encoding_type}")
        # Same path for input and output: the file is converted in place.
        convert_encoding_type(fileName, fileName)
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()