// JAVA UTF-8 转 GBK

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Mirrors a directory tree into a new location while re-encoding Java sources
 * from UTF-8 to GBK.
 *
 * <p>Only {@code .java} files whose bytes are well-formed UTF-8 are re-encoded.
 * Every other file (including {@code .java} files that are not valid UTF-8) is
 * copied byte-for-byte to the same relative location under the output directory.
 *
 * <p>Note: characters that cannot be represented in the target charset are
 * substituted by {@link OutputStreamWriter} with the charset's replacement
 * character rather than reported as errors.
 *
 * @author GQ
 * @date 2024-10-31
 */
public class FileEncodingConverter {

    private static final Logger LOGGER = Logger.getLogger(FileEncodingConverter.class.getName());
    // Default source directory, used when no command-line argument is supplied.
    private static final String OLD_PATH = "D:\\server-urf8";
    // Default output directory; make sure it is an empty directory before running.
    private static final String NEW_PATH = "D:\\server-gbk";
    // Name of the charset the converted sources are written in.
    private static final String TARGET_CHARSET = "GBK";
    // Byte order mark (U+FEFF) that some editors put at the start of UTF-8 files.
    private static final char BYTE_ORDER_MARK = '\uFEFF';
    // Size of the char buffer used while transcoding.
    private static final int BUFFER_SIZE = 1024;

    /**
     * Entry point.
     *
     * @param args optional overrides: {@code args[0]} is the source directory and
     *             {@code args[1]} the output directory; the built-in defaults are
     *             used for whichever is missing
     * @throws IOException if copying a non-Java file fails
     */
    public static void main(String[] args) throws IOException {
        String oldPath = args.length > 0 ? args[0] : OLD_PATH;
        String newPath = args.length > 1 ? args[1] : NEW_PATH;
        scan(oldPath, newPath);
    }

    /**
     * Recursively walks {@code oldPath} and writes its contents under {@code newPath},
     * preserving the relative directory layout.
     *
     * <p>Failures while handling a {@code .java} file are logged and the walk continues;
     * a failure while copying any other file is propagated and stops the walk.
     *
     * @param oldPath directory to read from
     * @param newPath directory to write to (created on demand when a file is written)
     * @throws IOException if copying a non-Java file fails
     */
    public static void scan(String oldPath, String newPath) throws IOException {
        File[] files = new File(oldPath).listFiles();
        if (files == null) {
            // listFiles() yields null when the path is missing, not a directory, or unreadable.
            LOGGER.log(Level.WARNING, "目录 {0} 不存在或不是有效的目录", oldPath);
            return;
        }

        for (File file : files) {
            if (file.isDirectory()) {
                scan(file.getAbsolutePath(), newPath + File.separator + file.getName());
            } else if (file.getName().endsWith(".java")) {
                // Java sources are re-encoded when they are UTF-8, otherwise copied as-is.
                convertOrCopyFile(file, newPath);
            } else {
                copyFile(file, newPath);
            }
        }
    }

    /**
     * Re-encodes {@code file} into {@code newPath} when its content is valid UTF-8,
     * otherwise copies it unchanged. I/O errors are logged, never thrown.
     */
    private static void convertOrCopyFile(File file, String newPath) {
        try {
            byte[] fileBytes;
            try {
                fileBytes = Files.readAllBytes(file.toPath());
            } catch (IOException e) {
                LOGGER.log(Level.SEVERE, "读取文件 " + file.getName() + " 时发生错误", e);
                return;
            }

            if (isUTF8(fileBytes)) {
                convertFileEncoding(file, newPath);
            } else {
                // Not UTF-8 (e.g. already in the target encoding): keep the bytes untouched.
                copyFile(file, newPath);
            }
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "处理文件 " + file.getName() + " 时发生错误", e);
        }
    }

    /**
     * Reads {@code file} as UTF-8 and writes it to {@code newPath} in the target charset.
     * A leading byte order mark is dropped because it has no meaning in the output
     * encoding and would otherwise end up as a stray character at the top of the file.
     *
     * @throws IOException if the output directory cannot be created or reading/writing fails
     */
    private static void convertFileEncoding(File file, String newPath) throws IOException {
        String newFilePath = newPath + File.separator + file.getName();
        ensureDirectory(newPath);

        // Each stream is its own resource so that all of them are closed even if
        // constructing a later wrapper throws.
        try (
                FileInputStream fileInputStream = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fileInputStream, StandardCharsets.UTF_8));
                FileOutputStream fileOutputStream = new FileOutputStream(newFilePath);
                OutputStreamWriter writer = new OutputStreamWriter(fileOutputStream, TARGET_CHARSET)
        ) {
            // Peek at the first character and rewind unless it is the byte order mark.
            reader.mark(1);
            if (reader.read() != BYTE_ORDER_MARK) {
                reader.reset();
            }

            char[] buffer = new char[BUFFER_SIZE];
            int length;
            while ((length = reader.read(buffer)) != -1) {
                writer.write(buffer, 0, length);
            }
        }

        // Logged only after the writer has been flushed and closed successfully.
        LOGGER.info(() -> "文件 " + file.getName() + " 已成功转换为 GBK 编码并保存到 " + newFilePath);
    }

    /**
     * Copies {@code file} byte-for-byte into {@code newPath}, replacing an existing
     * file of the same name.
     *
     * @throws IOException if the output directory cannot be created or the copy fails
     */
    private static void copyFile(File file, String newPath) throws IOException {
        String newFilePath = newPath + File.separator + file.getName();
        ensureDirectory(newPath);

        Files.copy(file.toPath(), Paths.get(newFilePath), StandardCopyOption.REPLACE_EXISTING);

        LOGGER.info(() -> "文件 " + file.getName() + " 已成功复制到 " + newFilePath);
    }

    /**
     * Creates {@code dir} (and any missing parents) if it does not exist yet.
     *
     * @throws IOException if the directory does not exist and cannot be created
     */
    private static void ensureDirectory(String dir) throws IOException {
        File target = new File(dir);
        // The second isDirectory() check covers another process creating it concurrently.
        if (!target.isDirectory() && !target.mkdirs() && !target.isDirectory()) {
            throw new IOException("Unable to create directory " + dir);
        }
    }

    /**
     * Tells whether {@code bytes} is a well-formed UTF-8 sequence.
     *
     * <p>Validation is delegated to the JDK's strict UTF-8 decoder, so truncated
     * sequences, overlong encodings, encoded surrogates and code points above
     * U+10FFFF are all rejected. Empty input and pure ASCII count as valid.
     *
     * @param bytes raw content to inspect
     * @return {@code true} if the bytes decode as UTF-8 without error
     */
    public static boolean isUTF8(byte[] bytes) {
        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        try {
            decoder.decode(ByteBuffer.wrap(bytes));
            return true;
        } catch (CharacterCodingException e) {
            // Any decoding error means the content is not valid UTF-8.
            return false;
        }
    }
}
// 最后编辑于
// ©著作权归作者所有,转载或内容合作请联系作者
// 平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。