所有汉字转Unicode编码

/**
 * Writes a tab-separated table of the code points 19968..40869
 * (U+4E00..U+9FA5) to a text file. Each row holds the character, its Unicode
 * value in decimal and hexadecimal, its GBK code in decimal and hexadecimal,
 * and its UTF-8 byte sequence in hexadecimal.
 *
 * <p>The output file is always encoded as UTF-8 so its content does not depend
 * on the platform default charset.
 */
public class Utf8Test {

    /** First code point written: 19968 == U+4E00. */
    private static final int MIN_INDEX = 19968;
    /** Last code point written (inclusive): 40869 == U+9FA5. */
    private static final int MAX_INDEX = 40869;
    private static final String CR = "\r\n";
    private static final String TAB = "\t";
    /** Charset used both for the output file and for the UTF-8 column. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Generates the table and writes it to {@code fileName}, replacing any
     * existing file of that name.
     *
     * @param fileName path of the file to create or overwrite
     * @throws IOException if the file cannot be opened or written, or if the
     *                     GBK charset is not supported by the running JVM
     */
    public void execute(String fileName) throws IOException {
        File f = new File(fileName);
        // try-with-resources guarantees the file is flushed and closed even when a write fails.
        try (java.io.Writer fw = new java.io.BufferedWriter(
                new java.io.OutputStreamWriter(new java.io.FileOutputStream(f), UTF_8))) {
            fw.write("字符" + TAB + "Unicode十进制" + TAB + "Unicode十六进制" + TAB + TAB + "GBK十进制" + TAB + "GBK十六进制" + TAB + "unicode转utf-8" + CR);
            fw.write("==================================================================================" + CR);
            for (int i = MIN_INDEX; i <= MAX_INDEX; i++) {
                char c = (char) i;
                int gbkCode = getGBKCode(i);

                // The last field fills the UTF-8 column announced by the header.
                fw.write(c + TAB + i + TAB + TAB + Integer.toHexString(i) + TAB + TAB + TAB + gbkCode + TAB + TAB + Integer.toHexString(gbkCode) + TAB + TAB + toUtf8Hex(c) + CR);
            }
        }
        System.out.println("Done!");
    }

    /**
     * Returns the GBK encoding of a character as a single integer, with the
     * encoded bytes combined in big-endian order.
     *
     * <p>All encoded bytes are folded in, so a character that GBK encodes in
     * one byte yields that byte's value instead of failing on a missing
     * second byte.
     *
     * @param unicodeCode UTF-16 code unit of the character to encode
     * @return the GBK bytes interpreted as an unsigned big-endian integer
     * @throws UnsupportedEncodingException if the JVM has no GBK charset
     */
    private int getGBKCode(int unicodeCode) throws UnsupportedEncodingException {
        byte[] bytes = String.valueOf((char) unicodeCode).getBytes("GBK");
        int code = 0;
        for (byte b : bytes) {
            // Mask to 0..255: Java bytes are signed and would otherwise sign-extend.
            code = (code << 8) | (b & 0xFF);
        }
        return code;
    }

    /**
     * Returns the UTF-8 encoding of a character as lowercase hexadecimal, two
     * digits per byte, with no separators.
     *
     * @param c the character to encode
     * @return hexadecimal text of the UTF-8 bytes of {@code c}
     */
    private static String toUtf8Hex(char c) {
        StringBuilder hex = new StringBuilder();
        for (byte b : String.valueOf(c).getBytes(UTF_8)) {
            int value = b & 0xFF;
            if (value < 0x10) {
                hex.append('0');
            }
            hex.append(Integer.toHexString(value));
        }
        return hex.toString();
    }

    /**
     * Writes the table to a fixed file name in the working directory.
     *
     * @param args ignored
     * @throws Exception if generating the file fails
     */
    public static void main(String[] args) throws Exception {
        new Utf8Test().execute("汉字编码一览表.txt");
    }

}

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容