实现两个字（双音节词）的拼音切割
如: 开始 kāishǐ -> kāi shǐ
待实现: 多字切割
-
example 1 : 打开门 dakaimen
- first split it into two parts:
"dakai men"
- then check each part:
"dakai"
can be split further into "da kai",
and "men"
cannot be split into "m en"
- the word has 3 characters, so the first part must be split again:
"da kai"
- final result:
"da kai men"
- example 2 : 那男人 na'nan'ren
- example 3 : 小图案 xiao'tu'an
3 . 程序示例
/**
 * Heuristic helper that inserts a single space between the two syllables of a
 * two-character pinyin word, for example {@code kāishǐ -> kāi shǐ}.
 *
 * <p>Tone-marked vowels are accepted; internally they are mapped to their plain
 * letters ({@code ü} and its toned forms become {@code v}) before any table
 * lookup. Words with more than two syllables are not supported.
 */
public class Utils {

    /**
     * Initials searched for by {@link #containsInitial(String)}. The letters
     * {@code n}, {@code g} and {@code r} are deliberately absent: they get
     * dedicated handling because they may also close the first syllable.
     * Single letters precede the two-letter initials, which gives the same
     * split position because both start at the same index.
     */
    private static final String[] INITIALS = {"b", "p", "m", "f", "d", "t", "l", "k", "j", "q", "x", "z", "c", "s", "zh", "ch", "sh", "h", "y", "w"};

    /**
     * Finals tested at the very start or very end of a word. Order matters:
     * two-letter finals are listed before the single vowels so they win.
     */
    private static final String[] SIMPLE_FINALS = {"ai", "ao", "an", "ou", "ei", "en", "er", "a", "o", "e"};

    /**
     * Finals matched by {@link #equalsFullFinal(String)} ({@code v} stands for
     * {@code ü}). When the text after an {@code n}/{@code g} is exactly one of
     * these, that consonant is treated as the initial of the second syllable.
     */
    private static final String[] FULL_FINALS = {"i", "u", "v", "ia", "ua", "ie", "ve", "uo", "uai", "uei", "iao", "iou",
            "ian", "uan", "van", "in", "uen", "vn", "ang", "iang", "uang", "eng", "ing", "ueng", "ong", "iong"};

    private static final String[] TONES_A = {"ā", "á", "ǎ", "à"};
    private static final String[] TONES_O = {"ō", "ó", "ǒ", "ò"};
    private static final String[] TONES_E = {"ē", "é", "ě", "è"};
    private static final String[] TONES_I = {"ī", "í", "ǐ", "ì"};
    private static final String[] TONES_U = {"ū", "ú", "ǔ", "ù"};
    private static final String[] TONES_V = {"ǖ", "ǘ", "ǚ", "ǜ", "ü"};

    /**
     * Splits a two-syllable pinyin word by inserting one space at the detected
     * syllable boundary.
     *
     * @param str the pinyin word, with or without tone marks
     * @return the word with a space inserted; the input unchanged when it is
     *         {@code null} or empty, or when no boundary could be found (in the
     *         latter case an error line is also printed to standard output)
     */
    public static String splitPinyin(String str) {
        // Nothing to split; the original code threw on the empty string.
        if (str == null || str.isEmpty()) {
            return str;
        }

        // A word that starts with a bare final is split right after that final;
        // otherwise look for the boundary in everything after the first letter.
        int splitAt = startWithFinal(str);
        if (splitAt == 0) {
            splitAt = containsInitial(str.substring(1));
        }

        if (splitAt == 0) {
            System.out.println("error : " + str + "\n");
            return str;
        }
        return new StringBuilder(str).insert(splitAt, " ").toString();
    }

    /**
     * Locates the syllable boundary inside a word whose first character has
     * already been removed by the caller.
     *
     * @param str the word without its first character
     * @return the insert position expressed as an index into the ORIGINAL word
     *         (hence the {@code +1}/{@code +2} offsets below), or 0 if no
     *         boundary was found
     */
    private static int containsInitial(String str) {
        // A one-letter word leaves nothing to inspect; report "not found"
        // instead of failing on substring(1).
        if (str.isEmpty()) {
            return 0;
        }

        // formatMeta swaps characters one-for-one, so indexes in metaStr and
        // str are interchangeable.
        String metaStr = formatMeta(str);

        // Skip one more character: the first syllable needs at least one letter
        // after its own initial. An unambiguous initial found beyond that point
        // starts the second syllable, so split right before it.
        String tail = metaStr.substring(1);
        for (String initial : INITIALS) {
            int at = tail.indexOf(initial);
            if (at >= 0) {
                return at + 2;
            }
        }

        int rAt = metaStr.indexOf("r");
        if (rAt >= 0) {
            if (metaStr.endsWith("er")) {
                // Trailing "er" is the second syllable: split before its "e".
                return metaStr.length() - 1;
            }
            if (metaStr.contains("er") && !metaStr.startsWith("er")) {
                // An "er" in the middle: r is taken as part of the first
                // syllable, so split right after the r.
                return rAt + 2;
            }
            // e.g. "he ren" arrives as "eren": r opens the second syllable,
            // so split right before it.
            return rAt + 1;
        }

        if (str.contains("gn")) {
            // "...g" + "n...": split between the two letters.
            return str.indexOf("gn") + 2;
        }

        int gAt = str.indexOf("g");
        if (gAt >= 0) {
            if (str.contains("gg")) {
                return str.indexOf("gg") + 2;
            }
            String before = str.substring(0, gAt);
            String after = str.substring(gAt + 1);
            // The remainder is a final that needs an initial, so g must be
            // that initial: split before g.
            if (!after.isEmpty() && equalsFullFinal(after)) {
                return gAt + 1;
            }
            // g is not part of an "ng" ending, so it can only open the
            // second syllable.
            if (!before.endsWith("n")) {
                return gAt + 1;
            }
            // Here the word contains "ng" and the decision is left to the n
            // rules below. (The original had a further "no ng" branch at this
            // point which could never execute and has been dropped.)
        }

        int nAt = str.indexOf("n");
        if (nAt >= 0) {
            if (str.contains("nn")) {
                return str.indexOf("nn") + 2;
            }
            if (nAt != str.length() - 1) {
                String before = str.substring(0, nAt);
                String after = str.substring(nAt + 1);
                // The remainder is a final that needs an initial, so n must
                // be that initial: split before n.
                if (!after.isEmpty() && equalsFullFinal(after)) {
                    return nAt + 1;
                }
                // Otherwise split before n only when enough letters precede
                // it; a leading "i" raises the threshold by one.
                int threshold = before.startsWith("i") ? 2 : 1;
                if (before.length() > threshold) {
                    return nAt + 1;
                }
            }
        }

        // No n/g before the last letter: the second syllable may be a bare
        // final at the end of the word.
        String middle = str.substring(0, str.length() - 1);
        if (!middle.contains("n") && !middle.contains("g")) {
            return containsFinal(str);
        }
        return 0;
    }

    /**
     * Checks whether the word begins with one of the simple finals.
     *
     * @param str the pinyin word, with or without tone marks
     * @return the length of the first matching final (which is also the insert
     *         position after it), or 0 if the word does not start with one
     */
    public static int startWithFinal(String str) {
        String meta = formatMeta(str);
        for (String s : SIMPLE_FINALS) {
            if (meta.startsWith(s)) {
                return s.length();
            }
        }
        return 0;
    }

    /**
     * Checks whether the word (first character already removed) ends with one
     * of the simple finals.
     *
     * @param str the word without its first character
     * @return the insert position in the original word right before that
     *         trailing final, or 0 if the word does not end with one
     */
    private static int containsFinal(String str) {
        String meta = formatMeta(str);
        for (String s : SIMPLE_FINALS) {
            if (meta.endsWith(s)) {
                // Position of the TRAILING occurrence; indexOf would report an
                // earlier one when the same letters also occur sooner.
                return meta.length() - s.length() + 1;
            }
        }
        return 0;
    }

    /** Returns true when the text, tone marks ignored, is exactly one of the simple finals. Currently unused. */
    private static boolean equalsFinal(String str) {
        String meta = formatMeta(str);
        for (String s : SIMPLE_FINALS) {
            if (meta.equals(s)) {
                return true;
            }
        }
        return false;
    }

    /** Returns true when the text, tone marks ignored, is exactly one of the entries in {@link #FULL_FINALS}. */
    private static boolean equalsFullFinal(String str) {
        String meta = formatMeta(str);
        for (String s : FULL_FINALS) {
            if (meta.equals(s)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces every tone-marked vowel with its plain letter (ü and its toned
     * forms become {@code v}). Each replacement is one character for one
     * character, so the length and all indexes are preserved.
     */
    private static String formatMeta(String str) {
        str = replaceEach(str, TONES_A, "a");
        str = replaceEach(str, TONES_O, "o");
        str = replaceEach(str, TONES_E, "e");
        str = replaceEach(str, TONES_I, "i");
        str = replaceEach(str, TONES_U, "u");
        str = replaceEach(str, TONES_V, "v");
        return str;
    }

    /** Replaces all occurrences of every string in {@code marks} with {@code plain}. */
    private static String replaceEach(String text, String[] marks, String plain) {
        for (String mark : marks) {
            text = text.replace(mark, plain);
        }
        return text;
    }
}