iOS 从文件读取中文(使用 UTF-8 编码)时,部分生僻的中文字符在 UTF-8 中会占用 4 个字节,读入 NSString 后由两个 UTF-16 码元组成。如果按长度 1 逐个分割并加入数组,这类字就会被拆成两半而出现错误。如 𬺓 字,在 NSString 中会占用两个长度。
// Counter-example, kept as-is on purpose: splits the text one NSString length unit at a time.
// NSString's length and substringWithRange: count UTF-16 code units, so a character stored
// as two units (e.g. 𬺓) is cut into two halves that are not valid characters on their own.
// Locate cn_1.1.txt inside the bundle found at _bundlePath.
NSString *files = [[NSBundle bundleWithPath:_bundlePath] pathForResource:@"cn_1.1" ofType:@"txt"];
// Read the whole file as UTF-8 (read errors are ignored) and strip every newline.
NSString *lines = [[NSString stringWithContentsOfFile:files encoding:NSUTF8StringEncoding error:nil] stringByReplacingOccurrencesOfString:@"\n" withString:@""];
NSMutableArray *characterArray = [NSMutableArray array];
// Fixed window of length 1; range.length never changes, so the loop always advances by 1.
NSRange range = NSMakeRange(0, 1);
for(int i = 0; i < lines.length; i += range.length){
range.location = i;
// Takes exactly one UTF-16 unit, which may be only half of a two-unit character.
NSString *character = [lines substringWithRange:range];
[characterArray addObject:character];
}
使用 [str rangeOfComposedCharacterSequenceAtIndex:i] 方法可以获得该文字的真实范围(长度),按这个范围分割就可以得到正确的结果:
// Splits the contents of cn_1.1.txt into an array of user-visible characters.
// NSString indexes by UTF-16 code unit, so each step asks the string for the full
// composed character sequence at the current index instead of assuming a length of 1.
// Characters that occupy two units (e.g. 𬺓) therefore stay intact.
NSString *files = [[NSBundle bundleWithPath:_bundlePath] pathForResource:@"cn_1.1" ofType:@"txt"];

// Surface read failures instead of silently discarding them; check the return value,
// not the error pointer.
NSError *readError = nil;
NSString *contents = [NSString stringWithContentsOfFile:files
                                               encoding:NSUTF8StringEncoding
                                                  error:&readError];
if (contents == nil) {
    NSLog(@"Failed to read %@: %@", files, readError);
}

// Messaging nil yields nil here, so a failed read leaves characterArray empty.
NSString *lines = [contents stringByReplacingOccurrencesOfString:@"\n" withString:@""];
NSMutableArray<NSString *> *characterArray = [NSMutableArray array];

// NSUInteger matches the type of -length and avoids a signed/unsigned comparison.
NSUInteger index = 0;
while (index < lines.length) {
    NSRange range = [lines rangeOfComposedCharacterSequenceAtIndex:index];
    [characterArray addObject:[lines substringWithRange:range]];
    // Jump to the end of the sequence just consumed; this is correct even if the
    // returned range begins before index.
    index = NSMaxRange(range);
}