分词是指将一段字符串(string)按照语法切分成若干个短语、词或字,多用于搜索场景。
/// Splits a string into tokens at word boundaries using CFStringTokenizer and
/// the current locale.
///
/// @param originalString The text to tokenize.
/// @return A mutable array containing the substring of each token, in the order
///         the tokenizer reports them. Returns nil when originalString is nil or
///         empty, and an empty array if the tokenizer could not be created.
- (NSMutableArray *)participleWithString:(NSString *)originalString {
    // Messaging nil yields 0, so this covers both nil and the empty string.
    if (!originalString.length) return nil;

    NSMutableArray *participleArray = [NSMutableArray array];

    // Copy/Create functions hand back owned references: both the locale and the
    // tokenizer must be balanced with CFRelease below.
    CFLocaleRef locale = CFLocaleCopyCurrent();
    CFRange fullRange = CFRangeMake(0, (CFIndex)originalString.length);

    // __bridge borrows the NSString as a CFStringRef without transferring
    // ownership, which is what ARC requires for this cast.
    CFStringTokenizerRef tokenizer =
        CFStringTokenizerCreate(kCFAllocatorDefault,
                                (__bridge CFStringRef)originalString,
                                fullRange,
                                kCFStringTokenizerUnitWordBoundary,
                                locale);

    // Creation can fail; CFRelease(NULL) crashes, so only use and release the
    // tokenizer when we actually got one.
    if (tokenizer != NULL) {
        // Position on the token covering index 0, then walk forward until the
        // tokenizer reports that no token is left.
        CFStringTokenizerTokenType tokenType = CFStringTokenizerGoToTokenAtIndex(tokenizer, 0);
        while (tokenType != kCFStringTokenizerTokenNone) {
            CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
            NSRange substringRange = NSMakeRange((NSUInteger)tokenRange.location,
                                                 (NSUInteger)tokenRange.length);
            [participleArray addObject:[originalString substringWithRange:substringRange]];

            tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer);
        }
        CFRelease(tokenizer);
    }

    if (locale != NULL) {
        CFRelease(locale);
    }
    return participleArray;
}