在某些场景下,需要将网页(HTML)文本解析为普通字符串并保留其格式。下面分别列举 Objective-C 和 Swift 的解析方法。
OC
注意:OC 版本没有处理 <p> 标签,如需处理请参考下方的 Swift 实现。
/// Returns a copy of the receiver with a small set of HTML named character
/// references decoded to their literal characters.
///
/// Handled entities: &lt; &gt; &quot; &reg; &copy; &trade; &ensp; &emsp; &nbsp; &amp;.
/// Paragraph tags are not processed by this method.
///
/// @return A new string with the entities above replaced; anything else is left untouched.
- (NSString *)html5CodeToString {
    // Ordered (entity, replacement) pairs. An array is used rather than a
    // dictionary because the replacement order matters (see &amp; below).
    NSArray<NSArray<NSString *> *> *entities = @[
        @[@"&lt;", @"<"],
        @[@"&gt;", @">"],
        // &quot; is the straight double quote (U+0022), not a curly quote.
        @[@"&quot;", @"\""],
        @[@"&reg;", @"®"],
        @[@"&copy;", @"©"],
        @[@"&trade;", @"™"],
        // NOTE(review): the three space-like entities are all mapped to a plain
        // space; confirm whether the original width/non-breaking behaviour matters.
        @[@"&ensp;", @" "],
        @[@"&emsp;", @" "],
        @[@"&nbsp;", @" "],
        // &amp; must be decoded last: decoding it earlier would turn e.g.
        // "&amp;quot;" into "&quot;" and then into a quote (double decoding).
        @[@"&amp;", @"&"],
    ];

    NSString *str = self;
    for (NSArray<NSString *> *pair in entities) {
        str = [str stringByReplacingOccurrencesOfString:pair[0] withString:pair[1]];
    }
    return str;
}
Swift
extension String {
    /// Returns a copy of the string with `<p>` paragraph markup converted to
    /// newline-terminated text and a small set of HTML named character
    /// references decoded to their literal characters.
    ///
    /// Handled entities: `&lt; &gt; &quot; &reg; &copy; &trade; &ensp; &emsp; &nbsp; &amp;`.
    ///
    /// - Returns: The decoded string; unrecognised markup and entities are left untouched.
    func html5CodeToString() -> String {
        // Handle paragraph tags before decoding entities, so that an escaped
        // literal such as "&lt;p&gt;" is shown as "<p>" instead of being
        // mistaken for real markup and removed.
        var str = self.replacingOccurrences(of: "<p>", with: "")
        str = str.replacingOccurrences(of: "</p>", with: "\n")

        // Ordered (entity, replacement) pairs; order matters (see &amp; below).
        let entities: [(String, String)] = [
            ("&lt;", "<"),
            ("&gt;", ">"),
            // &quot; is the straight double quote (U+0022).
            ("&quot;", "\""),
            ("&reg;", "®"),
            ("&copy;", "©"),
            ("&trade;", "™"),
            // NOTE(review): the three space-like entities are all mapped to a
            // plain space; confirm whether width/non-breaking behaviour matters.
            ("&ensp;", " "),
            ("&emsp;", " "),
            ("&nbsp;", " "),
            // &amp; must be decoded last: decoding it earlier would turn e.g.
            // "&amp;quot;" into "&quot;" and then into a quote (double decoding).
            ("&amp;", "&"),
        ]
        for (entity, character) in entities {
            str = str.replacingOccurrences(of: entity, with: character)
        }
        return str
    }
}