iOS 富文本转换生成的 HTML 过于标准(是一份完整的 HTML 文档),但后端只需要 body 中的内容。为了兼容后端,需要用正则替换部分标签内容。我遇到的几种情况如下(替换完成后取出 <body> 与 </body> 之间的内容即可):
1.iOS 生成的 HTML 的样式定义在 <head> 标签下的 <style> 中,body 内容里面的 class 需要替换成对应的内联 style
2.自定义引用功能,前后不需要<p><span>引用内容</span></p>,需要去除<p><span>和</span></p> 只保留引用内容
3.自定义线条也需要去除<p><span>和</span></p>,只保留自定义 <hr> 标签的内容
4.<img><video>也需要去除<p><span>和</span></p> 只保留<img><video>内容
/// Returns every substring of `validateString` that matches the pattern `regex`.
///
/// - Parameters:
///   - regex: An `NSRegularExpression` pattern.
///   - validateString: The text to search.
/// - Returns: The matched substrings in order of appearance. An empty array is
///   returned when the pattern fails to compile or nothing matches.
private func regularExpression(regex: String, validateString: String) -> [String] {
    guard let expression = try? NSRegularExpression(pattern: regex, options: []) else {
        // An invalid pattern is deliberately treated the same as "no matches".
        return []
    }
    // NSRegularExpression addresses text in UTF-16 code units, so the search range
    // must be built from the NSString length. `String.count` counts Characters and
    // comes up short for emoji / combining sequences, which would leave the tail of
    // the text unsearched.
    let text = validateString as NSString
    let fullRange = NSRange(location: 0, length: text.length)
    return expression
        .matches(in: validateString, options: [], range: fullRange)
        .map { text.substring(with: $0.range) }
}
/// Replaces `class="…"` attributes in `greeting` with inline `style="…"` attributes.
///
/// For each class value found, the first `<class> { … }` rule in the same text is
/// looked up and its declarations become the `style` value. Attributes for which no
/// rule is found are left untouched.
///
/// - Parameter greeting: HTML text that contains both the style rules and the markup
///   referring to them.
/// - Returns: The HTML with every resolvable `class` attribute rewritten as `style`.
public func k_regularExpression(greeting: String) -> String {
    var inlined = greeting
    var handledClasses = Set<String>()

    // Collect every class attribute present in the original text.
    let classAttributes = regularExpression(regex: "class=\"[^\"]*\"", validateString: greeting)
    for attribute in classAttributes {
        // Extract the value between the two quotes.
        guard let openQuote = attribute.firstIndex(of: "\""),
              let closeQuote = attribute.lastIndex(of: "\"") else { continue }
        let className = String(attribute[attribute.index(after: openQuote)..<closeQuote])

        // An empty class would produce a pattern that matches an arbitrary rule.
        guard !className.isEmpty else { continue }
        // The replacement below rewrites every use of a class at once, so a class
        // that has already been processed does not need another rule lookup.
        guard handledClasses.insert(className).inserted else { continue }

        // Escape the name so regex metacharacters in it are matched literally.
        let escapedName = NSRegularExpression.escapedPattern(for: className)
        let rulePattern = "\(escapedName) \\{[^}]*\\}"

        // Take the declarations between the braces of the first matching rule.
        guard let rule = regularExpression(regex: rulePattern, validateString: greeting).first,
              let openBrace = rule.firstIndex(of: "{"),
              let closeBrace = rule.lastIndex(of: "}") else { continue }
        let declarations = rule[rule.index(after: openBrace)..<closeBrace]

        inlined = inlined.replacingOccurrences(of: "class=\"\(className)\"",
                                               with: "style=\"\(declarations)\"")
    }
    return inlined
}
/// Replaces the paragraphs wrapping the `[-blockquote-]` / `[/-blockquote-]` markers
/// with real `<blockquote>` open / close tags.
///
/// - Parameter html: HTML text that may contain the marker paragraphs.
/// - Returns: The HTML with each marker paragraph replaced by the corresponding tag.
public func html_Blockquotexpression(html: String) -> String {
    // Match exactly one <p>…</p> that contains a marker. The tempered `(?:(?!</p>).)*?`
    // cannot run past a closing </p>, so a match never starts at an earlier, unrelated
    // paragraph on the same line and never merges an opening and a closing marker.
    let markerParagraph = "<p\\b[^>]*>(?:(?!</p>).)*?-blockquote-.*?</p>"
    let openingTag = "<blockquote style='border-left: 3px solid #eee; padding-left: 0.5em; margin:0'>"
    let closingTag = "</blockquote>"

    var output = html
    for paragraph in regularExpression(regex: markerParagraph, validateString: html) {
        if paragraph.contains("[-blockquote-]") {
            output = output.replacingOccurrences(of: paragraph, with: openingTag)
        } else if paragraph.contains("[/-blockquote-]") {
            output = output.replacingOccurrences(of: paragraph, with: closingTag)
        }
    }
    return output
}
/// Unwraps the custom horizontal-rule markup from its surrounding paragraph.
///
/// Every `<p>…</p>` that contains the fixed `<br/> <hr/> <br/>` markup is replaced by
/// that markup alone.
///
/// - Parameter html: HTML text that may contain the wrapped rule markup.
/// - Returns: The HTML with each wrapping paragraph replaced by the bare rule markup.
public func html_hrexpression(html: String) -> String {
    let ruleMarkup = "<br style=\"font-size:16px\" /> <hr width=\"60%\" color=\"#000000\" style=\"font-size:1px\" /> <br style=\"font-size:16px\" />"
    // The markup is matched as literal text, so escape it before embedding it in the
    // pattern. The tempered `(?:(?!</p>).)*?` keeps the match inside a single
    // paragraph instead of starting at an earlier <p> on the same line.
    let escapedMarkup = NSRegularExpression.escapedPattern(for: ruleMarkup)
    let wrappedRule = "<p\\b[^>]*>(?:(?!</p>).)*?\(escapedMarkup).*?</p>"

    var output = html
    for paragraph in regularExpression(regex: wrappedRule, validateString: html) {
        output = output.replacingOccurrences(of: paragraph, with: ruleMarkup)
    }
    return output
}
/// Moves the `</span>` that follows an `<img src=…>` tag in front of that tag.
///
/// - Parameter html: HTML text to process.
/// - Returns: The HTML in which each `<img src=…</span>` fragment has been rewritten
///   as `</span>` followed by the fragment without its `</span>`.
public func html_imagepression(html: String) -> String {
    let closingSpan = "</span>"
    let fragments = regularExpression(regex: "<img src=.*?</span>", validateString: html)
    return fragments.reduce(html) { partial, fragment in
        // Hoist the closing span so the image ends up outside of it.
        let hoisted = closingSpan + fragment.replacingOccurrences(of: closingSpan, with: "")
        return partial.replacingOccurrences(of: fragment, with: hoisted)
    }
}
/// Moves the `</span>` that follows a `<video src=…>` tag in front of that tag.
///
/// - Parameter html: HTML text to process.
/// - Returns: The HTML in which each `<video src=…</span>` fragment has been rewritten
///   as `</span>` followed by the fragment without its `</span>`.
public func html_videopression(html: String) -> String {
    let closingSpan = "</span>"
    let fragments = regularExpression(regex: "<video src=.*?</span>", validateString: html)
    return fragments.reduce(html) { partial, fragment in
        // Hoist the closing span so the video ends up outside of it.
        let hoisted = closingSpan + fragment.replacingOccurrences(of: closingSpan, with: "")
        return partial.replacingOccurrences(of: fragment, with: hoisted)
    }
}
收集的其他正则
// string regexstr = @"<[^>]*>"; // 去除所有的标签
// string regexstr = @"<script[^>]*?>.*?</script>"; // 去除所有脚本,中间部分也删除
// string regexstr = @"<img[^>]*>"; // 去除图片的正则
// string regexstr = @"<(?!br).*?>"; // 去除所有标签,只剩br
// string regexstr = @"<table[^>]*?>.*?</table>"; // 去除table里面的所有内容
// string regexstr = @"<(?!img|br|p|/p).*?>"; // 去除所有标签,只剩img,br,p