- 待匹配的字符串
// Sample HTML fragment used as the input string for the regex examples:
// three anchors followed by a nested <div> that holds three images.
// Built line by line; joining with "\n" yields the same multi-line text.
const str = [
  '<div>',
  '<a href="https://www.baidu.com">百度</a>',
  '<a href="http://www.qq.com">QQ</a>',
  '<a href="https://www.sina.com">新浪</a>',
  '<div>',
  '<img src="https://www.sina.com/1.jpg" alt="">',
  '<img src="http://www.qq.com/2.png" alt="">',
  '<img src="https://www.sina.com/3.png" alt="">',
  '</div>',
  '</div>',
].join('\n');
获取字符串中所有 a 标签的 href 的值及其中间的文本内容
// Collect the href value and the inner text of every <a> tag in `str`.
// Capture groups: 1 = the whole tag, 2 = the href value, 3 = the inner text.
// `\b` keeps `<a` from matching longer tag names such as <abbr>; `[^>]*`
// keeps attribute scanning inside a single tag; the lazy `.*?` stops at the
// first closing quote / first </a>, so several anchors on one line (or extra
// quoted attributes after href) are reported separately.
const linkReg = /(<a\b[^>]*?href\s*=\s*['"](.*?)['"][^>]*>(.*?)<\/a>)/gi;
const linkArray = [];
// replace() is used only to visit each match. The replacer is called with
// (fullMatch, group1, group2, group3, ...), so href is the 3rd argument and
// the text is the 4th. Returning the full match leaves the text unchanged.
str.replace(linkReg, (whole, tag, href, text) => {
  linkArray.push({ href, text });
  return whole;
});
console.log(linkArray);
输出结果:
[
{
"href": "https://www.baidu.com",
"text": "百度"
},
{
"href": "http://www.qq.com",
"text": "QQ"
},
{
"href": "https://www.sina.com",
"text": "新浪"
}
]
匹配所有的 img 标签的 src 的值
// Collect the src value of every <img> tag in `str`, in two equivalent ways.
// Capture groups: 1 = the whole tag, 2 = the src value.
// `[^>]*` keeps the scan inside a single tag and `(.*?)` stops at the first
// closing quote, so several <img> tags on one line are matched one by one
// instead of being swallowed into a single match.
const imgArray = [];
const imgReg = /(<img\b[^>]*?src\s*=\s*['"](.*?)['"][^>]*>)/gi;
// Approach 1: use replace() purely to visit each match. The replacer receives
// (fullMatch, group1, group2, ...); returning the full match changes nothing.
str.replace(imgReg, (whole, tag, src) => {
  imgArray.push(src);
  return whole;
});
// ---- or ----
// Approach 2: call exec() repeatedly. A global regex remembers its position
// in `lastIndex`, so reset it explicitly before starting a fresh scan.
const imgArray2 = [];
let match = null;
imgReg.lastIndex = 0;
while ((match = imgReg.exec(str)) !== null) {
  imgArray2.push(match[2]);
}
// Both approaches produce the same result.
console.log(imgArray);
console.log(imgArray2);
输出结果:
[
"https://www.sina.com/1.jpg",
"http://www.qq.com/2.png",
"https://www.sina.com/3.png"
]
匹配出字符串中所有的 URL，不区分 HTML 标签
// Find every URL-like token in `str`, whether or not it sits inside an HTML tag.
// The scheme group `(https?:\/\/)?` is optional, so bare dotted tokens such as
// "www.example.com" match as well.
const urlReg = /(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?/ig;
// match() with a global regex returns null (not an empty array) when nothing
// matches; fall back to [] so the result can always be iterated safely.
const urlArrray = str.match(urlReg) || [];
输出结果:
[
"https://www.baidu.com",
"http://www.qq.com",
"https://www.sina.com",
"https://www.sina.com/1.jpg",
"http://www.qq.com/2.png",
"https://www.sina.com/3.png"
]