简单的fetch并行改串行控制
/**
* 基于fetch的并行控制
* 【郑重声明】 该代码著作权归瞌睡虫子所有。其他第三方使用该代码用于商用活动,引起的法律纠纷与作者无关,同时作者保留追责权力。
*
* 2024-12-29
* 后台,序列http下载,后台爬虫框架http部分
* ____________________________________________________________________________________________________________________________________
*/
// Request queue that throttles fetch(): it caps how many requests are in
// progress at once and waits `interval` ms before sending each one.
class FetchQueue {
  /**
   * @param {number} [maxConcurrentRequests=1] Maximum number of requests that
   *   may be in progress at the same time. Must be a positive number: with 0
   *   or NaN no task is ever started.
   * @param {number} [interval=1000] Delay in milliseconds applied before each
   *   request is actually sent.
   */
  constructor(maxConcurrentRequests = 1, interval = 1000) {
    console.log("创建爬虫");
    this.maxConcurrentRequests = maxConcurrentRequests;
    // Number of tasks that currently hold a slot (waiting out the delay or fetching).
    this.currentlyRunning = 0;
    // Tasks that have not been started yet, oldest first.
    this.queue = [];
    this.interval = interval;
  }

  /**
   * Schedule a fetch of `url` and deliver the response body as text.
   *
   * The task is always placed in `queue` first and started by `next()`, so
   * every task that has not started yet is visible in `queue`. Emptying
   * `queue` therefore discards all not-yet-started tasks; the promises of
   * discarded tasks never settle.
   *
   * @param {string} url Address to fetch.
   * @returns {Promise<string>} Resolves with the body text; rejects on a
   *   network error or when the response status is not in the 2xx range.
   */
  add(url) {
    return new Promise((resolve, reject) => {
      // Free the slot, hand the outcome to the caller, then start the next task.
      const settle = (deliver, value) => {
        this.currentlyRunning--;
        deliver(value);
        this.next();
      };
      const task = () => {
        setTimeout(() => {
          fetch(url)
            .then((response) => {
              // fetch() only rejects on network failure; treat HTTP error
              // statuses as failures too instead of returning an error page.
              if (!response.ok) {
                throw new Error(`HTTP ${response.status} for ${url}`);
              }
              return response.text();
            })
            .then(
              (body) => settle(resolve, body),
              (error) => settle(reject, error)
            );
        }, this.interval);
      };
      this.queue.push(task);
      this.next();
    });
  }

  /**
   * Start queued tasks while free slots remain. The slot is taken here,
   * before the task's delay begins, so tasks waiting out their delay count
   * against the concurrency limit.
   */
  next() {
    while (this.queue.length > 0 && this.currentlyRunning < this.maxConcurrentRequests) {
      const task = this.queue.shift();
      this.currentlyRunning++;
      task();
    }
  }
}
接收content.js的消息,控制后台下载,并将结果返回给content.js渲染到界面
background.js
// Single shared queue for all page lookups: at most one request at a time,
// with a 620 ms delay before each request.
const maxConcurrentRequests = 1; // maximum number of concurrent requests
const fetchQueue = new FetchQueue(maxConcurrentRequests, 620);

/**
 * Message router for the background script.
 *
 * Handled `request.method` values:
 *  - "fetchUrl":  fetch `request.url` through the queue, extract the number
 *                 preceding "人次" from the HTML, and reply asynchronously with
 *                 `{ id, msg, flag }` (`flag` is true only when a number was found).
 *  - "clear":     drop every task still waiting in the queue and reply at once.
 *  - "openPopup": open the extension's action popup; no reply is sent.
 *
 * Returns true only for "fetchUrl", to keep the message channel open until
 * the asynchronous reply is sent.
 */
chrome.runtime.onMessage.addListener(function (request, sender, sendResponse) {
  if (!request) {
    return false; // nothing to dispatch on
  }
  if (request.method === "fetchUrl") {
    // Add the fetch request to the queue.
    fetchQueue.add(request.url)
      .then(function (html) {
        // Extract the popularity value with a regular expression.
        const match = html.match(/(\d+)\s*人次/);
        if (match && match[1]) {
          // match[1] is the popularity value.
          sendResponse({
            id: request.id,
            msg: match[1],
            flag: true
          });
        } else {
          sendResponse({
            id: request.id,
            msg: "未找到人气值",
            flag: false
          });
          console.log('未找到人气值');
        }
      })
      .catch(() => sendResponse({
        id: request.id,
        msg: "请求失败",
        flag: false
      }));
    return true; // the response is sent asynchronously
  }
  if (request.method === "clear") {
    fetchQueue.queue = [];
    sendResponse({
      msg: "队列清空成功",
      flag: true
    });
    return false;
  }
  // Was `message.method`: `message` is not defined in this listener, so every
  // message other than "fetchUrl"/"clear" threw a ReferenceError.
  if (request.method === "openPopup") {
    chrome.action.openPopup();
  }
  return false; // not handled asynchronously
});
// When the user switches tabs, tell the newly active tab (if its URL matches
// the target pattern) to reset the queue and re-scan the page.
chrome.tabs.onActivated.addListener(function (activeInfo) {
  // Look up the details of the tab that was just activated.
  chrome.tabs.get(activeInfo.tabId, function (tab) {
    // A failed lookup (e.g. the tab is already closed) is reported through
    // runtime.lastError, and `tab.url` is absent when the extension lacks
    // permission for that tab, so bail out instead of throwing on `.match`.
    if (chrome.runtime.lastError || !tab || typeof tab.url !== "string") {
      return;
    }
    // NOTE(review): /目标网页/ looks like a placeholder pattern — confirm the
    // real target-site URL pattern.
    if (tab.url.match(/目标网页/)) {
      console.log('Active tab URL: ' + tab.url);
      chrome.tabs.sendMessage(activeInfo.tabId, {
        method: "clear"
      }, function () {
        // The content script sends no reply (and may not be loaded at all);
        // reading lastError marks that expected failure as handled.
        void chrome.runtime.lastError;
      });
    }
  });
});
与background.js通讯,发送任务消息并等待结果,再渲染
content.js
/**
* Author: 瞌睡虫子
*
* 2024-11-5
* Fetches popularity values and displays them on the page.
*/
// Startup trace: log the URL of the page this script is running on.
console.log('开始工作啦:', location.href);
// Produce a random identifier in UUID-v4 layout. Digits come from
// Math.random(), so the value is not cryptographically strong.
function generateUUID() {
  const template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx';
  return template.replace(/[xy]/g, (placeholder) => {
    // Random integer in 0..15.
    const nibble = Math.floor(Math.random() * 16);
    // 'y' is restricted to 8, 9, a or b; 'x' takes any hex digit.
    const digit = placeholder === 'y' ? ((nibble & 0x3) | 0x8) : nibble;
    return digit.toString(16);
  });
}
/**
 * Scan the page for list links that have not been annotated yet, ask the
 * background script for each link's popularity value, and append the value
 * next to the link once it arrives.
 *
 * A link is marked with the class "fetch" after it has been annotated so that
 * later scans skip it.
 */
function getRQ() {
  // `:not(.fetch)` also skips anchors that carry other classes besides "fetch".
  document.querySelectorAll("li em a:not(.fetch)").forEach(function (anchor) {
    const uuid = "L_" + generateUUID();
    chrome.runtime.sendMessage({
      method: "fetchUrl",
      url: anchor.href,
      id: uuid
    }, function (response) {
      // No response arrives when the background side is unavailable or the
      // request was dropped; lastError is set in that case.
      if (chrome.runtime.lastError || !response) {
        return;
      }
      if (response.id !== uuid || !response.flag || !response.msg) {
        return;
      }
      // A second scan may have requested the same link while this request
      // was still in flight; annotate only once.
      if (anchor.classList.contains("fetch")) {
        return;
      }
      anchor.classList.add("fetch");
      const target = anchor.parentElement && anchor.parentElement.nextElementSibling;
      if (!target) {
        return; // nowhere to display the value
      }
      // Build the label as DOM nodes: unlike `innerHTML +=`, this neither
      // re-parses the sibling's existing content nor interprets `msg` as HTML.
      const label = document.createElement("span");
      label.style.cssText = "font-weight: normal;font-size: 12px;color: black;";
      label.textContent = "人气:" + response.msg;
      target.append(" ", label);
    });
  });
}
// Resolve the MutationObserver constructor, falling back to vendor-prefixed names.
var MutationObserver = window.MutationObserver || window.WebKitMutationObserver || window.MozMutationObserver;
// Only additions/removals of direct children of the observed element are reported.
const options = {
  childList: true
};
/**
 * Observer that annotates list items added to an observed element after the
 * initial scan: for every added <li> it requests the popularity value of the
 * item's "em a" link from the background script and appends it next to the link.
 */
const mutation = new MutationObserver(function (mutationRecords) {
  for (const record of mutationRecords) {
    if (record.type !== 'childList') {
      continue;
    }
    console.log("加载新数据…");
    // Handle the added nodes after a 500 ms delay.
    setTimeout(function () {
      record.addedNodes.forEach(function (added) {
        if (added.nodeType !== 1 || added.nodeName !== "LI") {
          return;
        }
        const anchor = added.querySelector("em a");
        if (!anchor) {
          return; // this <li> has no link to look up
        }
        const uuid = "L_" + generateUUID();
        chrome.runtime.sendMessage({
          method: "fetchUrl",
          url: anchor.href,
          id: uuid
        }, function (response) {
          // No response arrives when the background side is unavailable or
          // the request was dropped; lastError is set in that case.
          if (chrome.runtime.lastError || !response) {
            return;
          }
          if (response.id !== uuid || !response.flag || !response.msg) {
            return;
          }
          // Another scan may already have annotated this link.
          if (anchor.classList.contains("fetch")) {
            return;
          }
          anchor.classList.add("fetch");
          const target = anchor.parentElement && anchor.parentElement.nextElementSibling;
          if (!target) {
            return; // nowhere to display the value
          }
          // Build the label as DOM nodes: unlike `innerHTML +=`, this neither
          // re-parses the sibling's existing content nor interprets `msg` as HTML.
          const label = document.createElement("span");
          label.style.cssText = "font-weight: normal;font-size: 12px;color: black;";
          label.textContent = "人气:" + response.msg;
          target.append(" ", label);
        });
      });
    }, 500);
  }
});
// Content-script side of tab activation: when a "clear" message arrives, ask
// the background script to empty its queue, then re-scan the page.
chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
  if (message && message.method === "clear") {
    // The tab was activated; restart fetching.
    console.log("页面激活,继续抓取…");
    try {
      chrome.runtime.sendMessage({
        method: "clear"
      }, function (response) {
        if (chrome.runtime.lastError) {
          // Report the failure instead of leaving lastError unchecked.
          console.warn("clear request failed:", chrome.runtime.lastError.message);
        }
        getRQ();
      });
    } catch (error) {
      // Best effort: a failed send must not break the listener, but it
      // should be visible in the console instead of being swallowed.
      console.warn("could not send clear request:", error);
    }
  }
  return false; // no asynchronous response is sent
});
// Attach the observer to every <ul> on the page, using the shared observer options.
for (const listElement of document.querySelectorAll('ul')) {
  mutation.observe(listElement, options);
}
// Ask the background script to empty its fetch queue.
const clearRequest = { method: "clear" };
chrome.runtime.sendMessage(clearRequest);
// Run the initial scan of the page.
getRQ();