介绍
Puppeteer 是一个 Node 库,它提供了一个高级 API,通过 DevTools 协议来控制 Chromium 或 Chrome。Puppeteer 默认以 headless(无界面)模式运行,但也可以通过启动参数(例如 puppeteer.launch({ headless: false }))以“有头”(有界面)模式运行。
https://try-puppeteer.appspot.com/
安装
npm install puppeteer
示例
const puppeteer = require('puppeteer');

/**
 * Demo script: opens a visible (non-headless) browser, loads the resume search
 * page, clicks every link matched by the XPath below, saves a screenshot to
 * ./test.png and shuts the browser down.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    // devtools: true
  });

  // try/finally guarantees the browser process is closed even when a step
  // below throws (navigation timeout, missing frame, ...).
  try {
    const cookies = [];
    const page = await browser.newPage();
    await page.setViewport({
      width: 1280,
      height: 960,
    });
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36');
    await page.setCookie(...cookies);
    await page.goto('https://rd5.zhaopin.com/custom/searchv2/result');

    // NOTE(review): the navigator.webdriver override is applied to the current
    // document only, so it presumably does not survive the reload below (the
    // localStorage entry does). page.evaluateOnNewDocument() is the API for
    // persisting such overrides across navigations — confirm the intent.
    await page.evaluate(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      window.localStorage.setItem('intro_search_version', '1');
    });
    await page.reload();
    await page.waitFor(2000);

    // frames()[0] is used as the frame that holds the result list.
    const listFrame = page.frames()[0];
    const resumeList = await listFrame.$x("//a[@class='is-text-normal']");
    console.log(`match Resume ${resumeList.length}`);

    // Click the links one at a time; each click is awaited so a failure is
    // reported here instead of becoming an unhandled rejection.
    for (const resumeLink of resumeList) {
      await resumeLink.click();
      await listFrame.waitFor(1000);
    }

    await page.waitFor(2000);
    await page.screenshot({ path: './test.png' });
    await page.close();
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Surface any failure and signal it through the process exit code.
  console.error(err);
  process.exitCode = 1;
});
方法说明
设置页面大小
// Viewport dimensions (in pixels) applied to the page.
const viewport = { width: 1280, height: 960 };
await page.setViewport(viewport);
设置UA
// User-Agent string the page will send (a desktop Chrome 73 on macOS UA).
const userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36';
await page.setUserAgent(userAgent);
设置cookie
// Each cookie is an object with at least `name` and `value`; the placeholders
// here are empty and are meant to be filled in.
const cookies = [
  { name: '', value: '' },
];
// setCookie takes the cookies as separate arguments, hence the spread.
await page.setCookie(...cookies);
刷新页面
// Reload the page and wait for the reload call to finish.
await page.reload();
新页面(新标签页)操作
const page = await browser.newPage();
// Subscribe to the browser's 'targetcreated' event BEFORE clicking, so the
// target opened by the click is not missed. `once` detaches the listener after
// the first event instead of leaving it registered on the browser.
const newPagePromise = new Promise((resolve) => {
  browser.once('targetcreated', (target) => resolve(target.page()));
});
await page.click("#test");
// Resolves to the page object of the newly created target.
const newPage = await newPagePromise;
await newPage.waitFor(3000);
const page2Content = await newPage.content();
console.log(page2Content);
await newPage.close();
await page.close();
设置代理
// Launch options: visible browser, with traffic routed through a proxy.
const launchOptions = {
  headless: false,
  // devtools: true,
  // this.proxy holds the proxy address; the original note gives 127.0.0.1:1080.
  args: ['--proxy-server=' + this.proxy],
};
const browser = await puppeteer.launch(launchOptions);
禁用图片加载
/**
 * Turns on request interception for `page` and registers a 'request' listener
 * that aborts requests whose resource type is 'image' and lets every other
 * request continue.
 *
 * @param {object} page - Object exposing setRequestInterception() and on(),
 *   as used below.
 * @returns {Promise<void>} Settles after interception is enabled and the
 *   listener is attached.
 */
async function disableImage(page) {
  // Decide per request: images are aborted, anything else continues.
  const handleRequest = (req) => {
    const action = req.resourceType() === 'image' ? 'abort' : 'continue';
    req[action]();
  };

  await page.setRequestInterception(true);
  page.on('request', handleRequest);
}