# Scrape videos from Pear Video (pearvideo.com) category listings.
import re

import requests
from lxml import etree

# Fetch the listing page. NOTE: "网址" (literally "URL") is a placeholder and
# must be replaced with the real listing-page address before running.
# A timeout keeps the script from hanging forever on an unresponsive server;
# 10 seconds is an arbitrary but reasonable default.
res = requests.get("网址", timeout=10)
# Fail early on an HTTP error instead of parsing an error page as the listing.
res.raise_for_status()

# Parse the HTML text into an element tree that can be queried with XPath.
res_xpath = etree.HTML(res.text)

# XPath usage examples (the result of this call is intentionally discarded):
# an absolute path from the document root; text() extracts the text content.
res_xpath.xpath('/html/head/title/text()')
# A leading '//' matches the element at any depth in the document instead:
# res_xpath.xpath('//title/text()')
# Download every video linked from the category list of the listing page.
# For each <li>: open its detail page, pull the video URL and the page title
# out of the HTML with regular expressions, and save the video as
# "<title>.mp4" in the current working directory.
for li in res_xpath.xpath('//ul[@id="categoryList"]/li'):
    hrefs = li.xpath('./div/a/@href')
    if not hrefs:
        # List item without a detail-page link: nothing to download.
        continue
    url = 'http://www.pearvideo.com/' + hrefs[0]

    # NOTE: this rebinds the module-level name `res` (as the original did).
    res = requests.get(url, timeout=10)
    res.raise_for_status()

    # re.S makes '.' match newlines too, so a value that spans several lines
    # in the page source is still captured.
    video_match = re.search(r'srcUrl="(.*?)"', res.text, re.S)
    if video_match is None:
        # The detail page does not expose a srcUrl; skip it instead of
        # crashing the whole run.
        continue
    video = video_match.group(1)

    # The title lives in the page's <title> element (the original pattern
    # wrongly required a 'srcUrl="' prefix and could never match).
    title_match = re.search(r'<title>(.*?)</title>', res.text, re.S)
    raw_title = title_match.group(1) if title_match else ''
    # Make the title safe to use as a file name: replace characters that are
    # reserved in file names and collapse whitespace/newlines to single spaces.
    title = ' '.join(re.sub(r'[\\/:*?"<>|]', '_', raw_title).split())
    if not title:
        # No usable title: fall back to a name derived from the link itself.
        title = re.sub(r'[\\/:*?"<>|]', '_', hrefs[0])

    # Fetch the raw video bytes; refuse to save an HTTP error body as .mp4.
    response = requests.get(video, timeout=10)
    response.raise_for_status()
    with open(title + ".mp4", mode='wb') as f:
        f.write(response.content)