一、用法
opener:用于为请求添加附加功能。这些附加功能称为处理器对象(Handler 对象):先用 build_opener(handler) 创建 opener,再用 opener.open(req) 发送请求。
二、常用handler对象
- HTTPHandler:普通的HTTP处理器
- ProxyHandler:代理IP处理器
- HTTPCookieProcessor:cookie处理器
三、代码演示
(1) HTTPHandler
# -*- coding:utf-8 -*-
from fake_useragent import UserAgent
from urllib.request import Request, urlopen, build_opener, HTTPHandler
def download(url):
    """Fetch ``url`` through an opener built on HTTPHandler and print the body.

    A random User-Agent header is attached to the request. The response
    body is decoded with the default codec of ``bytes.decode()`` and
    printed; nothing is returned.

    Args:
        url: Address to request.
    """
    headers = {
        "User-Agent": UserAgent().random,
    }
    req = Request(url, headers=headers)
    # Create the HTTP handler object; debuglevel switches on debugging output.
    handler = HTTPHandler(debuglevel=2)
    opener = build_opener(handler)
    # The context manager closes the response (and its socket) even if
    # reading or decoding raises.
    with opener.open(req) as resp:
        print(resp.read().decode())
if __name__ == "__main__":
    # Script entry point: run the HTTPHandler demo against httpbin.
    url = "http://httpbin.org/get"
    download(url)
(2) ProxyHandler
# -*- coding:utf-8 -*-
from fake_useragent import UserAgent
from urllib.request import Request, build_opener, ProxyHandler
def proxy_handler(url, proxy="218.14.108.53:8060"):
    """Fetch ``url`` through a proxy and print the response body.

    A random User-Agent header is attached to the request. The response
    body is decoded with the default codec of ``bytes.decode()`` and
    printed; nothing is returned.

    Args:
        url: Address to request.
        proxy: Proxy address in ``"ip:port"`` form, registered under the
            ``"http"`` key of the ProxyHandler mapping. Defaults to the
            address this demo originally hard-coded.
    """
    headers = {
        "User-Agent": UserAgent().random,
    }
    req = Request(url, headers=headers)
    # ProxyHandler({"http": "ip:port"})
    handler = ProxyHandler({"http": proxy})
    opener = build_opener(handler)
    # The context manager closes the response (and its socket) even if
    # reading or decoding raises.
    with opener.open(req) as resp:
        print(resp.read().decode())
if __name__ == "__main__":
    # Script entry point: run the ProxyHandler demo against httpbin.
    # (The download(url) demo is left disabled here.)
    url = "http://httpbin.org/get"
    proxy_handler(url)
(3) HTTPCookieProcessor
1.以快代理登录为例:https://www.kuaidaili.com/login/
- 代码演示:
import json
from urllib.parse import urlencode
from fake_useragent import UserAgent
from urllib.request import Request, build_opener, HTTPCookieProcessor
def cookie_handler(url, account_path="./009account.json"):
    """Log in at ``url``, then print a page fetched with the same opener.

    The credentials are read from a JSON file and sent URL-encoded in the
    body of the login request. Both requests go through one opener built
    on HTTPCookieProcessor, so cookies saved from the login response are
    available to the follow-up request. The follow-up body is decoded with
    the default codec of ``bytes.decode()`` and printed; nothing is
    returned.

    Args:
        url: Address of the login endpoint.
        account_path: Path of a UTF-8 JSON file holding the ``"username"``
            and ``"pwd"`` keys. Defaults to the path this demo originally
            hard-coded.
    """
    headers = {
        "User-Agent": UserAgent().random,
    }
    with open(account_path, "r", encoding="utf-8") as fr:
        account_json = json.load(fr)

    args = {
        "login_type": 1,
        "username": str(account_json["username"]),
        "passwd": str(account_json["pwd"]),
    }

    # Request the login page, sending the form fields in the request body.
    req = Request(url, headers=headers, data=urlencode(args).encode())
    # Create the cookie processor, which saves cookie information automatically.
    handler = HTTPCookieProcessor()
    opener = build_opener(handler)
    # Only the cookies from the login response matter, so the response is
    # closed straight away instead of being left open.
    with opener.open(req):
        pass

    # Request the URL that is only meaningful after logging in.
    login_after = "https://www.kuaidaili.com/usercenter/overview"
    req_login_after = Request(login_after, headers=headers)
    with opener.open(req_login_after) as resp_after:
        print(resp_after.read().decode())
if __name__ == "__main__":
    # Script entry point: run the HTTPCookieProcessor login demo.
    # (The download(url) and proxy_handler(url) demos are left disabled.)
    url = "http://httpbin.org/get"
    login_url = "https://www.kuaidaili.com/login/"
    cookie_handler(login_url)