Python爬虫--urllib高级使用之opener

一、用法

opener:用于给请求添加附加功能(例如调试输出、代理、Cookie 管理)。每一种附加功能由一个处理器对象(Handler 对象)提供,把 Handler 传给 build_opener() 即可得到带有该功能的 opener,再用 opener.open() 发送请求。

二、常用handler对象

  • HTTPHandler: 普通的HTTP处理器
  • ProxyHandler:代理IP处理器
  • HTTPCookieProcessor:cookie处理器

三、代码演示

(1) HTTPHandler

# -*- coding:utf-8 -*-
from fake_useragent import UserAgent
from urllib.request import Request, urlopen, build_opener, HTTPHandler

def download(url):
    """Fetch ``url`` through a debug-enabled opener and print the response body.

    A random ``User-Agent`` header (from ``fake_useragent``) is attached to
    the request. The body is decoded with ``bytes.decode()``'s default
    encoding (UTF-8) before being printed.

    Args:
        url: Address to request.
    """
    headers = {
        "User-Agent": UserAgent().random
    }
    req = Request(url, headers=headers)
    # Create the HTTP handler object; a non-zero debuglevel turns on debugging.
    handler = HTTPHandler(debuglevel=2)
    opener = build_opener(handler)
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, even if decoding fails.
    with opener.open(req) as resp:
        print(resp.read().decode())


if __name__ == "__main__":
    # Script entry point: run the HTTPHandler demo against httpbin's
    # request-echo endpoint.
    url = "http://httpbin.org/get"
    download(url=url)
HTTPHandler处理器

(2) ProxyHandler

# -*- coding:utf-8 -*-
from fake_useragent import UserAgent
from urllib.request import Request, build_opener, ProxyHandler

def proxy_handler(url, proxies=None):
    """Fetch ``url`` through a proxy and print the response body.

    A random ``User-Agent`` header (from ``fake_useragent``) is attached to
    the request. The body is decoded with ``bytes.decode()``'s default
    encoding (UTF-8) before being printed.

    Args:
        url: Address to request.
        proxies: Mapping of URL scheme to ``"ip:port"`` proxy address, e.g.
            ``{"http": "ip:port"}``. When omitted, the sample proxy
            ``{"http": "218.14.108.53:8060"}`` is used.
    """
    if proxies is None:
        # Sample proxy used by the demo; only applies to http:// URLs.
        proxies = {"http": "218.14.108.53:8060"}
    headers = {
        "User-Agent": UserAgent().random
    }
    req = Request(url, headers=headers)
    # ProxyHandler({"http": "ip:port"})
    handler = ProxyHandler(proxies)
    opener = build_opener(handler)
    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read.
    with opener.open(req) as resp:
        print(resp.read().decode())


if __name__ == "__main__":
    # Script entry point: run the ProxyHandler demo against httpbin's
    # request-echo endpoint.
    url = "http://httpbin.org/get"
    # Alternative demo: download(url)
    proxy_handler(url=url)

ProxyHandler处理器对象

(3) HTTPCookieProcessor
  1.以快代理登录为例:https://www.kuaidaili.com/login/

HTTPCookieProcessor1.png
HTTPCookieProcessor2.png
HTTPCookieProcessor3.png
HTTPCookieProcessor4.png

HTTPCookieProcessor5.png
  • 代码演示:
import json
from urllib.parse import urlencode
from fake_useragent import UserAgent
from urllib.request import Request,  build_opener, HTTPCookieProcessor


def cookie_handler(url):
    """Log in at ``url`` and print a page that requires the login cookies.

    Credentials are read from ``./009account.json`` (keys ``username`` and
    ``pwd``) and sent as a form-encoded request body. The same opener is then
    reused to request the user-centre overview page, so cookies stored by the
    cookie processor during login are sent along with that second request.

    Args:
        url: Address the login form is submitted to.
    """
    headers = {
        "User-Agent": UserAgent().random
    }
    with open("./009account.json", "r", encoding="utf-8") as fr:
        account_json = json.load(fr)
    args = {
        "login_type": 1,
        "username": str(account_json["username"]),
        "passwd": str(account_json["pwd"])
    }
    # Request the login page; supplying ``data`` makes this a POST request.
    req = Request(url, headers=headers, data=urlencode(args).encode())
    # Create the cookie processor, which saves cookie information automatically.
    handler = HTTPCookieProcessor()
    opener = build_opener(handler)
    # The login response body is not needed; the request is made only so the
    # cookie processor can record the cookies. Close the response right away.
    with opener.open(req):
        pass
    # Request the URL that is only reachable after logging in.
    login_after = "https://www.kuaidaili.com/usercenter/overview"
    req_login_after = Request(login_after, headers=headers)
    with opener.open(req_login_after) as resp_after:
        print(resp_after.read().decode())


if __name__ == "__main__":
    # Script entry point: run the HTTPCookieProcessor login demo.
    # ``url`` is only used by the alternative demos listed below.
    url = "http://httpbin.org/get"
    login_url = "https://www.kuaidaili.com/login/"

    # Alternative demos:
    #   download(url)
    #   proxy_handler(url)
    cookie_handler(url=login_url)
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容