抓取快代理中的可用代理

抓取到的代理会直接打印在控制台,复制保存即可。

import re
import time
from lxml import etree

import requests
# Pages that echo the caller's IP address; used to confirm that a request
# was actually routed through the proxy being checked.
http_url = 'http://myip.kkcha.com/'
https_url = 'https://www.ip138.com/'
# Listing page template; %s is replaced with the 1-based page number.
url = 'https://www.kuaidaili.com/free/inha/%s/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',

}

# The listing table is read as a flat run of <td> cells, 7 per row:
# cell 0 is the IP, cell 1 the port and cell 3 the proxy type.
COLUMNS_PER_ROW = 7
# Seconds to wait on any single request so a dead proxy cannot hang the run.
REQUEST_TIMEOUT = 10
# Pause between listing pages, in seconds.
PAGE_DELAY = 0.2


def iter_proxy_rows(cells):
    """Yield ``(scheme, ip, port)`` for every usable row in *cells*.

    *cells* is the flat sequence of ``<td>`` elements of the listing table;
    only each element's ``.text`` attribute is read.  ``scheme`` is
    ``'http'`` when the type cell reads ``HTTP`` and ``'https'`` for
    anything else.  Rows that are truncated (fewer than four cells) or that
    have an empty IP or port cell are skipped instead of raising.
    """
    total = len(cells)
    for start in range(0, total, COLUMNS_PER_ROW):
        if start + 3 >= total:
            # Trailing partial row: the type cell is missing.
            break
        ip = (cells[start].text or '').strip()
        port = (cells[start + 1].text or '').strip()
        kind = (cells[start + 3].text or '').strip().upper()
        if not ip or not port:
            continue
        yield ('http' if kind == 'HTTP' else 'https'), ip, port


def fetch_cells(page):
    """Return the ``<td>`` elements of listing page *page*.

    Returns an empty list when the page cannot be downloaded or parsed, so
    one bad page does not abort the whole crawl.
    """
    try:
        listing = requests.get(url % page, headers=headers,
                               timeout=REQUEST_TIMEOUT)
        root = etree.HTML(listing.text)
    except (requests.RequestException, etree.LxmlError, ValueError):
        return []
    if root is None:
        return []
    return root.xpath('//*[@id="list"]/table/tbody/tr/td')


def check_proxy(scheme, ip, port):
    """Return True if a request through the proxy succeeds and echoes *ip*.

    The proxy is considered working when the IP-echo page fetched through
    it contains the proxy's own IP address as a literal substring.
    """
    address = '%s:%s' % (ip, port)
    target = http_url if scheme == 'http' else https_url
    try:
        resp = requests.get(target, headers=headers,
                            proxies={scheme: address},
                            timeout=REQUEST_TIMEOUT)
    except (requests.RequestException, ValueError):
        # Best effort: an unreachable or malformed proxy is simply unusable.
        return False
    # Literal containment; using the address as a regex would let every '.'
    # match any character and produce false positives.
    return ip in resp.text


def main():
    """Crawl listing pages 1-99 and print every proxy that passes the check."""
    for page in range(1, 100):
        for scheme, ip, port in iter_proxy_rows(fetch_cells(page)):
            if check_proxy(scheme, ip, port):
                print('%s://%s:%s' % (scheme, ip, port))
        time.sleep(PAGE_DELAY)


if __name__ == '__main__':
    main()
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。