# Proxies that pass the check are printed directly to the console; copy and save them from there.
import re
import sys
import time

import requests
from lxml import etree
# Listing-page template; %s is replaced with the page number.
url = 'https://www.kuaidaili.com/free/inha/%s/'

# Pages requested through each candidate proxy; the response text is then
# searched for the proxy's address.
http_url = 'http://myip.kkcha.com/'
https_url = 'https://www.ip138.com/'

# Request headers carrying a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36'
    ),
}
def iter_proxy_rows(cells, row_width=7):
    """Group a flat list of table-cell texts into ``(ip, port, kind)`` rows.

    Every ``row_width`` consecutive cells are treated as one table row:
    cell 0 is the IP, cell 1 the port and cell 3 the proxy type.

    Args:
        cells: Cell texts in document order; entries may be ``None``.
        row_width: Number of cells that make up one table row.

    Yields:
        ``(ip, port, kind)`` tuples of whitespace-stripped strings. A trailing
        incomplete row and rows with an empty IP or port are skipped.
    """
    for start in range(0, len(cells) - row_width + 1, row_width):
        ip = (cells[start] or '').strip()
        port = (cells[start + 1] or '').strip()
        kind = (cells[start + 3] or '').strip()
        if ip and port:
            yield ip, port, kind


def fetch_cells(page, timeout=10):
    """Download one listing page and return the texts of its table cells.

    Args:
        page: Page number substituted into the ``url`` template.
        timeout: Seconds to wait for the listing site before giving up.

    Returns:
        A list with the ``.text`` of every matched ``<td>`` (possibly
        ``None``), or an empty list when the page cannot be fetched or parsed.
    """
    try:
        resp = requests.get(url % page, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Diagnostics go to stderr so stdout holds only the proxy list.
        print('failed to fetch page %s: %s' % (page, exc), file=sys.stderr)
        return []
    try:
        doc = etree.HTML(resp.text)
    except etree.LxmlError:
        doc = None
    if doc is None:
        # Empty or unparsable body: nothing to extract from this page.
        return []
    return [td.text for td in doc.xpath('//*[@id="list"]/table/tbody/tr/td')]


def check_proxy(ip, port, kind, timeout=10):
    """Request an echo page through a proxy and report whether it works.

    Rows whose type is exactly ``'HTTP'`` are checked against ``http_url``;
    every other type is checked against ``https_url``.

    Args:
        ip: Proxy host.
        port: Proxy port.
        kind: Proxy type text taken from the listing table.
        timeout: Seconds to wait for the proxied request before giving up.

    Returns:
        ``'http://ip:port'`` or ``'https://ip:port'`` when the request
        succeeds and the response text contains the proxy IP, else ``None``.
    """
    scheme = 'http' if kind == 'HTTP' else 'https'
    check_url = http_url if scheme == 'http' else https_url
    proxies = {scheme: 'http://%s:%s' % (ip, port)}
    try:
        resp = requests.get(check_url, proxies=proxies, timeout=timeout)
    except (requests.RequestException, ValueError):
        # Dead or malformed proxies are expected; just skip them.
        return None
    # Literal match on the full IP rather than a regex built from a slice.
    if ip in resp.text:
        return '%s://%s:%s' % (scheme, ip, port)
    return None


def main(first_page=1, last_page=99, delay=0.2):
    """Scrape listing pages and print every proxy that passes the check.

    Args:
        first_page: First listing page to fetch (inclusive).
        last_page: Last listing page to fetch (inclusive).
        delay: Seconds to sleep after each listing page.
    """
    for page in range(first_page, last_page + 1):
        for ip, port, kind in iter_proxy_rows(fetch_cells(page)):
            working = check_proxy(ip, port, kind)
            if working:
                print(working)
        time.sleep(delay)


if __name__ == '__main__':
    main()