- headers.py
import random
# Random parts of the fake Chrome version string "Chrome/<first>.0.<third>.<fourth>".
# The second component is always 0. The values are drawn once, when this module
# is imported, so the version stays the same for the lifetime of the process.
first_num = random.randint(55, 62)
third_num = random.randint(0, 3200)
fourth_num = random.randint(0, 140)


class FakeChromeUA:
    """Build Chrome-style User-Agent strings with a randomly chosen platform."""

    # Platform tokens; get_ua() picks one at random on every call.
    os_type = [
        '(Windows NT 6.1; WOW64)',
        '(Windows NT 10.0; WOW64)',
        '(X11; Linux x86_64)',
        '(Macintosh; Intel Mac OS X 10_12_6)',
    ]

    # Version token shared by every User-Agent this class produces.
    chrome_version = 'Chrome/{}.0.{}.{}'.format(first_num, third_num, fourth_num)

    @classmethod
    def get_ua(cls) -> str:
        """Return a full User-Agent string.

        The platform token is chosen at random from ``os_type`` on each call;
        the Chrome version token is the fixed ``chrome_version`` class attribute.
        """
        return ' '.join([
            'Mozilla/5.0',
            random.choice(cls.os_type),
            'AppleWebKit/537.36',
            '(KHTML, like Gecko)',
            cls.chrome_version,
            'Safari/537.36',
        ])
# Default request headers that imitate a desktop Chrome browser.
# The User-Agent is generated once, when this dict is built, so every request
# that reuses `headers` sends the same User-Agent value.
headers = {
    'User-Agent': FakeChromeUA.get_ua(),
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Connection': 'keep-alive',
}
- 1. 首先定义 session
import requests
# One Session object, reused for the request made further below.
session = requests.Session()
- 2.proxy的格式
proxies 的格式如下。
示例中的占位值:用户名 username、密码 password、代理地址 hogehoge.proxy.jp、端口号 8080。
# Proxy mapping keyed by URL scheme. Both entries point at the same proxy URL,
# written as http://<username>:<password>@<proxy host>:<port>/ with placeholder
# credentials and the example host hogehoge.proxy.jp on port 8080.
proxy_dict = {
    "http": "http://username:password@hogehoge.proxy.jp:8080/",
    "https": "http://username:password@hogehoge.proxy.jp:8080/",
}
- 3. 接上文,设置 headers 和 url 并发送请求(url 需先定义为要访问的地址)
# `headers` is the dict from headers.py; it must already be in scope here.
# NOTE(review): the self-assignment below is a no-op kept from the original
# notes; presumably it stands in for `from headers import headers` (confirm).
headers = headers
# `url` is not defined anywhere in these notes; set it to the target address
# before running this line.
req = session.get(url, headers=headers, proxies=proxy_dict)