初步使用
In [1]: import requests
In [2]: response = requests.get('http://www.baidu.com')
In [3]: type(response)
Out[3]: requests.models.Response
In [4]: response.status_code
Out[4]: 200
In [5]: type(response.text)
Out[5]: str
In [6]: response.text
Out[6]: '<!DOCTYPE html>\r\n<!--STATUS OK--><html> <head><meta http-equiv=content-type content=text/html;charset=utf-8><
meta http-equiv=X-UA-Compatible content=IE=Edge><meta content=always name=referrer><link rel=stylesheet type=text/css hr
ef=http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css><title>ç\x99¾åº¦ä¸\x80ä¸\x8bï¼\x8cä½\xa0å°±ç\x9f¥é\x81\x93</
title></head> <body link=#0000cc> <div id=wrapper> <div id=head> <div class=head_wrapper> <div class=s_form> <div class=
s_form_wrapper> <div id=lg> <img hidefocus=true src=//www.baidu.com/img/bd_logo1.png width=270 height=129> </div> <form
id=form name=f action=//www.baidu.com/s class=fm> <input type=hidden name=bdorz_come value=1> <input type=hidden name=ie
value=utf-8> <input type=hidden name=f value=8> <input type=hidden name=rsv_bp value=1> <input type=hidden name=rsv_idx
value=1> <input type=hidden name=tn value=baidu><span class="bg s_ipt_wr"><input id=kw name=wd class=s_ipt value maxlen
gth=255 autocomplete=off autofocus></span><span class="bg s_btn_wr"><input type=submit id=su value=ç\x99¾åº¦ä¸\x80ä¸\x8b
class="bg s_btn"></span> </form> </div> </div> <div id=u1> <a href=http://news.baidu.com name=tj_trnews class=mnav>æ\x9
6°é\x97»</a> <a href=http://www.hao123.com name=tj_trhao123 class=mnav>hao123</a> <a href=http://map.baidu.com name=tj_
trmap class=mnav>å\x9c°å\x9b¾</a> <a href=http://v.baidu.com name=tj_trvideo class=mnav>è§\x86é¢\x91</a> <a href=http:
//tieba.baidu.com name=tj_trtieba class=mnav>è´´å\x90§</a> <noscript> <a href=http://www.baidu.com/bdorz/login.gif?logi
n&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2f%3fbdorz_come%3d1 name=tj_login class=lb>ç\x99»å½\x95</a> </noscript> <s
cript>document.write(\'<a href="http://www.baidu.com/bdorz/login.gif?login&tpl=mn&u=\'+ encodeURIComponent(window.locati
on.href+ (window.location.search === "" ? "?" : "&")+ "bdorz_come=1")+ \'" name="tj_login" class="lb">ç\x99»å½\x95</a>\'
);</script> <a href=//www.baidu.com/more/ name=tj_briicon class=bri style="display: block;">æ\x9b´å¤\x9a产å\x93\x81</
a> </div> </div> </div> <div id=ftCon> <div id=ftConw> <p id=lh> <a href=http://home.baidu.com>å\x85³äº\x8eç\x99¾åº¦</a>
<a href=http://ir.baidu.com>About Baidu</a> </p> <p id=cp>©2017 Baidu <a href=http://www.baidu.com/duty/
>使ç\x94¨ç\x99¾åº¦å\x89\x8då¿\x85读</a> <a href=http://jianyi.baidu.com/ class=cp-feedback>æ\x84\x8fè§\x81å\x8
f\x8dé¦\x88</a> 京ICPè¯\x81030173å\x8f· <img src=//www.baidu.com/img/gs.gif> </p> </div> </div> </div> </bo
dy> </html>\r\n'
In [7]: type(response.cookies)
Out[7]: requests.cookies.RequestsCookieJar
In [8]: response.cookies
Out[8]: <RequestsCookieJar[Cookie(version=0, name='BDORZ', value='27315', port=None, port_specified=False, domain='.baid
u.com', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=1541742952,
discard=False, comment=None, comment_url=None, rest={}, rfc2109=False)]>
get请求
In [9]: response = requests.get('http://httpbin.org/get')
In [10]: response.text
Out[10]: '{\n "args": {}, \n "headers": {\n "Accept": "*/*", \n "Accept-Encoding": "gzip, deflate", \n
"Connection": "close", \n "Host": "httpbin.org", \n "User-Agent": "python-requests/2.19.1"\n }, \n
"origin": "183.216.200.80", \n "url": "http://httpbin.org/get"\n}\n'
# 添加参数
In [13]: response = requests.get('http://httpbin.org/get', params={'par': 'get'})
In [15]: print(response.text)
{
"args": {
"par": "get"
},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Connection": "close",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.19.1"
},
"origin": "183.216.200.80",
"url": "http://httpbin.org/get?par=get"
}
post请求
In [11]: response = requests.post('http://httpbin.org/post', data={'hee': 'llo'})
In [12]: response.text
Out[12]: '{\n "args": {}, \n "data": "", \n "files": {}, \n "form": {\n "hee": "llo"\n }, \n "headers": {\n
"Accept": "*/*", \n "Accept-Encoding": "gzip, deflate", \n "Connection": "close", \n "Content-Length": "7", \n
"Content-Type": "application/x-www-form-urlencoded", \n "Host": "httpbin.org", \n "User-Agent": "python-request
s/2.19.1"\n }, \n "json": null, \n "origin": "183.216.200.80", \n "url": "http://httpbin.org/post"\n}\n'
json解析
In [16]: response = requests.get('http://httpbin.org/get', params={'par': 'get'})
In [18]: response.json()
Out[18]:
{'args': {'par': 'get'},
'headers': {'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'close',
'Host': 'httpbin.org',
'User-Agent': 'python-requests/2.19.1'},
'origin': '183.216.200.80',
'url': 'http://httpbin.org/get?par=get'}
In [19]: type(response.json())
Out[19]: dict
获取二进制
In [20]: response = requests.get('https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo_top_86d58ae1.png')
In [22]: fo = open(r'C:\Users\Contry\Desktop\logo.png', 'wb')
In [23]: fo.write(response.content)
Out[23]: 2910
In [24]: fo.close()
headers
In [25]: response = requests.get('https://www.zhihu.com/explore')
In [26]: response.text
Out[26]: '<html>\r\n<head><title>400 Bad Request</title></head>\r\n<body bgcolor="white">\r\n<center><h1>400 Bad Request
</h1></center>\r\n<hr><center>openresty</center>\r\n</body>\r\n</html>\r\n'
# 添加user-agent才能正常访问
In [27]: headers = {
...: 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.13
...: 2 Safari/537.36'
...: }
In [28]: response = requests.get('https://www.zhihu.com/explore', headers=headers)
In [29]: response.text
Out[29]: '<!DOCTYPE html>\n<html lang="zh-CN" dropEffect="none" class="no-js no-auth ">\n<head>\n<meta charset="utf-8" /
>\n\n<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />\n<meta name="renderer" content="webkit" />\n<meta
....
response
In [30]: response.status_code
Out[30]: 200
# 响应的headers
In [31]: response.headers
Out[31]: {'Date': 'Thu, 08 Nov 2018 06:19:48 GMT', 'Content-Type': 'text/html; charset=UTF-8', 'Transfer-Encoding': 'chu
nked', 'Connection': 'keep-alive', 'Set-Cookie': 'tgw_l7_route=7139e401481ef2f46ce98b22af4f4bed; conne
ct-src * wss:;", 'Expires': 'Fri, 02 Jan 2000 00:00:00 GMT', 'Pragma': 'no-cache', 'Cache-Control': 'private, no-store,
max-age=0, no-cache, must-revalidate, post-check=0, pre-check=0', 'X-Za-Experiment': 'default:None,ge3:ge3_9,ge2:ge2_1,n
webQAGrowth:experiment,is_office:false,nweb_growth_people:default,app_store_rate_dialog:close,nweb_search_suggest:defaul
t,search_advert_position:1,live_store:ls_a2_b2_c1_f2,nweb_search:nweb_search_heifetz,search_hybrid_tabs:pin-3#album-7,ne
w_live_feed_mediacard:new,hybrid_zhmore_video:yes,ad_r:a,growth_search:s2,qaweb_related_readings_content_control:close,a
ndroid_pass_through_push:all,new_sign_bg:new,new_mobile_app_header:true,np:1,android_search_tab_style:search_tab_style_b
,android_db_recommend_action:open,android_db_feed_hash_tag_style:button,mobile_feed_guide:block,is_new_noti_panel:no', '
X-Frame-Options': 'DENY', 'X-Backend-Server': 'zhihu-web.zhihu-web-explore.296db8a5---10.70.0.47:31006[10.70.0.47:31006]
', 'Content-Encoding': 'gzip', 'Server': 'ZWS'}
# 返回的cookie
In [32]: response.cookies
Out[32]: <RequestsCookieJar[Cookie(version=0, name='cap_id', value='"ZmRhM2FhNmFjMGExNDE3NmI5MTM4ODA2MTQ2YjAwYWM=|154165
7987|296ac858ffb5a9a1eb4b8efb09b3b7fe66223216"', port=None, port_specified=False, domain='.zhihu.com', Cookie(version=0,
name='q_c1', value='c2ee66d752e344068c9ba17085a51646|1541657987000|1541657987000
', port=None, port_specified=False, domain='.zhihu.com', domain_specified=True, domain_initial_dot=False, path='/', path
cure=False, expires=None, discard=True, comment=None, comment_url=None, rest={}, rfc2109=False), Cookie(version=0, name=
'tgw_l7_route', value='7139e401481ef2f46ce98b22af4f4bed', port=None, port_specified=False, domain='www.zhihu.com', domai
n_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=1541658887, discard=Fa
lse, comment=None, comment_url=None, rest={}, rfc2109=False)]>
# 请求的headers
In [33]: response.request.headers
Out[33]: {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.13
2 Safari/537.36', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}
# 请求的url
In [34]: response.url
Out[34]: 'https://www.zhihu.com/explore'
In [35]: response.history
Out[35]: []
文件上传
# rb方式打开文件
In [49]: file = {
...: 'file': open(r'C:\Users\Contry\Desktop\logo.png','rb')
...: }
In [50]: response = requests.post('http://httpbin.org/post', files=file)
In [51]: print(response.text)
{
"args": {},
"data": "",
"files": {
"file": "data:application/octet-stream;base64,iVBORw0KGgoAAAANSUhEUgAAAHUAAAAmCAYAAAD6HtTlAAAAGXRFWHRTb2Z0d2FyZQBBZ
9iZSBJbWFnZVJlYWR5ccllPAAAA2hpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTe
5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuNS1jMDE0IDc5LjE1MTQ4M
wgMjAxMy8wMy8xMy0xMjowOToxNSAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50Y
gtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wTU09Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9tbS8iIHhtbG5zO
N0UmVmPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvc1R5cGUvUmVzb3VyY2VSZWYjIiB4bWxuczp4bXA9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwL
EuMC8iIHhtcE1NOk9yaWdpbmFsRG9jdW1lbnRJRD0ieG1wLmRpZDpDRkZGMDZBODNDMjA2ODExODA4M0YxRDFGQUY0MkREQiIgeG1wTU06RG9jdW1lbnRJR
0ieG1wLmRpZDpFNTdCMjU2Njc2MzcxMUU1ODQ0QUFDM0UyQzgyOTM5OCIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDpFNTdCMjU2NTc2MzcxMUU1ODQ0Q
FDM0UyQzgyOTM5OCIgeG1wOkNyZWF0b3JUb29sPSJBZG9iZSBQaG90b3Nob3AgQ1M2IChNYWNpbnRvc2gpIj4gPHhtcE1NOkRlcml2ZWRGcm9tIHN0UmVmO
luc3RhbmNlSUQ9InhtcC5paWQ6Q0ZGRjA2QTgzQzIwNjgxMTgwODNGMUQxRkFGNDJEREIiIHN0UmVmOmRvY3VtZW50SUQ9InhtcC5kaWQ6Q0ZGRjA2QTgzQ
IwNjgxMTgwODNGMUQxRkFGNDJEREIiLz4gPC9yZGY6RGVzY3JpcHRpb24+IDwvcmRmOlJERj4gPC94OnhtcG1ldGE+IDw/eHBhY2tldCBlbmQ9InIiPz4DC
SMAAAHjElEQVR42uxbC2xURRSdpZXS8mnlJ5SiUiCGaIHy/xQDiBAhwU/4qIlWFEU0CGgwYJCI0UTxA5IYjRiCP8QviAVUsFohERIwRgRUCpRWiFKxlI/lI
R73TNhOryZ997u63a32Zuc7O68mX373pm599z7ZkO1tbUiHq1b9j6/Q0KEmYTZhNaEdYQZhErLmBRCKuGMnxOVHu4q4tmaiMZjCwmLCVcSWhAmE4rx3mkCP
o4QqgmLCdkNpYb0VhIvYYwz6H9OsLjDu2vEl7Cik4jTCF8S8hIkho/NgVu1Mnux8qUVkiY7tCvN+HZJKnxYwWWYx0IMgimExZZ+k4zuOskqQ1g2R6PjyO0t
Rj0vskSY0P+9fl+Dm89vTwXWlJUuPDSj0ev8yl33nCjiSpsbUrCNcTOmrtmyxjflBy1SqX7/+O8HeS1NjZk4TfCSV4fUsRNcstRYYXNdJMxlWYZ5LqN3Z2H
FpJW3h3303YS3ajhHuIpzWxjHxq5TPW1GQMJFf3BhIDSVAmZAJLCd0MnTlytBipdjAOShXhzYQVmIFqsbHXiHcDlFUifx0KfrmEMYSsgj8I4qEVkaM9zJhI
A6zMVtsgjqHsEpmiJGH4ZAkoWJZZqgOkS4WRVQydpv9DbcjX/kl37tLKFCIZQ9wesOCrkT3HzCFCUSgdTOHvq0CuA8dxCaWYoXDyQKqanlaXHKa5u98t1JD
1POLS1JUwljMIqO0B4l7DeIc6y5bucg13wy/wmbu+ZJDUBJt42l+Oc3vyjtY2BSGqttA2EOPqaMMkhH23ncp6rGvAetBJ1H0KUQMlHTWoa3FN1Pf1wjl0L5
fio93FyDZ7p0F9Vlpu+scOhPID8hRD/xuwWkdqk6GZy++7EMW1pRhCBKdgNZZxLRDjWZE/p7RzeverUf0ebBryK3/Z1X0vws8k1wZIah7hJ7WBSJWPzO4kv
cwhlOdfkrhoSO+o62H870hwk9lpPHkGWfpz5WpvjTZgkwXmKh5hnAQU6HUkjCa8BnyvVjYSgiZA8rEeocwWCGUJ8AKj4QKCJ8JWmpksx8TRihFOf4RuL/NM
itq4CWBmH0BCYb25+EnSL8VMb2qO1Nwi7CHsL7uJ6Qoe/bynsWb+eQFp1C2zGsOC5adNW8yW+WHPsS74nXLHgh6aa3GMRgb+X8202kNkf8zML7DMSDVhALs
R4H5C0CQGRulsTNn6U7kOibt2WpfONeG9zbZkIIUMhyOahuqTGY1kTLpEhgeO9xT7VSOXtNB/4vBd8fwcp4w6jCKPbCExMAUInm0jlwFwlzE80DhI+UT53C
glnlfPOfJiSuNmjxFeiKZ4BUXMwul5wocQUx2hkIuxklXjid3BICZv0dqGWdKluYb2zdrC4TzZbXtlP9kn1RAzq32oQH17pVvQPw3RsYTwkWks1G/I8j0hk
AngAnFNeONIPOAMvtNdi+EnRd72HJsbqwqSukuImmW1rbd5zk5dRiCFbHE1tHi5tLhLeZoE0J6mBMR3IteWJU5tt/j4nqDsmHSlSrut6sDpmo8/N+eahAEJ
0ro5emLk9DYERqM+H6PvcxJhPph9xsxltZeDPZUggkJ9uIc3Gh4rjSvg+Vogp4p55I18ZCaHmxkM9r9pK+lGvul69xv0O/IRoX+03ud5DHH3cOaUG5j4tsh
hXqLQt8EFqCtR2gXKDJhLWuIyrdIiLAsWHXVpbDmLaeGF/OhRLyxHOOzJaaMRHndL8TPjC55hKeAKV1PY+xrPKHaV8XqYQmofyHwu9MhF+4uJmzS3eYDXSh
o4IJXD5OWW4zw5FwdBaj5k/kCfMWxnhOdLEZfuwpfEcYlxq7i4w36LR1Jt198a6ciMANypX+MQd0TJU72kebxSc5lgp4uaZPDfMn/iPK4HPvdASvGU0qcAM
6AMNdfI7F+4tINZxWKsIjkLxMtXY4XQhiej/HK5Nru9AjHrnMidZtDnJS2A+lImdI2WSGVc9YvRf38JyXXotRrAlT/OuldhHsJMZ6stInHio1qh0TdzdO5W
Keofn6oOyUYfUKTCSvbv0rLXa6mdte4ZBPeDFW8vq2162oIHHacq0IP9tV78l+KPlZqR5vnrSrRXhnX6ohLg3Q+vcl/BJQHHIqMXGe+o0I1z37I743xWc9b
gb4/O8fzTAauN7xmXOWxHKuiErWY/8frQSimpQhFkkPZbTSj2Dm+4ErrbMN8Q1nSyeHGeh2tICutAyrW0wUhImtzNSkE0OBZEC3Iw+EUykKtFwthq583ykk
mI85LQIqzahWoICmLng/qccw1utEwX9gV4gbWQ7frjvmzM0kUu4ycCfmxVQMUEv5YHjyNz1P6GMJAPhc7p5UGgPFpSi0Xd/3Ry2W844aZ6mrmvifAD84ExW
WViG31ZTZxV6SFDHUCFSEM5iGVe9Cv+tPtAkpxLDbugd9XxdBZVGJm15NiZBfEuxM2eOxfDQHFqp0f7R33OO4o4TbCX/VE6Bnh7w/OrBFWQKOMh0CcbhKHv
0lrvW55RHcUFSXsjUSyxB/dxtETjd4kxGY7TmI+6wB9mDCLHUiNEbF/KjtPwEGAK+qtuUz8vCNAAAAAElFTkSuQmCC"
},
"form": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Connection": "close",
"Content-Length": "3054",
"Content-Type": "multipart/form-data; boundary=a167b9b5c69d63ce01ff3b71cd93ea06",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.19.1"
},
"json": null,
"origin": "183.216.200.80",
"url": "http://httpbin.org/post"
}
cookie
In [59]: for k,v in response.cookies.items():
...: print('{}={}\n'.format(k,v))
...:
_xsrf=hjoVIJFH7KrrsbiEIuKEhPwyHR16sajD
tgw_l7_route=8605c5a961285724a313ad9c1bbbc186
Session
# 创建一个Session对象,相当于一个浏览器
In [68]: s = requests.Session()
In [69]: s.get('http://httpbin.org/cookies/set/my/world1998')
Out[69]: <Response [200]>
In [72]: s.get('http://httpbin.org/cookies').text
Out[72]: '{\n "cookies": {\n "my": "world1998"\n }\n}\n'
证书验证
# 本来12306会报错的,现在不报错了
In [76]: response = requests.get('https://www.12306.cn')
# 关闭证书验证会有警告
In [77]: response = requests.get('https://www.12306.cn', verify=False)
C:\My Program Files\Anaconda3\lib\site-packages\urllib3\connectionpool.py:857: InsecureRequestWarning: Unverified HT
request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/l
t/advanced-usage.html#ssl-warnings
InsecureRequestWarning)
C:\My Program Files\Anaconda3\lib\site-packages\urllib3\connectionpool.py:857: InsecureRequestWarning: Unverified HT
request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/l
t/advanced-usage.html#ssl-warnings
InsecureRequestWarning)
In [78]: response.status_code
Out[78]: 200
# 使用urllib3的disable_warnings()就不会显示警告信息
In [80]: import urllib3
In [81]: urllib3.disable_warnings()
In [82]: response = requests.get('https://www.12306.cn', verify=False)
# 可以使用本地证书
In [83]: response = requests.get('https://www.12306.cn', cert=('/path/...', '/path/...'))
代理设置
In [1]: import requests
# 有密码的话 'http' : 'http://username:password@ip:port'
In [2]: proxies = {
...: 'http': 'http://**.**.**.90:8888',
...: 'https': 'https://**.**.**.90:8888'
...: }
In [7]: response = requests.get('http://httpbin.org/get', proxies=proxies)
In [8]: response.text
Out[8]: '{\n "args": {}, \n "headers": {\n "Accept": "*/*", \n "Accept-Encoding": "gzip, deflate", \n "Connec
tion": "close", \n "Host": "httpbin.org", \n "User-Agent": "python-requests/2.19.1"\n }, \n "origin": "**.**.**.90", \n "url": "http://httpbin.org/get"\n}\n'
# 使用socks代理, 需要先安装依赖
pip install requests[socks]
proxies = {
'http': 'socks5://**.**.**.90:8888',
'https': 'socks5://**.**.**.90:8888'
}
超时设置
requests.get(url, timeout=1)
如果在超时时间内服务器没有响应,会抛出 requests.exceptions.Timeout 异常
认证设置
有些网站需要登录才能查看
requests.get(url, auth=('user', 'passwd'))
异常处理
from requests.exceptions import ReadTimeout, HTTPError, RequestException
参考文献:http://www.python-requests.org/en/master/