# 1. 京东商品页抓取
# Fetch a JD.com product page and print the first 1000 characters of its HTML.
# NOTE(review): relies on `requests` being imported elsewhere in this file.
url = "https://item.jd.com/2330392.html"
try:
    # A timeout keeps the script from blocking forever on a stalled connection.
    r = requests.get(url, timeout=30)
    # Turn 4xx/5xx responses into an HTTPError so they reach the handler below.
    r.raise_for_status()
    # Decode the body with the encoding guessed from its content instead of
    # the one taken from the response headers.
    r.encoding = r.apparent_encoding
    print(r.text[:1000])
except requests.RequestException:
    # Only network/HTTP failures are reported as a scrape failure; programming
    # errors and Ctrl-C are no longer silently swallowed.
    print("爬虫失败")
# 2. 亚马逊商品页抓取(修改 User-Agent,模拟浏览器)
# Fetch an Amazon.cn product page while sending a browser-like User-Agent
# header, then print the first 1000 characters of its HTML.
# NOTE(review): relies on `requests` being imported elsewhere in this file.
url = "https://www.amazon.cn/gp/product/B0094DVNT6"
# Replaces the default User-Agent that `requests` would otherwise send.
kv = {'User-Agent': 'Mozilla/5.0'}
try:
    # A timeout keeps the script from blocking forever on a stalled connection.
    r = requests.get(url, headers=kv, timeout=30)
    # Turn 4xx/5xx responses into an HTTPError so they reach the handler below.
    r.raise_for_status()
    # Decode the body with the encoding guessed from its content instead of
    # the one taken from the response headers.
    r.encoding = r.apparent_encoding
    print(r.text[:1000])
except requests.RequestException:
    # Only network/HTTP failures are reported as a scrape failure; programming
    # errors and Ctrl-C are no longer silently swallowed.
    print("爬虫失败")
# 3. 百度搜索
# Submit a keyword search to Baidu and print the final request URL followed by
# the length of the returned page.
# NOTE(review): relies on `requests` being imported elsewhere in this file.
keyword = "Python"
# `wd` is the query-string parameter carrying the search term; `requests`
# URL-encodes it and appends it to the URL.
kv = {'wd': keyword}
try:
    # A timeout keeps the script from blocking forever on a stalled connection.
    r = requests.get("http://www.baidu.com/s", params=kv, timeout=30)
    # The prepared request is exposed as `r.request` (singular). The previous
    # `r.requests` raised AttributeError, which the bare `except` hid, so this
    # snippet always reported failure.
    print(r.request.url)
    # Turn 4xx/5xx responses into an HTTPError so they reach the handler below.
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Only network/HTTP failures are reported as a scrape failure; programming
    # errors and Ctrl-C are no longer silently swallowed.
    print("爬虫失败")