- requests中使用代理:可以用爬虫爬取xicidaili.com上面的免费代理
- 保存文件
- 使用format
import os

import requests
from bs4 import BeautifulSoup
# HTTP proxy used for every request — a free proxy, e.g. scraped from xicidaili.com
# (see the note at the top of this post). NOTE(review): free proxies go stale;
# replace the address if requests start failing.
proxies = {'http' : '36.7.172.18:82'}
# Desktop-Chrome User-Agent so the target site serves the normal page to the scraper.
headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
def save_img(url):
    """Download the image at *url* and save it as a .jpg under ./images/.

    The file name is taken from the second-to-last path segment of the
    URL (convention of weheartit image URLs — TODO confirm this holds for
    every entry). Prints a message and returns early on a non-200 response.
    """
    r = requests.get(url, proxies=proxies, headers=headers, stream=True)
    if 200 != r.status_code:
        print('unable to open {}'.format(url))
        return
    # Create the target directory on first use so open() below cannot fail
    # with FileNotFoundError.
    os.makedirs('./images', exist_ok=True)
    filename = './images/{}.jpg'.format(url.split('/')[-2])
    with open(filename, 'wb') as f:
        # stream=True only pays off if we read in chunks; the original read
        # r.content, which buffers the whole body in memory.
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
def get_pic_url(url):
    """Fetch *url* and return the ``src`` URLs of all thumbnail images.

    Returns an empty list when the page cannot be retrieved (non-200
    status). Requires the ``lxml`` parser to be installed for BeautifulSoup.
    """
    r = requests.get(url, proxies=proxies, headers=headers)
    if 200 != r.status_code:
        return []
    soup = BeautifulSoup(r.text, 'lxml')
    # Thumbnails on the listing page carry the CSS class 'entry-thumbnail'.
    return [img['src'] for img in soup.select('img.entry-thumbnail')]
if __name__ == '__main__':
    # range(1, 2) yields only page 1; widen the range to crawl more pages.
    for page in range(1, 2):
        url = 'http://weheartit.com/inspirations/taylorswift?page={}'.format(page)
        # Fixed: the original line ended with a stray ':' (syntax error).
        imgs = get_pic_url(url)
        print('{} images are found in Page {}.'.format(len(imgs), page))
        for p in imgs:
            save_img(p)
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。