```
import os
from urllib.parse import quote

import requests
from pyquery import PyQuery as pq
class TiebaPic:
    """Crawl a Baidu Tieba forum and save the images found in its threads.

    The crawl starts at the forum's first list page, follows the "next page"
    link until there is none, opens every thread listed on each page and
    writes each matched image into a local ``images`` directory.
    """

    # Seconds to wait on an image request before giving up, so that a
    # stalled server cannot hang the whole crawl.
    timeout = 30

    def __init__(self, name):
        """Build the start URL and request headers for the forum *name*.

        Args:
            name: Forum keyword placed in the ``kw`` query parameter.
        """
        # Percent-encode the keyword so non-ASCII names and reserved
        # characters ('&', '#', ...) cannot corrupt the query string.
        # The previous '{%s}' template also leaked literal braces into
        # the keyword.
        self.url = 'http://tieba.baidu.com/f?ie=utf-8&kw=%s' % quote(name)
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0) '
        }

    def get_data(self, url):
        """Send a GET request to *url* and return the response body as bytes.

        Raises:
            requests.RequestException: on connection failure or timeout.
        """
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        return response.content

    def get_page_info(self, _url):
        """Parse one forum list page.

        Args:
            _url: URL of the list page to fetch.

        Returns:
            A ``(data_list, next_url)`` tuple. ``data_list`` holds one
            ``{'url': ..., 'title': ...}`` dict per thread link found;
            ``next_url`` is the URL of the following list page, or ``None``
            when the pager has no "next" link.
        """
        doc = pq(url=_url, encoding='utf-8')
        node_list = doc("#thread_list li.j_thread_list.clearfix>div>div:nth-child(2)>div>div:first-child>a")
        data_list = []
        for node in node_list.items():
            href = node.attr.href
            if not href:
                # An anchor without href cannot be followed; skip it rather
                # than fail on str + None.
                continue
            data_list.append({
                'url': 'http://tieba.baidu.com' + href,
                'title': node.text(),
            })
        # Extract the "next page" link. It is absent on the last page, in
        # which case None is returned so the caller's loop can stop.
        next_href = doc('#frs_list_pager').children('a.next').attr.href
        next_url = 'http:' + next_href if next_href else None
        return data_list, next_url

    def get_detail(self, _url):
        """Fetch a thread page and return the PyQuery selection of its images."""
        doc = pq(url=_url, encoding='utf-8')
        image_list = doc("cc img.BDE_Image")
        return image_list

    def download(self, image_list):
        """Download every image in *image_list* into the ``images`` directory.

        Args:
            image_list: PyQuery selection of ``<img>`` elements, as returned
                by ``get_detail``.

        Each file is named after the last path segment of the image URL.
        """
        os.makedirs('images', exist_ok=True)
        for image in image_list.items():
            img_src = image.attr.src
            if not img_src:
                # No src attribute means there is nothing to download.
                continue
            print(img_src)
            file_name = os.path.join('images', img_src.split('/')[-1])
            image_data = self.get_data(img_src)
            with open(file_name, 'wb') as f:
                f.write(image_data)

    def main(self):
        """Walk every list page of the forum and download all thread images."""
        next_url = self.url
        while next_url:
            data_list, next_url = self.get_page_info(next_url)
            for data in data_list:
                image_list = self.get_detail(data['url'])
                self.download(image_list)
# Script entry point: crawl the hard-coded forum when run directly.
if __name__ == '__main__':
    TiebaPic('校花吧').main()
```