国际标准书号:9787115428028
- 引入相关库
import requests
from lxml import html
import pandas as pd
- 函数体
# Placeholder stored when a listing lacks a title, link or price.
_MISSING = '没有这数据!'


def _first_or(values, default):
    """Return the first XPath match, or *default* when nothing matched."""
    return values[0] if values else default


def _parse_listing(li):
    """Extract title, price, link and store from one search-result ``<li>``.

    Missing title/link/price fall back to the ``_MISSING`` placeholder; a
    missing store name falls back to '当当自营'.
    """
    title = _first_or(li.xpath('a/@title'), _MISSING)
    link = _first_or(li.xpath('a/@href'), _MISSING)
    price = _first_or(
        li.xpath('p[@class="price"]/span[@class="search_now_price"]/text()'),
        _MISSING,
    ).replace('¥', '')  # strip the currency sign so the price can be parsed
    store = _first_or(li.xpath('p[@class="search_shangjia"]/a/text()'), '当当自营')
    return {"title": title, "price": price, "link": link, "store": store}


def _price_sort_key(book):
    """Sort key: numeric prices ascending, unparseable prices last."""
    try:
        return (0, float(book['price']))
    except ValueError:
        # The price is the placeholder (or otherwise non-numeric); sorting on
        # float() alone would abort the whole run, so push these to the end.
        return (1, 0.0)


def spider(isbn):
    """Search dangdang.com for *isbn*, print the listings and save them as CSV.

    Fetches the search-result page, extracts one record (title, price, link,
    store) per result item, prints the records before and after sorting by
    price, and writes the sorted records to '当当图书信息.csv' in the current
    working directory.

    Args:
        isbn: The book number to search for; it is inserted into the query
            string as-is.

    Returns:
        None. Results are printed and written to the CSV file.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    # Build the search URL from the book number.
    url = "http://search.dangdang.com/?key={}&act=input".format(isbn)
    print(url)

    # Fetch the page source. A timeout is set because requests otherwise
    # waits indefinitely on a stalled connection.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'}
    html_data = requests.get(url, headers=headers, timeout=10).text

    # Pull the result items out of the page with XPath.
    selector = html.fromstring(html_data)
    ul_list = selector.xpath('//div[@id="search_nature_rg"]/ul/li')
    print('有{}家商家出售此书'.format(len(ul_list)))

    book_info_list = [_parse_listing(li) for li in ul_list]
    print(book_info_list)
    for book in book_info_list:
        print('排序前:', book)

    print("#######################################")
    print("#######################################")
    print("#######################################")

    # Cheapest first; listings without a usable price go to the end.
    book_info_list.sort(key=_price_sort_key)
    print(book_info_list)
    for book in book_info_list:
        print('排序后:', book)

    # Convert to a DataFrame and store it as a comma-separated file.
    df = pd.DataFrame(book_info_list)
    df.to_csv('当当图书信息.csv')
- 调用函数
# Script entry: prompt the user for the book number to look up, then run the
# scraper. spider() prints the listings and writes them to a CSV file.
isbn = input("请输入你要查询的书号:")
spider(isbn)