因为是本地网页，网页文件就不上传了，
直接上爬取代码：
from bs4 import BeautifulSoup
import requests
# Parse a locally saved product-listing page and print one dict per product
# containing its title, image URL, price, star count and review text.

# Binary mode: BeautifulSoup then detects the document's own encoding rather
# than depending on the platform's default text encoding. The handle is only
# needed while parsing, so the `with` block covers just that step.
with open('/Users/wangpegnfei/Desktop/Plan-for-combating-master/week1/1_2/1_2answer_of_homework/1_2_homework_required/index.html', 'rb') as web_data:
    bsObj = BeautifulSoup(web_data, 'lxml')

# One CSS selector per field; each call returns a list of matching tags in
# document order.
images = bsObj.select('div.thumbnail > img')
titles = bsObj.select('div.caption > h4 > a')
prices = bsObj.select('div.caption > h4.pull-right')
# The second <p> inside each ratings block holds the star icons.
stars = bsObj.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)')
reviews = bsObj.select('div.ratings > p.pull-right')

# NOTE: zip() stops at the shortest list, so if any selector matches fewer
# elements than the others, the trailing products are silently dropped.
for image, title, price, star, review in zip(images, titles, prices, stars, reviews):
    data = {
        'title': title.get_text(),
        'image': image.get('src'),
        'price': price.get_text(),
        # Rating = number of <span> tags whose class attribute is exactly
        # "glyphicon glyphicon-star" inside the star paragraph.
        'star': len(star.find_all('span', {'class': 'glyphicon glyphicon-star'})),
        'review': review.get_text(),
    }
    print(data)