将豆瓣电影 Top 250 的评分数据导入 Excel 文件中
需要openpyxl模块
pipenv install openpyxl
# Create a new workbook and take a reference to its active worksheet.
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
具体用法
# Minimal openpyxl example: fill a few cells and rows, then save the workbook.
from openpyxl import Workbook
import datetime
wb = Workbook()
ws = wb.active
# Assign a value to a single cell by its coordinate.
ws['A1'] = 520
# Each append() call adds one row at the bottom of the sheet,
# one list element per cell.
ws.append([1,2,3,4])
ws.append([1,2,3,4])
# A datetime object can be stored as a cell value as well.
ws['A4'] = datetime.datetime.now()
# Write the workbook to demo.xlsx.
wb.save('demo.xlsx')
效果图
将爬取的豆瓣250电影资料导入excel操作如下
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
def url_open(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The ``requests.Response`` object of a successful request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    # NOTE(review): presumably the site rejects the default requests
    # User-Agent, hence the desktop Chrome string -- confirm.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error instead of handing an error page
    # to the HTML parsers further down the pipeline.
    res.raise_for_status()
    return res
def find_depth(res):
    """Return the number of result pages advertised by the page's pager.

    The count is read from the text of the node located two siblings
    before the ``<span class="next">`` element.

    Args:
        res: Response object whose ``text`` attribute holds the page HTML.

    Returns:
        The page count as an ``int``.

    Raises:
        ValueError: If the ``next`` pager element or the node before it is
            missing, or if that node's text is not an integer.
    """
    soup = BeautifulSoup(res.text, 'html.parser')
    next_marker = soup.find('span', class_='next')
    if next_marker is None:
        # Typically means the response was not a normal listing page.
        raise ValueError('pager element <span class="next"> not found in page')
    # NOTE(review): two hops back -- presumably the immediate previous
    # sibling is a whitespace text node and the one before it is the last
    # page link; confirm against the live markup.
    spacer = next_marker.previous_sibling
    last_page = spacer.previous_sibling if spacer is not None else None
    if last_page is None:
        raise ValueError('no page-number element found before the "next" pager element')
    return int(last_page.text)
def _movie_info(bd):
    """Return the stripped 2nd and 3rd text lines of *bd*'s first <p>, joined.

    Returns an empty string when *bd* is ``None``, has no ``<p>``, or the
    paragraph has fewer than three lines.
    """
    try:
        lines = bd.p.text.split('\n')
        return lines[1].strip() + lines[2].strip()
    except (AttributeError, IndexError):
        return ''


def find_movies(res):
    """Extract ``[title, rating, info]`` rows from one listing page.

    Each movie is anchored on its ``<div class="hd">`` element, and the
    rating and info text are looked up relative to that element. This keeps
    the three fields of a row aligned even when one of them is missing;
    collecting three independent lists and joining them by index could shift
    data onto the wrong movie or raise ``IndexError``.

    Args:
        res: Response object whose ``text`` attribute holds the page HTML.

    Returns:
        A list of ``[title, rating, info]`` lists, one per ``div.hd`` found.
        ``rating`` and ``info`` are empty strings when not found.
    """
    soup = BeautifulSoup(res.text, 'html.parser')
    results = []
    for hd in soup.find_all('div', class_='hd'):
        # Movie title: first <span> inside the entry's link.
        title = hd.a.span.text
        # NOTE(review): assumes the entry's div.bd is a following sibling of
        # div.hd and contains span.rating_num -- verify against live markup.
        bd = hd.find_next_sibling('div', class_='bd')
        rating_tag = bd.find('span', class_='rating_num') if bd is not None else None
        rating = rating_tag.text if rating_tag is not None else ''
        results.append([title, rating, _movie_info(bd)])
    return results
def save_to_excel(result):
    """Write the scraped rows to ``250.xlsx`` below a three-column header.

    Args:
        result: Iterable of rows; each row is handed to ``append`` as-is.
    """
    workbook = Workbook()
    sheet = workbook.active
    # Header captions go into A1, B1 and C1.
    for column, caption in enumerate(('电影', '评分', '资料'), start=1):
        sheet.cell(row=1, column=column, value=caption)
    for row in result:
        sheet.append(row)
    workbook.save('250.xlsx')
def main():
    """Scrape every listing page under the host URL and save the rows to Excel."""
    host = 'https://movie.douban.com/top250'
    # The first request is used only to read the page count from the pager.
    page_count = find_depth(url_open(host))
    rows = []
    # The start offset advances by 25 for each page.
    for offset in range(0, 25 * page_count, 25):
        page = url_open(host + '?start=' + str(offset) + '&filter=')
        rows.extend(find_movies(page))
    save_to_excel(rows)


if __name__ == '__main__':
    main()