案例24:爬取豆瓣 Top250 电影信息,并分别保存为 CSV 和 Excel 文件

import requests,bs4,csv
#引入requests,bs4,csv库

# Scrape the Douban Top 250 movie list (10 pages of 25 entries each) and
# write one CSV row per movie to douban.csv.
headers={'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

# The context manager closes the file even if a request or parse step raises.
# An explicit encoding keeps the Chinese text independent of the platform's
# locale; the BOM written by utf-8-sig lets spreadsheet tools detect UTF-8.
with open('douban.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
    write = csv.writer(csv_file)
    # Header row first.
    write.writerow(['序号','电影名称','豆瓣评分','推荐语','网址'])

    for x in range(10):
        # Each page is addressed by the offset of its first entry.
        url = 'https://movie.douban.com/top250?start=' + str(x*25) + '&filter='
        # A timeout prevents the script from hanging forever on a stalled
        # connection; raise_for_status surfaces HTTP errors immediately
        # instead of parsing an error page.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()

        page = bs4.BeautifulSoup(res.text, 'html.parser')
        movie_list = page.find('ol', class_="grid_view")
        if movie_list is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise RuntimeError('movie list not found in page: ' + url)

        for item in movie_list.find_all('li'):
            num = item.find('em', class_="").text
            title = item.find('span', class_="title").text
            rating = item.find('span', class_="rating_num").text
            url_movie = item.find('a')['href']

            # Not every entry has a recommendation quote; fall back to '无'.
            quote = item.find('span', class_="inq")
            tes = quote.text if quote is not None else '无'

            write.writerow([num, title, rating, tes, url_movie])
import requests,bs4,openpyxl
# Import the requests, bs4 and openpyxl libraries

# Scrape the Douban Top 250 movie list (10 pages of 25 entries each) and
# store one worksheet row per movie in douban123.xlsx.
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = '豆瓣123'
# Header row: one cell per column, A through E.
sheet['A1'] = '序号'
sheet['B1'] = '电影名称'
sheet['C1'] = '评份'
sheet['D1'] = '推荐语'
# The URL header belongs in column E; writing it to A1 would overwrite the
# index header and leave the fifth column unlabeled.
sheet['E1'] = '网址'

headers={'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
for x in range(10):
    # Each page is addressed by the offset of its first entry.
    url = 'https://movie.douban.com/top250?start=' + str(x*25) + '&filter='
    # A timeout prevents the script from hanging forever on a stalled
    # connection; raise_for_status surfaces HTTP errors immediately
    # instead of parsing an error page.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()

    page = bs4.BeautifulSoup(res.text, 'html.parser')
    movie_list = page.find('ol', class_="grid_view")
    if movie_list is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise RuntimeError('movie list not found in page: ' + url)

    for item in movie_list.find_all('li'):
        num = item.find('em', class_="").text
        title = item.find('span', class_="title").text
        rating = item.find('span', class_="rating_num").text
        url_movie = item.find('a')['href']

        # Not every entry has a recommendation quote; fall back to '无'.
        quote = item.find('span', class_="inq")
        tes = quote.text if quote is not None else '无'

        # append() adds the values as the next row below the existing data.
        sheet.append([num, title, rating, tes, url_movie])

wb.save('douban123.xlsx')
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

友情链接更多精彩内容