登录注册写文章

猫眼（第九次作业）

猫眼（第九次作业）

爬取猫眼电影榜单（https://maoyan.com/board/4，共 10 页，每页 10 部），并把每部电影的片名、主演、上映时间、评分、类型和片长写入 maoyan.csv。

import requests

from lxml import etree

import csv

# Request headers sent with every HTTP call in this script; the user-agent
# string identifies the client as a desktop Chrome browser.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/74.0.3724.8 Safari/537.36"
    ),
}

def _strip_label(text, label):
    """Return *text* without surrounding whitespace and without a leading *label*.

    ``str.strip(chars)`` treats its argument as a set of characters and would
    also eat legitimate leading/trailing characters of the value that happen
    to be in that set, so the label is removed as an exact prefix instead.
    """
    text = text.strip()
    if text.startswith(label):
        text = text[len(label):]
    return text


def get_url(url):
    """Scrape one board page and pass every listed movie to ``get_info``.

    Args:
        url: Address of a board page whose entries are the ``dd`` children of
            ``dl[@class="board-wrapper"]``.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when no response arrives within 10 seconds.
        IndexError: If an entry lacks one of the expected text nodes.
    """
    # A timeout keeps a stalled connection from hanging the whole scrape.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)

    for info in html.xpath('//dl[@class="board-wrapper"]/dd'):
        title = info.xpath('div/div/div[1]/p[1]/a/text()')[0]
        author = _strip_label(
            info.xpath('div/div/div[1]/p[2]/text()')[0], '主演：')
        pub_time = _strip_label(
            info.xpath('div/div/div[1]/p[3]/text()')[0], '上映时间：')

        # The score is rendered as two adjacent <i> elements whose texts are
        # concatenated (presumably integer and fractional part -- confirm
        # against the live page markup).
        star_1 = info.xpath('div/div/div[2]/p/i[1]/text()')[0]
        star_2 = info.xpath('div/div/div[2]/p/i[2]/text()')[0]
        star = star_1 + star_2

        # The href is site-relative, so prepend the host to get the detail URL.
        movie_url = ('https://maoyan.com'
                     + info.xpath('div/div/div[1]/p[1]/a/@href')[0])
        get_info(movie_url, title, author, pub_time, star)

def get_info(url, title, author, pub_time, star):
    """Fetch a movie detail page and append one CSV row for the movie.

    The genre and running time are read from the detail page; the remaining
    fields are passed through unchanged from the caller. The row is written
    with the module-level ``writer``, which the ``__main__`` section binds
    before any scraping starts.

    Args:
        url: Absolute URL of the movie detail page.
        title: Movie title.
        author: Leading cast text.
        pub_time: Release date text.
        star: Score text.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when no response arrives within 10 seconds.
        IndexError: If either expected ``li`` text node is missing.
    """
    # A timeout keeps a stalled connection from hanging the whole scrape.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)

    style = html.xpath(
        '/html/body/div[3]/div/div[2]/div[1]/ul/li[1]/text()')[0]

    # The second <li> is split on '/' and the running time is taken from the
    # second segment. When there is no '/', leave the field empty instead of
    # aborting the whole run with an IndexError.
    parts = html.xpath(
        '/html/body/div[3]/div/div[2]/div[1]/ul/li[2]/text()')[0].split('/')
    long_time = parts[1].strip().strip('分钟') if len(parts) > 1 else ''

    writer.writerow([title, author, pub_time, star, style, long_time])

if __name__ == '__main__':
    # The ``with`` block guarantees the CSV file is flushed and closed even if
    # a request or a parse step raises part-way through the scrape.
    with open('maoyan.csv', 'w', newline='', encoding='utf-8') as fp:
        # ``writer`` has to stay a module-level name: get_info() writes its
        # rows through it.
        writer = csv.writer(fp)
        writer.writerow(
            ['title', 'author', 'pub_time', 'star', 'style', 'long_time'])

        # Ten board pages, addressed by offsets 0, 10, ..., 90.
        urls = ['https://maoyan.com/board/4?offset={}'.format(i)
                for i in range(0, 100, 10)]
        for url in urls:
            get_url(url)

分析

最后编辑于：2019.07.18 00:07:40

©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成，浏览时请结合常识与多方信息审慎甄别。
平台声明：文章内容（如有图片或视频亦包括在内）由作者上传并发布，文章内容仅代表作者本人观点，简书系信息发布平台，仅提供信息存储服务。

友情链接更多精彩内容

赞1赞

赞赏

手机看全文