【python】从豆瓣获取评分

目的

快速地从豆瓣获取电影的评分情况。

方案

直接访问 https://www.douban.com/search?q={movie_name} ，解析返回页面中的搜索结果，并在终端直接输出。
以后可以在此基础上配合 Alfred 做快速信息浏览，美滋滋。

依赖库安装

pip3 install bs4 requests lxml

执行效果

~ python3 douban.py 四体
[电影]    4.9 144人评价  2004    四体
[电影]    6.3 21288人评价    2005    美国派(番外篇)4:集体露营

支持开源

#! /usr/bin/env python3
# -*- coding:utf-8 -*-
import requests
import bs4
import sys


"""
python3 {douban.py} {movie_name}
"""


def get_web(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The ``text`` attribute of the ``requests`` response.
    """
    # NOTE(review): presumably a browser-like User-Agent is sent so the site
    # does not reject the default client string -- confirm.
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59"
    )
    response = requests.get(url, headers={"User-Agent": user_agent}, timeout=5)
    return response.text


def parse_city_date(soup):
    """Pull the breadcrumb text and the heading timestamp out of a page.

    This helper is not called anywhere in the visible part of the file.

    Args:
        soup: Parsed document exposing ``find(name, class_=...)``.

    Returns:
        A ``(location, date)`` tuple. ``location`` is the text of the
        ``div`` with class ``"crumbs fl"`` with surrounding whitespace,
        newlines and spaces removed; ``date`` is the first 16 characters of
        the stripped text of the ``<i>`` inside the ``h1`` with class
        ``"clearfix city"``.

    Raises:
        AttributeError: If ``find`` returns ``None`` for either element.
    """
    crumbs = soup.find("div", class_="crumbs fl")
    heading = soup.find("h1", class_="clearfix city")

    place = crumbs.text.strip()
    for unwanted in ("\n", " "):
        place = place.replace(unwanted, "")

    stamp = heading.i.text.strip()[:16]
    return place, stamp


def temp_string(high, low):
    """Combine two values into a single display string.

    Args:
        high: First value.
        low: Second value.

    Returns:
        ``"<high> / <low>"`` when both arguments are truthy; otherwise the
        two values formatted back to back with no separator.
    """
    if high and low:
        return f"{high} / {low}"
    # At least one side is falsy: emit both as-is, without the separator.
    return f"{high}{low}"


def format_infos(*infos):
    """Join result fields into one tab-separated output line.

    The fields may be passed either as a single iterable
    (``format_infos(fields)``) or as separate positional arguments
    (``format_infos(kind, rating, ...)``).

    Args:
        *infos: Either one iterable of fields, or the fields themselves.
            Every field after the second must be a ``str`` because those
            are combined with ``str.join``.

    Returns:
        The first field left-justified in a 6-character column, a tab, the
        second field, a tab, then the remaining fields joined by tabs.

    Raises:
        IndexError: If fewer than two fields are supplied.
    """
    # A lone argument is the iterable of fields (the original calling
    # convention). Previously ``list(*infos)`` raised TypeError as soon as
    # more than one positional argument was given, despite the variadic
    # signature.
    fields = list(infos[0]) if len(infos) == 1 else list(infos)
    label, score = fields[0], fields[1]
    tail = "\t".join(fields[2:])
    # ``!s`` keeps the ``%s`` semantics (str() conversion) for any value.
    return f"{label!s:<6}\t{score!s}\t{tail}"


def parse_content(e):
    """Extract the displayed fields of one search-result entry.

    Args:
        e: Element for one result. It is read through ``e.h3.span``,
            ``e.h3.a`` and ``e.div.find("span", class_=...)``.

    Returns:
        A 5-tuple ``(type, rating, rating_count, year, name)`` of strings.
        A field whose element is missing (falsy) becomes ``""``.
    """

    def text_of(node):
        # Missing (falsy) nodes contribute an empty string.
        if node:
            return node.text
        return ""

    heading = e.h3
    kind = text_of(heading.span)
    title = text_of(heading.a)

    rating_box = e.div
    score = text_of(rating_box.find("span", class_="rating_nums"))
    cast_line = text_of(rating_box.find("span", class_="subject-cast"))

    # The year is whatever follows the last "/" of the cast line.
    year = cast_line.rpartition("/")[2].strip() if cast_line else ""

    # NOTE(review): class_=None presumably selects the span without a class
    # attribute, which holds the vote count -- confirm against bs4 docs.
    votes_raw = text_of(rating_box.find("span", class_=None))
    # Drop the first and last character (assumed to be surrounding
    # parentheses -- TODO confirm against the live markup).
    votes = votes_raw[1:-1]

    return kind, score, votes, year, title


def parse_contents(soup):
    """Build the printable lines for every wanted result on a search page.

    Each ``div`` with class ``"title"`` is run through ``parse_content`` and
    ``format_infos``. Lines that begin with ``"[小组]"``, ``"[日记]"`` or a
    space are discarded.

    Args:
        soup: Parsed search page exposing ``find_all(name, class_=...)``.

    Returns:
        A list of formatted lines, in page order.
    """
    # A line starts with a space when, for example, the type label is empty
    # and format_infos pads the first column with spaces.
    unwanted_prefixes = ("[小组]", " ", "[日记]")

    lines = []
    for block in soup.find_all("div", class_="title"):
        line = format_infos(parse_content(block))
        if not line.startswith(unwanted_prefixes):
            lines.append(line)
    return lines


def print_weather(day, weather, tem, wind):
    """Print seven aligned rows, one per index 0..6 of the given sequences.

    This helper is not called anywhere in the visible part of the file.

    Args:
        day: Indexable values, printed left-aligned in a 10-wide column.
        weather: Indexable values, printed last, after a tab.
        tem: Indexable values, printed centred in a 15-wide column.
        wind: Indexable values, printed left-aligned in a 10-wide column.

    Raises:
        IndexError: If any sequence has fewer than seven items.
    """
    for idx in range(7):
        label, temperature, breeze, sky = day[idx], tem[idx], wind[idx], weather[idx]
        print(f"{label:<10}{temperature:^15}{breeze:<10}\t{sky}")


def create_soup(movie_name):
    """Download the Douban search page for ``movie_name`` and parse it.

    Args:
        movie_name: Search text, exactly as typed by the user.

    Returns:
        A ``bs4.BeautifulSoup`` document built with the ``"lxml"`` parser
        from the page returned by ``get_web``.
    """
    # Imported locally so this block does not depend on the file-level
    # import list.
    from urllib.parse import quote

    # Percent-encode the search text: characters such as "&", "#", "+" or
    # "%" would otherwise be read as URL syntax and corrupt the query.
    query = quote(movie_name, safe="")
    page = get_web(f"https://www.douban.com/search?q={query}")
    return bs4.BeautifulSoup(page, "lxml")


if __name__ == "__main__":
    # Usage: python3 douban.py {movie_name}
    if len(sys.argv) < 2:
        print("please input movie name")
        # sys.exit instead of the bare ``exit`` helper, which is injected by
        # the ``site`` module for interactive use and is not guaranteed to
        # exist. A non-zero status marks the missing argument as an error.
        sys.exit(1)

    movie_name = sys.argv[1]
    soup = create_soup(movie_name)
    contents = parse_contents(soup)
    # One formatted result per line.
    print(*contents, sep="\n")

感觉好玩的就来个赞呀!

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容