进阶:获取豆瓣 Top250 的电影、评分、简评
运行结果
/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5 /Users/wjw/PycharmProjects/class9/豆瓣.py
[
<Movie:
score = (9.6)
name = (肖申克的救赎)
quote = (希望让人自由。)
>,
<Movie:
score = (9.4)
name = (这个杀手不太冷)
quote = (怪蜀黍和小萝莉不得不说的故事。)
>,
<Movie:
score = (9.4)
name = (阿甘正传)
quote = (一部美国近现代史。)
>,
<Movie:
score = (9.4)
name = (霸王别姬)
quote = (风华绝代。)
>,
<Movie:
score = (9.5)
name = (美丽人生)
quote = (最美的谎言。)
>,
<Movie:
score = (9.2)
name = (千与千寻)
quote = (最好的宫崎骏,最好的久石让。 )
>,
<Movie:
score = (9.4)
name = (辛德勒的名单)
quote = (拯救一个人,就是拯救整个世界。)
>,
<Movie:
score = (9.2)
name = (海上钢琴师)
quote = (每个人都要走一条自己坚定了的路,就算是粉身碎骨。 )
>,
<Movie:
score = (9.3)
name = (机器人总动员)
quote = (小瓦力,大人生。)
>,
<Movie:
score = (9.2)
name = (盗梦空间)
quote = (诺兰给了我们一场无法盗取的梦。)
>,
<Movie:
score = (9.1)
name = (泰坦尼克号)
quote = (失去的才是永恒的。 )
>,
<Movie:
score = (9.1)
name = (三傻大闹宝莱坞)
quote = (英俊版憨豆,高情商版谢耳朵。)
>,
<Movie:
score = (9.2)
name = (放牛班的春天)
quote = (天籁一般的童声,是最接近上帝的存在。 )
>,
<Movie:
score = (9.2)
name = (忠犬八公的故事)
quote = (永远都不能忘记你所爱的人。)
>,
<Movie:
score = (9.1)
name = (大话西游之大圣娶亲)
quote = (一生所爱。)
>,
<Movie:
score = (9.1)
name = (龙猫)
quote = (人人心中都有个龙猫,童年就永远不会消失。)
>,
<Movie:
score = (9.2)
name = (教父)
quote = (千万不要记恨你的对手,这样会让你失去理智。)
>,
<Movie:
score = (9.2)
name = (乱世佳人)
quote = (Tomorrow is another day.)
>,
<Movie:
score = (9.1)
name = (天堂电影院)
quote = (那些吻戏,那些青春,都在影院的黑暗里被泪水冲刷得无比清晰。)
>,
<Movie:
score = (8.9)
name = (当幸福来敲门)
quote = (平民励志片。 )
>,
<Movie:
score = (9.0)
name = (搏击俱乐部)
quote = (邪恶与平庸蛰伏于同一个母体,在特定的时间互相对峙。)
>,
<Movie:
score = (9.0)
name = (楚门的世界)
quote = (如果再也不能见到你,祝你早安,午安,晚安。)
>,
<Movie:
score = (9.1)
name = (触不可及)
quote = (满满温情的高雅喜剧。)
>,
<Movie:
score = (9.1)
name = (指环王3:王者无敌)
quote = (史诗的终章。)
>,
<Movie:
score = (8.9)
name = (罗马假日)
quote = (爱情哪怕只有一天。)
>]
Process finished with exit code 0
源代码
import requests
from lxml import html
class Model(object):
    """Base class that gives subclasses a readable, multi-line ``repr``.

    Every entry of the instance ``__dict__`` is rendered as
    ``name = (value)`` on its own line, wrapped in ``<ClassName: ... >``.
    """

    def __repr__(self):
        """Return the class name followed by one line per instance attribute."""
        rows = []
        for attr, value in vars(self).items():
            rows.append('{0} = ({1})'.format(attr, value))
        joined = '\n '.join(rows)
        return '\n<{0}:\n {1}\n>'.format(type(self).__name__, joined)
class Movie(Model):
    """A single movie entry: title, rating, one-line quote and cover URL."""

    def __init__(self):
        super().__init__()
        # Placeholder defaults; movie_from_div assigns the real values
        # after construction. Assignment order is kept because it is the
        # order in which the inherited repr lists the attributes.
        self.name = ''
        self.score = 0
        self.quote = ''
        self.cover_url = ''
def movie_from_div(div):
    """Build a Movie from one ``<div class="item">`` element.

    Args:
        div: Element for a single movie entry; it is queried with relative
            XPath expressions.

    Returns:
        A Movie with ``name``, ``score``, ``quote`` and ``cover_url`` taken
        from the element. ``score`` is stored as the text found in the page,
        not converted to a number. ``quote`` keeps the Movie default when
        the entry has no ``<span class="inq">``.

    Raises:
        IndexError: If the title, rating or cover image node is missing.
    """
    movie = Movie()
    # An entry may contain several "title" spans; only the first is used.
    movie.name = div.xpath('.//span[@class="title"]')[0].text
    movie.score = div.xpath('.//span[@class="rating_num"]')[0].text

    # The quote span may be absent. Indexing an empty result would raise
    # IndexError and abort parsing of the whole page, so only assign when
    # a node was actually found.
    quote_nodes = div.xpath('.//span[@class="inq"]')
    if quote_nodes:
        movie.quote = quote_nodes[0].text

    img_url = div.xpath('.//div[@class="pic"]/a/img/@src')[0]
    # Echo the cover URL to stdout as each entry is parsed.
    print(img_url)
    movie.cover_url = img_url
    return movie
def movies_from_url(url, timeout=10):
    """Download a list page and parse every movie entry on it.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Without it a stalled connection would block indefinitely.

    Returns:
        A list of Movie objects, one per ``<div class="item">`` in the page.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page, which would
    # silently yield an empty list.
    page.raise_for_status()
    root = html.fromstring(page.content)
    # Each movie entry lives in its own <div class="item">.
    movie_divs = root.xpath('//div[@class="item"]')
    return [movie_from_div(div) for div in movie_divs]
def download_img(url, name, timeout=10):
    """Download ``url`` and write the response body to the file ``name``.

    Args:
        url: Address of the resource to download.
        name: Path of the file to create or overwrite (opened in binary mode).
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        OSError: If the target file cannot be opened or written.
    """
    r = requests.get(url, timeout=timeout)
    # Check the status before opening the file so a failed request does not
    # leave an error page saved under an image file name.
    r.raise_for_status()
    with open(name, 'wb') as f:
        f.write(r.content)
def save_covers(movies):
    """Download each movie's cover image to ``<movie name>.jpg``.

    Args:
        movies: Iterable of objects exposing ``cover_url`` and ``name``.
    """
    for movie in movies:
        cover = movie.cover_url
        target = movie.name + '.jpg'
        download_img(cover, target)
def main():
    """Fetch the movies at the Douban top250 URL, print them, save covers."""
    top_url = 'https://movie.douban.com/top250'
    found = movies_from_url(top_url)
    print(found)
    save_covers(found)


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()