python3爬虫实例(一)-bs4抓取猫眼电影保存到mysql

运行版本:
Python 3.7.0
完整代码如下:

# -*- coding: utf-8 -*-
"""
@author:lee
@create_time:2018/10/18 11:42
"""
from bs4 import BeautifulSoup
import requests
import bs4
import pymysql.cursors

def gethtml(url, headers):
    """Download *url* and return its HTML text, or None on failure.

    Args:
        url: The page URL to fetch.
        headers: dict of HTTP request headers (User-Agent etc.) used to
            avoid the site's basic anti-crawler filtering.

    Returns:
        The response body as a str when the server answers HTTP 200,
        otherwise None.
    """
    try:
        # The request itself can raise (DNS failure, timeout, connection
        # refused), so it must live INSIDE the try block — the original
        # code issued it before `try`, leaving those errors unhandled.
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            print('抓取成功网页长度:', len(response.text))
            # Force UTF-8 so Chinese text decodes correctly regardless of
            # the charset requests guessed from the response headers.
            response.encoding = 'utf-8'
            return response.text
    except requests.RequestException as e:
        # Catch only network-level errors; `except BaseException` would
        # also swallow KeyboardInterrupt and SystemExit.
        print('抓取出现错误:', e)
    return None


def getsoup(html, results):
    """Parse one Maoyan board page and append one row per movie.

    Each appended row is a list: [rank, title, release_time, score].

    Args:
        html: Raw HTML text of a board page (as returned by gethtml).
        results: List the parsed rows are appended to (mutated in place).
            Renamed from `list` to avoid shadowing the builtin.
    """
    soup = BeautifulSoup(html, 'lxml')
    for dd in soup.find_all('dd'):
        # find_all can in principle yield non-Tag nodes; only Tags have
        # the attribute/child accessors used below.
        if isinstance(dd, bs4.element.Tag):
            top = dd.i.string  # ranking number lives in the first <i> tag
            name = dd.find('p', class_='name').string
            release_time = dd.find('p', class_='releasetime').string
            # The score is split across two child nodes (integer part and
            # fractional part), so join their strings back together.
            parts = dd.find('p', class_='score').contents
            score = parts[0].string + parts[1].string
            results.append([top, name, release_time, score])

def write_sql(data):
    """Insert scraped movie rows into the `maoyan` table of db `spiders`.

    Failed rows are rolled back and skipped individually, so one bad row
    does not abort the whole batch.

    Args:
        data: Iterable of [top, name, release_times, score] rows, as
            produced by getsoup.
    """
    # Hoisted out of the loop: the statement text never changes per row.
    # (Column name `replease_times` matches the existing table schema.)
    sql = 'INSERT INTO maoyan(top,name,replease_times,score) VALUES (%s,%s,%s,%s)'
    conn = pymysql.connect(host='localhost', user='root', password='123456',
                           port=3306, db='spiders')
    try:
        # Cursor as a context manager so it is always closed.
        with conn.cursor() as cur:
            for movie in data:
                try:
                    cur.execute(sql, movie)
                    conn.commit()
                    print('写入成功')
                except pymysql.MySQLError as e:
                    # Narrow except: only database errors; roll back just
                    # this row and keep inserting the rest.
                    print('导入失败', e)
                    conn.rollback()
    finally:
        # Original leaked the connection if anything unexpected escaped
        # the loop; `finally` guarantees it is closed.
        conn.close()


def main():
    """Crawl all pages of the Maoyan Top-100 board and persist each
    page's movies to MySQL, printing the parsed rows as it goes."""
    start_url = 'http://maoyan.com/board/4'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    depth = 10  # board holds 100 movies, 10 per page
    for page in range(depth):
        # Pages are addressed by an offset query param: 0, 10, 20, ...
        url = start_url + '?offset=' + str(10 * page)
        html = gethtml(url, headers)
        movies = []  # renamed from `list` to avoid shadowing the builtin
        getsoup(html, movies)
        write_sql(movies)
        print(movies)

# Run the crawler only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

运行结果:

(此处为运行结果截图,导出时图片丢失)

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。