1.在settings.py文件下定义数据库信息字段
# --- MySQL connection settings (read by the item pipeline via `settings.<name>`) ---

# Server location and target schema.
mysql_host = '127.0.0.1'
mysql_db = 'pad_woll'
mysql_db_charset = 'utf8'

# Login credentials.
# NOTE(review): plaintext credentials in a settings file - consider loading
# them from the environment before this is deployed anywhere shared.
mysql_user = 'root'
mysql_password = '123456'
2.在pipelines.py文件下定义MySQL连接通道
import pymysql

from pad_wool_crawl import settings  # pad_wool_crawl is the project name
class MySQLPipeline(object):
    """Scrapy item pipeline that inserts every item into a MySQL table.

    Connection parameters are copied from the project's ``settings`` module
    when the pipeline is constructed, and the connection is opened right away.
    Each item becomes one row of ``line_report_table``.
    """

    # Columns written for each item; the same names are used as item keys.
    _FIELDS = ('title', 'time', 'content', 'images', 'type')

    # Parameterized statement: the driver quotes and escapes the values, so
    # quotes or backslashes in scraped text can neither break the statement
    # nor inject SQL.
    _INSERT_SQL = (
        'INSERT INTO line_report_table (title, time, content, images, type) '
        'VALUES (%s, %s, %s, %s, %s)'
    )

    def __init__(self):
        """Read the MySQL settings and open the connection."""
        self.mysql_host = settings.mysql_host
        self.mysql_user = settings.mysql_user
        self.mysql_password = settings.mysql_password
        self.mysql_db = settings.mysql_db
        self.mysql_db_charset = settings.mysql_db_charset
        self.connect()

    def connect(self):
        """Open the MySQL connection and create the cursor used for inserts."""
        self.conn = pymysql.connect(
            host=self.mysql_host,
            user=self.mysql_user,
            password=self.mysql_password,
            db=self.mysql_db,
            charset=self.mysql_db_charset,
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one item as a row and commit it.

        Args:
            item: Mapping providing the keys ``title``, ``time``, ``content``,
                ``images`` and ``type``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines can keep processing it.

        Raises:
            KeyError: If ``item`` lacks one of the required keys.
            Exception: Any database error is re-raised after the open
                transaction has been rolled back.
        """
        # str() keeps the stored text identical to what the former
        # '"%s"' % value formatting produced (e.g. a list of image URLs is
        # stored as its textual form) while still letting the driver escape it.
        params = tuple(str(item[field]) for field in self._FIELDS)
        try:
            self.cursor.execute(self._INSERT_SQL, params)
            self.conn.commit()
        except Exception:
            # Do not leave a half-finished transaction on the shared connection.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor, then the connection it belongs to."""
        self.cursor.close()
        self.conn.close()
3.启用MySQL连接通道(接收到数据,会自动进入定义的通道)
# 在settings.py文件中取消ITEM_PIPELINES的注释,并添加上面定义的MySQL连接通道
# Enabled item pipelines, keyed by import path. The integer is the pipeline's
# order value: Scrapy runs pipelines from the lowest number to the highest.
ITEM_PIPELINES = {'pad_wool_crawl.pipelines.MySQLPipeline': 301}