目录:
1.正常模式使用pymysql存入mysql
2.正常模式使用MySQLdb存入mysql
3.在scrapy中pipelines.py中存入mysql
4.在scrapy中pipelines.py中使用异步存入mysql
正文:
1.正常模式使用pymysql存入mysql
import pymysql
# Plain (synchronous) insert of one record with pymysql.
# The 'XXX' values are placeholders for the real host/user/password/schema/table.
insert_sql = """
insert into XXX(id,big_name,small_name,GB_num,GB_name,GB_bumen,GB_time,GB_zhuangt) VALUES (null,%s,%s,%s,%s,%s,%s,%s)
"""
# `item` is a mapping that must be defined before this snippet runs; reading
# its fields before connecting means a missing key cannot leak a connection.
params = (
    item['big_name'],
    item['small_name'],
    item['GB_num'],
    item['GB_name'],
    item['GB_bumen'],
    item['GB_time'],
    item['GB_zhuangt'],
)

conn = pymysql.connect(host='XXX', user='XXX', password='XXX', db='XXX', charset="utf8")
try:
    cursor = conn.cursor()
    try:
        # Values are passed separately so the driver escapes them.
        cursor.execute(insert_sql, params)
        conn.commit()
    except Exception:
        # Undo the partial transaction, then let the caller see the error.
        conn.rollback()
        raise
    finally:
        cursor.close()
finally:
    # Always release the connection, even when the insert failed.
    conn.close()
2.正常模式使用MySQLdb存入mysql
import MySQLdb
# Plain (synchronous) insert of one record with MySQLdb.
# The 'XXX' values are placeholders for the real host/user/password/schema/table.
# Fixed: the statement previously read "insert intoXXX(" (missing space).
insert_sql = """
insert into XXX(id,big_name,small_name,GB_num,GB_name,GB_bumen,GB_time,GB_zhuangt) VALUES (null,%s,%s,%s,%s,%s,%s,%s)
"""
# `item` is a mapping that must be defined before this snippet runs; reading
# its fields before connecting means a missing key cannot leak a connection.
params = (
    item['big_name'],
    item['small_name'],
    item['GB_num'],
    item['GB_name'],
    item['GB_bumen'],
    item['GB_time'],
    item['GB_zhuangt'],
)

conn = MySQLdb.connect(host='XXX', user='XXX', password='XXX', db='XXX', charset="utf8")
try:
    cursor = conn.cursor()
    try:
        # Values are passed separately so the driver escapes them.
        cursor.execute(insert_sql, params)
        conn.commit()
    except Exception:
        # Undo the partial transaction, then let the caller see the error.
        conn.rollback()
        raise
    finally:
        cursor.close()
finally:
    # Always release the connection, even when the insert failed.
    conn.close()
3.在scrapy中pipelines.py中存入mysql
import MySQLdb
class XXXMySQLPipeline(object):
    """Scrapy item pipeline that inserts each item into MySQL synchronously.

    One connection and one cursor are opened when the pipeline is created
    and reused for every item; both are released in ``close_spider``.
    The 'XXX' values are placeholders for the real connection settings
    and table name.
    """

    # Item fields, in the same order as the %s placeholders in _INSERT_SQL.
    _FIELDS = (
        'big_name',
        'small_name',
        'GB_num',
        'GB_name',
        'GB_bumen',
        'GB_time',
        'GB_zhuangt',
    )

    _INSERT_SQL = """
insert into XXX(id,big_name,small_name,GB_num,GB_name,GB_bumen,GB_time,GB_zhuangt) VALUES (null,%s,%s,%s,%s,%s,%s,%s)
"""

    def __init__(self):
        self.conn = MySQLdb.connect(host='XXX', user='XXX', password='XXX', db='XXX', charset="utf8", use_unicode=True)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert *item* into the table and commit.

        Args:
            item: mapping that provides every key listed in ``_FIELDS``.
            spider: the spider that produced the item (unused).

        Returns:
            The same *item*, so later pipelines keep receiving it.

        Raises:
            KeyError: if *item* lacks one of the expected fields.
            Exception: any database error, re-raised after a rollback.
        """
        params = tuple(item[field] for field in self._FIELDS)
        try:
            self.cursor.execute(self._INSERT_SQL, params)
            self.conn.commit()
        except Exception:
            # Leave the connection in a clean state for the next item.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        self.cursor.close()
        self.conn.close()
然后在 settings.py 中启用 XXXMySQLPipeline:
# Register the pipeline by its dotted path; the integer is the value Scrapy
# uses to order enabled pipelines.
ITEM_PIPELINES = dict([('XXX.pipelines.XXXMySQLPipeline', 300)])
4.在scrapy中pipelines.py中使用异步存入mysql
from twisted.enterprise import adbapi
from pymysql import cursors
class XXXTwistedPipeling(object):
    """Scrapy item pipeline that inserts items into MySQL asynchronously.

    Inserts are handed to a Twisted ``adbapi.ConnectionPool`` through
    ``runInteraction``, so ``process_item`` does not wait for the database.
    The 'XXX' values are placeholders for the real connection settings
    and table name.
    """

    # Item fields, in the same order as the %s placeholders in _INSERT_SQL.
    _FIELDS = (
        'title',
        'content',
        'article_id',
        'origen_url',
        'author',
        'avatar',
        'pub_time',
        'word_count',
        'read_count',
        'like_count',
        'comment_count',
        'subjects',
    )

    _INSERT_SQL = """
insert into XXX(id,title,content,article_id,origen_url,author,avatar ,pub_time,word_count,read_count,like_count,comment_count,subjects) values (null,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
"""

    def __init__(self):
        dbparams = {
            'host': 'XXX',
            # Was the bare name XXX (a NameError); 3306 is MySQL's default
            # port -- replace it if the server listens elsewhere.
            'port': 3306,
            'user': 'XXX',
            'password': 'XXX',
            'database': 'XXX',
            'charset': 'utf8',
            'cursorclass': cursors.DictCursor,
        }
        self.dbpool = adbapi.ConnectionPool("pymysql", **dbparams)

    @property
    def sql(self):
        """Return the parameterized INSERT statement used for every item."""
        return self._INSERT_SQL

    def process_item(self, item, spider):
        """Schedule an asynchronous insert of *item*.

        Args:
            item: mapping that provides every key listed in ``_FIELDS``.
            spider: the spider that produced the item; forwarded to the
                error handler.

        Returns:
            The same *item*, so later pipelines keep receiving it.
        """
        deferred = self.dbpool.runInteraction(self.insert_item, item)
        # Error handling: report failures instead of dropping them silently.
        deferred.addErrback(self.handle_error, item, spider)
        return item

    def insert_item(self, cursor, item):
        """Execute the INSERT for *item* on the cursor supplied by the pool."""
        cursor.execute(self.sql, tuple(item[field] for field in self._FIELDS))

    def handle_error(self, error, item, spider):
        """Print the failure of an insert between two separator lines."""
        print('+' * 10)
        print(error)
        print('+' * 10)

    def close_spider(self, spider):
        """Shut down the connection pool when the spider finishes."""
        self.dbpool.close()