思路解读:
- 通过
Navicat
中sql
查询语句(以小说章节表为例)查询出出
现重复小说章节的内容(以url的唯一性为例)- 将数据导入
Excel
表格中- 读取
Excel
,通过select
查询url出现数据重复的id- 将查询的id导入删除的sql语句中,删除成功
sql查询语句,查询chapter_url在表中出现的次数
SELECT chapter_url,count(chapter_url) as number from chapter GROUP BY chapter_url HAVING number >1 ORDER BY number desc;
"""sql删除方法"""
DELETE
from
chapter
where id in (
select
id
from
(SELECT
chapter_url,count(chapter_url) as number,id
FROM `data_engine`
GROUP BY chapter_url HAVING number >1 ORDER BY number desc)
as t)
脚本方法
# -*- coding: utf-8 -*-
import pymysql
import xlrd
#打开excel
def open_excel(file):
try:
data = xlrd.open_workbook(file)
return data
except Exception as e:
print(str(e))
#调用打开excel的方法
def excel_table_byindex():
# 此处为Excel的路劲
data = open_excel('F:\\workspace\\delete_sql.xls')
table = data.sheet_by_name(u'Sheet1') #通过名称获取
db = connectMysql()
for i in range(415, 65536):
url = table.cell(i, 0).value #读取Excel的url
number = table.cell(i, 1).value
with db.cursor() as cursor:
sql = "SELECT id FROM `chapter` where chapter_url='%s';" % (url)
# yield sql
print(sql)
db.ping(reconnect=True)
cursor.execute(sql)
data = cursor.fetchall()
db.commit()
# return data
db.close()
get_id(data)
def get_id(ids):
new_ids = list(ids)
if len(new_ids)> 0:
# 移除id的第0项
new_ids.remove(new_ids[0])
# 将id传进行循环删除
for i in new_ids:
db = connectMysql()
with db.cursor() as cursor:
sql = "DELETE FROM chapter WHERE id ='%s';" % (i[0])
print(sql)
db.ping(reconnect=True)
cursor.execute(sql)
db.commit()
db.close()
# 此处填写你的mysql数据库账号及密码
def connectMysql():
db = pymysql.connect(
host='localhost',
database='novellist',
user='root',
password='admin123',
port=3306,
charset='utf8'
)
return db
def main(): # 主方法调用
con = excel_table_byindex()
if __name__=="__main__":
table = main()