有个同事有一张表,该表无主键,但是由于死锁原因,产生部分重复数据,该部分数据重复会影响报表生成,故而需要对重复数据进行筛选并且剔除,建表脚本:
DROP TABLE IF EXISTS `T_ECFN_LIMIT_INIT`;
CREATE TABLE `T_ECFN_LIMIT_INIT` (
`PLAT_DATE` char(8) NOT NULL,
`MONTH` int(11) NOT NULL,
`BILL_NO` varchar(64) NOT NULL,
`EN_CODE` varchar(32) NOT NULL,
`EN_NAME` varchar(255) DEFAULT NULL,
`AREA_NO` varchar(20) NOT NULL,
`PLAN_ID` varchar(64) DEFAULT '',
`AMT` decimal(16,2) NOT NULL DEFAULT '0.00',
`AWT_DATE` varchar(20) DEFAULT NULL,
`ITEM_NO` varchar(32) NOT NULL,
`PRJ_CODE` varchar(32) DEFAULT NULL,
`DEPT_NO` varchar(32) DEFAULT NULL,
`BUD_TYPE` varchar(32) DEFAULT NULL,
`GUIDE_LINE` varchar(32) DEFAULT NULL,
`FUND_TYPE` varchar(32) DEFAULT NULL,
`REMAK` varchar(255) DEFAULT NULL,
`NOTE` varchar(255) DEFAULT NULL,
`BRCH_NO` varchar(20) DEFAULT NULL,
`TELLER_NO` varchar(20) DEFAULT NULL,
`REVS` varchar(255) DEFAULT NULL,
`DAC` varchar(32) DEFAULT NULL,
UNIQUE KEY `index_unique_limit_init_1` (`PLAT_DATE`,`BILL_NO`,`AREA_NO`,`PLAN_ID`),
KEY `index_ecfn_limit_init_1` (`EN_CODE`,`AREA_NO`,`ITEM_NO`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
现在插入几条测试数据,最后两条为重复数据:
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000012410500005105201900258', '182001', '云南省人民检察院', '530000', '989844', '1400000.00', '20190709', '2040499', '201600024491', null, '003', null, null, '测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000018710500005105201900258', '182001', '云南省人民检察院', '530000', '989842', '50000.00', '20190709', '2040499', '201700036719', null, '003', null, null, '测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000025610500005105201900258', '182001', '云南省人民检察院', '530000', '989841', '100000.00', '20190709', '2049901', '00226782', null, '003', null, null, '测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000028910500005105201900258', '182001', '云南省人民检察院', '530000', '989843', '392800.00', '20190709', '2040499', '201600024491', null, '003', null, null, '测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000046010500005105201900259', '182001', '云南省人民检察院', '530000', '989845', '200000.00', '20190709', '2070702', '201800054640', null, '001', null, null, '测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000055310500005105201900260', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989852', '30000.00', '20190709', '2010499', '201700036719', null, '003', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000056910500005105201900260', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989855', '264280.00', '20190709', '2010408', '201600016316', null, '003', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000065510500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989849', '332000.00', '20190709', '2010499', '201900057666', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000067110500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989847', '497500.00', '20190709', '2130599', '201700038747', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000071410500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989853', '80000.00', '20190709', '2010407', '201900057651', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000072810500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989854', '30000.00', '20190709', '2110199', '201900057663', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000074010500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989848', '280000.00', '20190709', '2010405', '201900057650', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000075910500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989846', '90000.00', '20190709', '2010404', '201900057662', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000078310500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989851', '3960000.00', '20190709', '2011099', '201900057652', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000079510500005105201900261', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989850', '117000.00', '20190709', '2010401', '201800041817', null, '001', null, null, '测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000084010500005105201900268', '182001', '云南省人民检察院', '530000', '989882', '272815.00', '20190709', '2210201', '201800041398', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000084910500005105201900268', '182001', '云南省人民检察院', '530000', '989879', '405000.00', '20190709', '2040401', '201800041398', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000086310500005105201900268', '182001', '云南省人民检察院', '530000', '989884', '3035.00', '20190709', '2040450', '201800041818', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000087510500005105201900268', '182001', '云南省人民检察院', '530000', '989878', '9190.00', '20190709', '2040401', '201800041398', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000089010500005105201900268', '182001', '云南省人民检察院', '530000', '989876', '57920.00', '20190709', '2040401', '201800041820', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000090610500005105201900268', '182001', '云南省人民检察院', '530000', '989877', '5210.00', '20190709', '2080505', '201800041398', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160000091910500005105201900268', '182001', '云南省人民检察院', '530000', '989881', '575725.00', '20190709', '2040401', '201800041818', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100001810500005105201900268', '182001', '云南省人民检察院', '530000', '989883', '371380.00', '20190709', '2080505', '201800041398', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100003710500005105201900268', '182001', '云南省人民检察院', '530000', '989880', '605.00', '20190709', '2040450', '201800041398', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100005310500005105201900268', '182001', '云南省人民检察院', '530000', '989885', '11605.00', '20190709', '2080506', '201800041398', null, '001', null, null, '1-4月基本支出', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100006610500005105201900268', '182001', '云南省人民检察院', '530000', '989845', '90000.00', '20190709', '2070702', '201800054640', null, '001', null, null, '建行测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100012010500005105201900269', '182001', '云南省人民检察院', '530000', '989844', '630000.00', '20190709', '2040499', '201600024491', null, '003', null, null, '建行测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100013510500005105201900269', '182001', '云南省人民检察院', '530000', '989842', '22500.00', '20190709', '2040499', '201700036719', null, '003', null, null, '建行测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100015510500005105201900269', '182001', '云南省人民检察院', '530000', '989841', '45000.00', '20190709', '2049901', '00226782', null, '003', null, null, '建行测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100017210500005105201900269', '182001', '云南省人民检察院', '530000', '989843', '176760.00', '20190709', '2040499', '201600024491', null, '003', null, null, '建行测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100020510500005105201900295', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989927', '3000.00', '20190709', '2010401', '201800041820', null, '001', null, null, '', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100022010500005105201900295', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989928', '2800.00', '20190709', '2010450', '201800041812', null, '001', null, null, '', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100023410500005105201900295', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989929', '11200.00', '20190709', '2010401', '201800041398', null, '001', null, null, '', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100027510500005105201900301', '182001', '云南省人民检察院', '530000', '989879', '76950.00', '20190709', '2040401', '201800041398', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100029010500005105201900301', '182001', '云南省人民检察院', '530000', '989827', '1239.00', '20190709', '2080501', '201800041818', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100030310500005105201900301', '182001', '云南省人民检察院', '530000', '989884', '576.65', '20190709', '2040450', '201800041818', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100031910500005105201900301', '182001', '云南省人民检察院', '530000', '989878', '1746.10', '20190709', '2040401', '201800041398', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100033510500005105201900301', '182001', '云南省人民检察院', '530000', '989876', '11004.80', '20190709', '2040401', '201800041820', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100035010500005105201900301', '182001', '云南省人民检察院', '530000', '989877', '989.90', '20190709', '2080505', '201800041398', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100036510500005105201900301', '182001', '云南省人民检察院', '530000', '989881', '109387.75', '20190709', '2040401', '201800041818', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100038010500005105201900301', '182001', '云南省人民检察院', '530000', '989883', '70562.20', '20190709', '2080505', '201800041398', null, '001', null, null, '0322额度测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '3', '19070910160100041410500005105201900308', '182001', '云南省人民检察院', '530000', '989827', '999.00', '20190709', '2080501', '201800041818', null, '001', null, null, '', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100044610500005105201900312', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989953', '1000.00', '20190709', '2010401', '201800041812', null, '001', null, null, '0506测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100046010500005105201900312', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989954', '1000.00', '20190709', '2080501', '201800041818', null, '001', null, null, '0506测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100047510500005105201900312', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989919', '1000.00', '20190709', '2080506', '201800041398', null, '001', null, null, '0506测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100049010500005105201900312', '182002001', '云南省人民检察院昆明铁路运输检察分院', '530000', '989991', '1000.00', '20190709', '2210201', '201800041398', null, '001', null, null, '0506测试', null, '530945040', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100050210500005105201900312', '101001001', '云南省发展和改革委员会(机关合并)', '530000', '989918', '1000.00', '20190709', '2080506', '201800041398', null, '001', null, null, '0506测试', null, '530615438', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100052010500005105201900312', '182002001', '云南省人民检察院昆明铁路运输检察分院', '530000', '989992', '1000.00', '20190709', '2040401', '201800041398', null, '001', null, null, '0506测试', null, '530945040', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100053210500005105201900312', '182002001', '云南省人民检察院昆明铁路运输检察分院', '530000', '989993', '1000.00', '20190709', '2080501', '201800041818', null, '001', null, null, '0506测试', null, '530945040', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160100057210500005105201900313', '182001', '云南省人民检察院', '530000', '989885', '22222.00', '20190709', '2080506', '201800041398', null, '001', null, null, '0506测试', null, '530615536', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160111153210500005105201900312', '182002001', '云南省人民检察院昆明铁路运输检察分院', '530000', '989993', '1000.00', '20190709', '2080501', '201800041818', null, '001', null, null, '重复', null, '530945040', null, null, null);
INSERT INTO `T_ECFN_LIMIT_INIT` VALUES ('20190709', '5', '19070910160122257210500005105201900313', '182001', '云南省人民检察院', '530000', '989885', '22222.00', '20190709', '2080506', '201800041398', null, '001', null, null, '去重复', null, '530615536', null, null, null);
最后两条数据是手工添加的重复数据,其BILL_NO分别对应:
19070910160111153210500005105201900312
19070910160122257210500005105201900313
我们现在需要针对重复数据进行筛选,那么,我们首先需要考虑的一个问题就是:什么样的数据算是重复数据?
想清楚这一个问题,我们才可以有针对性的对数据进行group by
处理,所以针对该表:同一个财政级别(AREA_NO)下同一个月份(MONTH)的同一个预算单位(EN_CODE)的同一个科目(ITEM_NO)的相同金额(AMT),如果PLAN_ID是一致的,那么该数据就是重复的!
所以,确定重复数据条件,我们就可以通过COUNT函数确定重复数据。针对该表,BILL_NO可以区分重复数据,有时间戳加上凭证号生成,所以我们可以先查出全部重复数据:
SELECT * FROM T_ECFN_LIMIT_INIT li WHERE (li.AREA_NO,li.PLAN_ID,li.AMT,li.EN_CODE,li.ITEM_NO,li.MONTH)
IN(SELECT AREA_NO,PLAN_ID,AMT,EN_CODE,ITEM_NO,MONTH FROM T_ECFN_LIMIT_INIT GROUP BY AREA_NO,PLAN_ID,AMT,EN_CODE,ITEM_NO,MONTH
HAVING COUNT(*)>1)
我们要查目标重复数据,可以选出最大BILL_NO,语句如下:
SELECT * FROM T_ECFN_LIMIT_INIT WHERE BILL_NO IN(
SELECT MAX(BILL_NO) FROM T_ECFN_LIMIT_INIT
GROUP BY AREA_NO,PLAN_ID,AMT,EN_CODE,ITEM_NO,MONTH HAVING COUNT(*)>1)
由于BILL_NO确定唯一,所以删除数据就比较简单啦!
知识点回顾
一、查找重复记录
1、查找全部重复记录
//单字段重复
Select * From Table Where 重复字段 In (Select 重复字段 From Table Group By 重复字段 Having Count(*)>1)
//多字段重复
Select * From Table Where (重复字段1,重复字段2,重复字段3) In (Select 重复字段1,重复字段2,重复字段3 From Table Group By 重复字段1,重复字段2,重复字段3 Having Count(*)>1)
2、过滤重复记录(只显示一条)
Select * From Table Where ID In (Select Max(ID) From Table Group By 重复字段 HAVING COUNT(*)>1)
//显示ID最大一条记录
二、删除重复记录
1、删除全部重复记录(慎用)
Delete Table Where 重复字段 In (Select 重复字段 From table Group By 重复字段 Having Count(*)>1)
2、保留一条(推荐)
Delete from Table Where ID Not In (Select Max(ID) From Table Group By 重复字段)
三、举例
1、查找表中多余的重复记录,重复记录是根据单个字段(peopleId)来判断
select * from people where peopleId in (select peopleId from people group by peopleId having count(peopleId) > 1)
2、删除表中多余的重复记录,重复记录是根据单个字段(peopleId)来判断,只留有rowid最小的记录
delete from people where peopleId in (select peopleId from people group by peopleId having count(peopleId) > 1) and rowid not in (select min(rowid) from people group by peopleId having count(peopleId )>1)
3、查找表中多余的重复记录(多个字段)
select * from vitae a where (a.peopleId,a.seq) in (select peopleId,seq from vitae group by peopleId,seq having count() > 1)
4、删除表中多余的重复记录(多个字段),只留有rowid最小的记录
delete from vitae a where (a.peopleId,a.seq) in (select peopleId,seq from vitae group by peopleId,seq having count() > 1) and rowid not in (select min(rowid) from vitae group by peopleId,seq having count()>1)
5、查找表中多余的重复记录(多个字段),不包含rowid最小的记录
select * from vitae a where (a.peopleId,a.seq) in (select peopleId,seq from vitae group by peopleId,seq having count() > 1) and rowid not in (select min(rowid) from vitae group by peopleId,seq having count(*)>1)
四、补充
有两个以上的重复记录,一是完全重复的记录,也即所有字段均重复的记录,二是部分关键字段重复的记录,比如Name字段重复,而其他字段不一定重复或都重复可以忽略。
1、对于第一种重复,比较容易解决,使用
select distinct * from tableName
就可以得到无重复记录的结果集。
如果该表需要删除重复的记录(重复记录保留1条),可以按以下方法删除
select distinct * into #Tmp from tableName
drop table tableName
select * into tableName from #Tmp
drop table #Tmp
发生这种重复的原因是表设计不周产生的,增加唯一索引列即可解决。
2、这类重复问题通常要求保留重复记录中的第一条记录,操作方法如下
假设有重复的字段为Name,Address,要求得到这两个字段唯一的结果集
select identity(int,1,1) as autoID, * into #Tmp from tableName
select min(autoID) as autoID into #Tmp2 from #Tmp group by Name,autoID
select * from #Tmp where autoID in(select autoID from #tmp2)