MySQL: how to delete duplicates from a table
Posted by Arul Ramachandran on January 4, 2012
To show an example of this, let us create a table called DUPS with duplicate records:
mysql> create table temp ( id smallint unsigned not null auto_increment primary key, table_schema varchar(64), table_name varchar(64) ); Query OK, 0 rows affected (0.02 sec) mysql> desc temp; +--------------+----------------------+------+-----+---------+----------------+ | Field | Type | Null | Key | Default | Extra | +--------------+----------------------+------+-----+---------+----------------+ | id | smallint(5) unsigned | NO | PRI | NULL | auto_increment | | table_schema | varchar(64) | YES | | NULL | | | table_name | varchar(64) | YES | | NULL | | +--------------+----------------------+------+-----+---------+----------------+ 3 rows in set (0.00 sec) mysql> insert into temp ( table_schema, table_name ) select table_schema, table_name from information_schema.tables; Query OK, 89 rows affected (0.01 sec) Records: 89 Duplicates: 0 Warnings: 0 mysql> select max(id), count(*) from temp; +---------+----------+ | max(id) | count(*) | +---------+----------+ | 89 | 89 | +---------+----------+ 1 row in set (0.00 sec)
let us create a new table that will contain some duplicate records:
mysql> create table dups as select * from temp ; Query OK, 89 rows affected (0.03 sec) Records: 89 Duplicates: 0 Warnings: 0 mysql> desc dups; +--------------+----------------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +--------------+----------------------+------+-----+---------+-------+ | id | smallint(5) unsigned | NO | | 0 | | | table_schema | varchar(64) | YES | | NULL | | | table_name | varchar(64) | YES | | NULL | | +--------------+----------------------+------+-----+---------+-------+ 3 rows in set (0.00 sec)
let us insert 50 duplicates records:
mysql> insert into dups select * from temp limit 50 ; Query OK, 50 rows affected (0.00 sec) Records: 50 Duplicates: 0 Warnings: 0 mysql> select max(id), count(*) from dups ; +---------+----------+ | max(id) | count(*) | +---------+----------+ | 89 | 139 | +---------+----------+ 1 row in set (0.00 sec)
There are a few options to delete duplicate records from DUPS table:
Option 1:
create a new temporary table without duplicates and rename this table to original table:
mysql> create table dups_temp as select * from dups group by table_schema, table_name ; Query OK, 89 rows affected (0.04 sec) Records: 89 Duplicates: 0 Warnings: 0 mysql> select max(id), count(*) from dups_temp ; +---------+----------+ | max(id) | count(*) | +---------+----------+ | 89 | 89 | +---------+----------+ 1 row in set (0.00 sec) mysql> drop table dups; Query OK, 0 rows affected (0.12 sec) mysql> rename table dups_temp to dups; Query OK, 0 rows affected (0.02 sec)
Option 2:
Create an unique index (with IGNORE clause) on columns table_schema, table_name eliminates duplicates:
mysql> select max(id), count(*) from dups ;
+---------+----------+
| max(id) | count(*) |
+---------+----------+
| 89 | 139 |
+---------+----------+
1 row in set (0.00 sec)
mysql> alter ignore table dups add unique index (table_schema, table_name);
Query OK, 139 rows affected (0.14 sec)
Records: 139 Duplicates: 50 Warnings: 0
mysql> select max(id), count(*) from dups ;
+---------+----------+
| max(id) | count(*) |
+---------+----------+
| 89 | 89 |
+---------+----------+
1 row in set (0.00 sec)
mysql> show indexes from dups\G
*************************** 1. row ***************************
Table: dups
Non_unique: 0
Key_name: table_schema
Seq_in_index: 1
Column_name: table_schema
Collation: A
Cardinality: 6
Sub_part: NULL
Packed: NULL
Null: YES
Index_type: BTREE
Comment:
*************************** 2. row ***************************
Table: dups
Non_unique: 0
Key_name: table_schema
Seq_in_index: 2
Column_name: table_name
Collation: A
Cardinality: 89
Sub_part: NULL
Packed: NULL
Null: YES
Index_type: BTREE
Comment:
2 rows in set (0.00 sec)
mysql> alter table dups drop index table_schema ;
Query OK, 89 rows affected (0.12 sec)
Records: 89 Duplicates: 0 Warnings: 0
Option 3:
In this method we will use a DELETE statement; also we would need to create a column to serve as unique identifier if one is not already present:
mysql> alter table dups add ( dummy_id smallint unsigned not null auto_increment primary key );
Query OK, 139 rows affected (0.11 sec)
Records: 139 Duplicates: 0 Warnings: 0
mysql> select min(dummy_id), max(dummy_id) from dups ;
+---------------+---------------+
| min(dummy_id) | max(dummy_id) |
+---------------+---------------+
| 1 | 139 |
+---------------+---------------+
1 row in set (0.00 sec)
mysql> select max(id), count(*) from dups ;
+---------+----------+
| max(id) | count(*) |
+---------+----------+
| 89 | 139 |
+---------+----------+
1 row in set (0.00 sec)
mysql> delete from dups using dups, dups as vtable where (dups.dummy_id > vtable.dummy_id)
-> and (dups.table_schema = vtable.table_schema)
-> and (dups.table_name = vtable.table_name);
Query OK, 50 rows affected (0.01 sec)
mysql> select max(id), count(*) from dups ;
+---------+----------+
| max(id) | count(*) |
+---------+----------+
| 89 | 89 |
+---------+----------+
1 row in set (0.00 sec)
mysql> alter table dups drop column dummy_id ;
Query OK, 89 rows affected (0.15 sec)
Records: 89 Duplicates: 0 Warnings: 0
Advertisement