关于MySQL字符集 可以参考文章MySQL字符集 & 重谈MySQL字符集
目录
大小写
_ci(case insensitive) 大小写不敏感
_cs(case sensitive) 大小写敏感
CREATE DATABASE IF NOT EXISTS collation_demo DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
USE collation_demo;
CREATE TABLE t1 (
id int(10) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,
content varchar(255) NOT NULL
) DEFAULT CHARSET=utf8;
INSERT INTO t1 ( content ) VALUES ( 'china' );
INSERT INTO t1 ( content ) VALUES ( 'China' );
SELECT * FROM t1 WHERE content = 'china';
# Time: 0.018s
+----+---------+
| id | content |
+----+---------+
| 1 | china |
| 2 | China |
+----+---------+
SELECT * FROM t1 WHERE content = 'China';
# Time: 0.020s
+----+---------+
| id | content |
+----+---------+
| 1 | china |
| 2 | China |
+----+---------+
ALTER TABLE t1 CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_cs;
# (1273, "Unknown collation: 'utf8_general_cs'")
ALTER TABLE t1 CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin;
# Query OK, 2 rows affected
SELECT * FROM t1 WHERE content = 'china';
# Time: 0.020s
+----+---------+
| id | content |
+----+---------+
| 1 | china |
+----+---------+
SELECT * FROM t1 WHERE content = 'China';
# Time: 0.018s
+----+---------+
| id | content |
+----+---------+
| 2 | China |
+----+---------+
比较规则
_bin 按位比较 (大小写二进制值不同)
_general 按字节比较?
_unicode 按unicode规则比较
TRUNCATE t1;
ALTER TABLE t1 CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO t1 ( content ) VALUES ( 'ij' );
INSERT INTO t1 ( content ) VALUES ( 'ij' );
SELECT * FROM t1 WHERE content = 'ij';
# Time: 0.018s
+----+---------+
| id | content |
+----+---------+
| 1 | ij |
+----+---------+
SELECT * FROM t1 WHERE content = 'ij';
# Time: 0.017s
+----+---------+
| id | content |
+----+---------+
| 2 | ij |
+----+---------+
ALTER TABLE t1 CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci;
SELECT * FROM t1 WHERE content = 'ij';
# Time: 0.061s
+----+---------+
| id | content |
+----+---------+
| 1 | ij |
| 2 | ij |
+----+---------+
SELECT * FROM t1 WHERE content = 'ij';
# Time: 0.020s
+----+---------+
| id | content |
+----+---------+
| 1 | ij |
| 2 | ij |
+----+---------+
unicode有更高的精度 但general有更高的效率