简单记录
说修改了
character_set_server = utf8mb4
后如下使用mysql客户端连接:
mysql> show variables like '%char%';
+---------------------------+-------------------------------+
| Variable_name | Value |
+---------------------------+-------------------------------+
| character_set_client | utf8 |
| character_set_connection | utf8 |
| character_set_database | utf8mb4 |
| character_set_filesystem | binary |
| character_set_results | utf8 |
| character_set_server | utf8mb4 |
| character_set_system | utf8 |
| character_sets_dir | /opt/my_mysql/share/charsets/ |
| ft_query_extra_word_chars | OFF |
+---------------------------+-------------------------------+
问题原因:
函数
bool thd_init_client_charset(THD *thd, uint cs_number)
{
CHARSET_INFO *cs;
/*
Use server character set and collation if
- opt_character_set_client_handshake is not set
- client has not specified a character set
- client character set is the same as the servers
- client character set doesn't exists in server
*/
if (!opt_character_set_client_handshake ||// 如果设置了skip-character-set-client-handshake则直接跳过客户端的字符集设置,否则使用服务端的设置,这些设置在服务端均继承来之character_set_server
!(cs= get_charset(cs_number, MYF(0))) || // 0
!my_strcasecmp(&my_charset_latin1,
global_system_variables.character_set_client->name,
cs->name)) //0
{
if (!is_supported_parser_charset(
global_system_variables.character_set_client))
{
/* Disallow non-supported parser character sets: UCS2, UTF16, UTF32 */
my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "character_set_client",
global_system_variables.character_set_client->csname);
return true;
}
thd->variables.character_set_client=
global_system_variables.character_set_client;
thd->variables.collation_connection=
global_system_variables.collation_connection;
thd->variables.character_set_results=
global_system_variables.character_set_results;
}
else
{
if (!is_supported_parser_charset(cs))
{
/* Disallow non-supported parser character sets: UCS2, UTF16, UTF32 */
my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "character_set_client",
cs->csname);
return true;
}
thd->variables.character_set_results=
thd->variables.collation_connection=
thd->variables.character_set_client= cs;
}
return false;
}
因此如果设置了skip-character-set-client-handshake选项,则可以在忽略掉客户端字符集的字符集设置,直接使用服务端的字符集。涉及3个参数
- character_set_results
- collation_connection
- character_set_client
作用参考:
https://www.jianshu.com/p/f1bfdb02007d
也可在不设置skip-character-set-client-handshake的情况下载客户端指定字符集参数,比如mysql客户端有这样的选项。
[root@mgr3 ~]# /opt/my_mysql/bin/mysql --default-character-set=utf8mb4
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 10
Server version: 5.7.22-22-debug-log Source distribution
Copyright (c) 2009-2018 Percona LLC and/or its affiliates
Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> show variables like '%char%';
+---------------------------+-------------------------------+
| Variable_name | Value |
+---------------------------+-------------------------------+
| character_set_client | utf8mb4 |
| character_set_connection | utf8mb4 |
| character_set_database | utf8mb4 |
| character_set_filesystem | binary |
| character_set_results | utf8mb4 |
| character_set_server | utf8mb4 |
| character_set_system | utf8 |
| character_sets_dir | /opt/my_mysql/share/charsets/ |
| ft_query_extra_word_chars | OFF |
+---------------------------+-------------------------------+
9 rows in set (0.06 sec)
mysql>
可以debug thd_init_client_charset函数,观察其中的cs_number.
根本原因:
客户端5.7由于 mysql_autodetect_character_set函数中MYSQL_DEFAULT_CHARSET_NAME为latin1,但是8.0中已经变成MYSQL_DEFAULT_CHARSET_NAME UTF8MB4.
但是这个问题主要由于5.7中utf8就是utf8自动检测函数mysql_autodetect_character_set 检测linux locale中的信息,得到的utf8,则使用utf8。而8.0mysql_autodetect_character_set 检测到utf8会做映射为utf8mb4,如果设置export LANG="en_US" 则自动检测获取的是 ISO-8859-1 也就是latin1 单字节字符集。当然默认是 export LANG="en_US.utf8"。函数调用在mysql_autodetect_character_set中
(gdb) p csp->my_name
$3 = 0x4e4d8f "latin1"
(gdb) p csp->os_name
$4 = 0x4e4e84 "ISO-8859-1"
(gdb) p csname
$5 = 0x7fffb2f69b3c "ISO-8859-1"
最新总结(20241208)
- 检查locale是否正常集
- 默认locale的字符会覆盖默认字符集
- 保险设置--default-character-set=utf8mb4 改变连接字符为utf8mb4
这里直接跳过自动检测流程和如果没有检测到的默认字符集
3038 if (!mysql->options.charset_name) {
(gdb) n
3043 } else if (!strcmp(mysql->options.charset_name,
(gdb)
3048 mysql_set_character_set_with_default_collation(mysql);
(gdb)
3050 if (!mysql->charset) {
(gdb)
3065 return 0;
static int mysql_autodetect_character_set(MYSQL *mysql) { default-character-set
const char *csname = MYSQL_DEFAULT_CHARSET_NAME; //首先设置默认的字符集
#ifdef _WIN32
char cpbuf[64];
{
my_snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)GetConsoleCP());
csname = my_os_charset_to_mysql_charset(cpbuf);
}
#elif defined(HAVE_NL_LANGINFO)
{
if (setlocale(LC_CTYPE, "") && (csname = nl_langinfo(CODESET))) //其中locale种的LC_CTYPE代表字符集
csname = my_os_charset_to_mysql_charset(csname); //获取locale的字符集
}
#endif
if (mysql->options.charset_name)
my_free(mysql->options.charset_name);
if (!(mysql->options.charset_name =
my_strdup(key_memory_mysql_options, csname, MYF(MY_WME))))
return 1;
return 0;
}
- mysqldump和mysql
- mysql 虽然受到locale的影响,但是导出的mysqldump通常带有导出数据的时候所使用字符集也就是set names=utf8(utfmb4)(这个也相当重要)导出的用的什么放到了导出脚本种,默认5.7为UTF8,8.0为UTF8MB4,可以使用--default-character-set=charset_name指定。所以导入的时候即便受到影响也不会mysql登录字符集的影响。
但是很明显如果不是mysqldump的导出,如果没有带set names=utf8(utfmb4),那么很明显这会导致用mysql的连接的字符集来解析(比如GBK)可能的UTF8的文本,也就是文本是UTF8-->转换字符集是GBK-->入UTF8表,导致数据乱码。 - mysqldump导出表结构的时候其comment用的是binary 字符集,而导入的时候使用的utf8字符集,因为字典一定为utf8类型,因此这是对的,也就是受到字符集的影响,这个如下,
DROP TABLE IF EXISTS `testluanma`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `testluanma` (
`name` varchar(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
/*!40101 SET character_set_client = @saved_cs_client */;
#0 mysql_autodetect_character_set (mysql=0x9ea0a0 <mysql>) at /home/mysql/soft/percona-server-5.7.29-32/sql-common/client.c:3174
#1 0x0000000000464fbf in mysql_init_character_set (mysql=0x9ea0a0 <mysql>) at /home/mysql/soft/percona-server-5.7.29-32/sql-common/client.c:3220
#2 0x000000000046db2a in mysql_set_character_set (mysql=0x9ea0a0 <mysql>, cs_name=0x48efbb "auto") at /home/mysql/soft/percona-server-5.7.29-32/sql-common/client.c:6123
#3 0x0000000000410239 in init_connection_options (mysql=0x9ea0a0 <mysql>) at /home/mysql/soft/percona-server-5.7.29-32/client/mysql.cc:5153
#4 0x000000000040ff7a in sql_real_connect (host=0x0, database=0x0, user=0x0, password=0x0, silent=0) at /home/mysql/soft/percona-server-5.7.29-32/client/mysql.cc:5017
#5 0x000000000041036a in sql_connect (host=0x0, database=0x0, user=0x0, password=0x0, silent=0) at /home/mysql/soft/percona-server-5.7.29-32/client/mysql.cc:5183
#6 0x0000000000407c57 in main (argc=3, argv=0x9f23d8) at /home/mysql/soft/percona-server-5.7.29-32/client/mysql.cc:1332
(gdb)