MHA具体的搭建环境如下:
角色 IP地址 主机名
VIP 192.168.1.169
manager 192.168.1.170 170
master 192.168.1.171 171
slave01 192.168.1.172 172
slave02 192.168.1.173 173
#一旦master宕机,将会把备选master提升为新的master,slave指向新的master.
1、安装配置mha
1.1 环境准备,先要安装epel源,安装全部依赖
yum -y install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager --skip-broken
1.2 修改host解析
[root@localhost ~]# hostnamectl set-hostname 170 #修改所有主机主机名
[root@170 ~]# vim /etc/hosts
192.168.1.170 170
192.168.1.171 171
192.168.1.172 172
192.168.1.173 173
1.3 配置所有主机相互SSH登录无密码验证
[root@170 ~]# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa): 回车
Enter passphrase (empty for no passphrase): 回车
Enter same passphrase again: 回车
[root@170 ~]# ssh-copy-id 192.168.1.170
[root@170 ~]# ssh-copy-id 192.168.1.171
[root@170 ~]# ssh-copy-id 192.168.1.172
[root@170 ~]# ssh-copy-id 192.168.1.173
//170、171、172、173、174主机重复上面操作
1.4 验证各节点互信
[root@170 ~]# ssh 192.168.1.171 date && ssh 192.168.1.172 date && ssh 192.168.1.173 date && ssh 192.168.1.170 date
1.5 上传MHA相关包,在所有的节点安装mha-node
[root@170 ~]# mkdir /soft
[root@170 soft]# scp /soft/MHA-2019-6.28.zip root@192.168.1.171:/soft/
[root@170 soft]# scp /soft/MHA-2019-6.28.zip root@192.168.1.172:/soft/
[root@170 soft]# scp /soft/MHA-2019-6.28.zip root@192.168.1.173:/soft/
[root@170 soft]# unzip MHA-2019-6.28.zip
[root@170 soft]# rpm -ivh mha4mysql-node-0.56-0.el6.noarch.rpm
//安装完成后会在/usr/bin/目录下生成以下脚本文件
[root@170 soft]# cd /usr/bin
[root@170 bin]# ll app* filter* purge* save*
-r-xr-xr-x 1 root root 15498 Apr 20 10:05 apply_diff_relay_logs
-r-xr-xr-x 1 root root 4807 Apr 20 10:05 filter_mysqlbinlog
-r-xr-xr-x 1 root root 7401 Apr 20 10:05 purge_relay_logs
-r-xr-xr-x 1 root root 7263 Apr 20 10:05 save_binary_logs
2、配置mysql主从同步(一主多从)
2.1 安装mysql,主从节点都需安装
[root@171 ~]# rpm -ivh mysql57-community-release-el7-10.noarch.rpm
[root@171 ~]# yum install mysql-community-server -y
[root@171 ~]# systemctl start mysqld
[root@171 ~]#systemctl enable mysqld
//如果mysql登陆需要密码,请查看该文件
[root@171 ~]# grep 'temporary password' /var/log/mysqld.log
//登陆mysql重新配置密码
[root@171 ~]# mysql -uroot -p'password'
mysql> ALTER USER 'root'@'localhost' IDENTIFIED BY 'MyNewPass4!';
2.2 创建需要同步的数据库:
mysql> create database ha;
mysql> use ha;
mysql> create table test(id int,name varchar(20));
2.3 配置主数据库服务器:
vim /etc/my.cnf
log-bin=mysql-bin-master171
server-id=171 #本机数据库ID 标示
binlog-do-db=ha #可以被从服务器复制的库。二进制需要同步的数据库名
binlog-ignore-db=mysql #不可以被从服务器复制的库
validate-password=off #关闭密码策略
//授权:
mysql> grant replication slave on *.* to repl@'192.168.1.%' identified by 'password';
mysql> flush privileges;
//重启mysql:
systemctl restart mysqld
//查看状态信息:
mysql> show master status;
//导出数据库到从服务器
[root@171 ~]# mysqldump -uroot -p --databases ha > ha.sql
[root@171 ~]# scp ha.sql root@192.168.1.172:/root
[root@171 ~]# scp ha.sql root@192.168.1.173:/root
2.4 配置从服务并在主数据服务器导入数据库
[root@172 ~]# mysql -uroot -p < ha.sql
vim /etc/my.cnf #配置my.cnf
log-bin=mysql-slave172 #启用二进制日志
server-id=172 #本机数据库ID 标示
binlog-do-db=ha #可以被从服务器复制的库。二进制需要同步的数据库名
binlog-ignore-db=mysql #不可以被从服务器复制的库
log_slave_updates=1 #只有开启log_slave_updates,从库binlog才会记录主库同步的操作日志
validate-password=off
//授权
mysql> grant replication slave on *.* to 'repl'@'192.168.1.%' identified by 'password';
mysql> flush privileges;
[root@172 ~]# systemctl restart mysqld 重启mysql
//建立主从关系
mysql> stop slave;
mysql> change master to master_host='192.168.1.171',master_user='repl',master_password='password';
mysql> start slave;
mysql> show slave status \G; #查看从状态
Slave_IO_Running :一个负责与主机的io通信
Slave_SQL_Running:负责自己的slave mysql进程
两个为YES 就成功了!
//173配置同上
2.5 两台slave服务器设置read_only(从库对外提供读服务,只所以没有写进配置文件,是因为slave随时会提升为master)
[root@172 ~]# mysql -uroot -ppassword -e 'set global read_only=1'
或mysql> set global read_only=1;
[root@173 ~]# mysql -uroot -pTalent*19871988 -e 'set global read_only=1'
2.6 创建监控用户(在主库创建后,从库自动会创建)
mysql> grant all privileges on *.* to 'mha'@'192.168.1.%' identified by 'pwassword';
mysql> flush privileges;
3、Manager配置
3.1 Manager软件安装
[root@170 ~]# yum install -y perl-Config-Tiny epel-release perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes #安装依赖
[root@170 ~]# rpm -ivh mha4mysql-manager-0.56-0.el6.noarch.rpm #MHA Manager软件包
//安装完成后会在/usr/bin目录下面生成以下脚本文件
[root@xuegod67 ~]# ll /usr/bin/mast*
-rwxr-xr-x 1 root root 1995 12月 13 2012 /usr/bin/masterha_check_repl
-rwxr-xr-x 1 root root 1779 12月 13 2012 /usr/bin/masterha_check_ssh
-rwxr-xr-x 1 root root 1865 12月 13 2012 /usr/bin/masterha_check_status
-rwxr-xr-x 1 root root 3201 12月 13 2012 /usr/bin/masterha_conf_host
-rwxr-xr-x 1 root root 2517 12月 13 2012 /usr/bin/masterha_manager
-rwxr-xr-x 1 root root 2165 12月 13 2012 /usr/bin/masterha_master_monitor
-rwxr-xr-x 1 root root 2373 12月 13 2012 /usr/bin/masterha_master_switch
-rwxr-xr-x 1 root root 3879 12月 13 2012 /usr/bin/masterha_secondary_check
-rwxr-xr-x 1 root root 1739 12月 13 2012 /usr/bin/masterha_stop
3.2 准备manger配置文件
[root@170 ~]# mkdir -p /etc/mha
[root@170 ~]# mkdir -p /var/log/mha/app1
vim/etc/mha/app1.cnf
[server default]
manager_log=/var/log/mha/app1/manager
manager_workdir=/var/log/mha/app1
master_binlog_dir=/var/lib/mysql
user=mha
password="password"
ping_interval=2
repl_password="password"
repl_user=repl
ssh_user=root
[server1]
hostname=192.168.1.171
port=3306
[server2]
hostname=192.168.1.172
port=3306
[server3]
hostname=192.168.1.173
port=3306
3.3 验证互信
[root@170 ~]# masterha_check_ssh --conf=/etc/mha/app1.cnf #互信检查
Thu Jul 8 01:42:00 2021 - [info] All SSH connection tests passed successfully.
3.4 主从状态检查
[root@170 ~]# masterha_check_repl --conf=/etc/mha/app1.cnf #主从状态检查
MySQL Replication Health is OK.
3.5 启动manager
[root@170 ~]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null> /var/log/mha/app1/manager.log 2>&1 &
3.6 查看MHA状态
[root@170 ~]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 (pid:7350) is running(0:PING_OK), master:192.168.1.171
4、模拟故障
4.1 故障模拟及处理
[root@171 ~]# pkill mysqld #停止主库
[root@170 ~]# tail -f /var/log/mha/app1/manager #末尾必须显示successfully,才算正常切换成功。
Started automated(non-interactive) failover.
The latest slave 192.168.1.172(192.168.1.172:3306) has all relay logs for recovery.
Selected 192.168.1.172(192.168.1.172:3306) as a new master.
192.168.1.172(192.168.1.172:3306): OK: Applying all logs succeeded.
192.168.1.173(192.168.1.173:3306): This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
192.168.1.173(192.168.1.173:3306): OK: Applying all logs succeeded. Slave started, replicating from 192.168.1.172(192.168.1.172:3306)
192.168.1.172(192.168.1.172:3306): Resetting slave info succeeded.
Master failover to 192.168.1.172(192.168.1.172:3306) completed successfully.
4.2 查看主库状态
[root@170 ~]# masterha_check_status --conf=/etc/mha/app1.cnf
app1 (pid:7676) is running(0:PING_OK), master:192.168.1.172
//查看173从库状态
mysql> show slave status \G;
Master_Host: 192.168.1.172 #slave指向新的主库
4.3 修复主库
[root@171 ~]# systemctl restart mysqld
[root@170 ~]# vim /var/log/mha/app1/manager #查询一下日志记录
[root@171 ~]# mysql -uroot -p
mysql> CHANGE MASTER TO MASTER_HOST='192.168.1.171', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-slave171.000002', MASTER_LOG_POS=154, MASTER_USER='repl', MASTER_PASSWORD='xxx';
mysql> start slave;
mysql> show slave status \G; #已经指定新的主库
4.4 修改manger配置文件
vim /etc/mha/app1.cnf
[server1]
hostname=192.168.1.171
port=3306
4.5 启动manager
nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null> /var/log/mha/app1/manager.log 2>&1 &
5、MHA的vip功能
5.1 上传脚本并修改内容(/usr/local/bin)
[root@170 ~]# ls
anaconda-ks.cfg email_2019-最新.zip MHA-2019-6.28.zip mha4mysql-node-0.56-0.el6.noarch.rpm
Atlas-2.2.1.el6.x86_64.rpm master_ip_failover.txt mha4mysql-manager-0.56-0.el6.noarch.rpm mysql57-community-release-el7-10.noarch.rpm
[root@170 ~]# cp ./master_ip_failover.txt /usr/local/bin/master_ip_failover
//修改文件
[root@170 ~]# vim /usr/local/bin/master_ip_failover
my $vip = '192.168.1.169/24';
my $key = '1';
my $ssh_start_vip = "/sbin/ifconfig ens192:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig eth192:$key down";
5.2 修改权限及格式
[root@170 ~]# dos2unix /usr/local/bin/master_ip_failover
dos2unix: converting file /usr/local/bin/master_ip_failover to Unix format ...
[root@170 ~]# chmod +x /usr/local/bin/master_ip_failover
5.3 更改manager配置文件:
[root@170 ~]# vim /etc/mha/app1.cnf
添加:master_ip_failover_script=/usr/local/bin/master_ip_failover
5.4 第一次配置VIP的时候,需要在主库手工生成VIP
[root@171 ~]# ifconfig ens192:1 192.168.1.169 #开启VIP
[root@171 ~]# ifconfig ens192:1 down #关闭VIP
5.5、重新启动manager
[root@170 ~]# masterha_stop --conf=/etc/mha/app1.cnf
[root@170 ~]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null> /var/log/mha/app1/manager.log 2>&1 &
[root@174 ~]# mysql -uroot -p -h 192.168.1.169 #测试VIP登录mysql
6、MHA Send_Report故障邮件提醒
6.1 添加邮件脚本
[root@170 ~]# cd /usr/local/bin/
[root@170 bin]# vim /usr/local/bin/send_report
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Mail::Sender;
use Getopt::Long;
#new_master_host and new_slave_hosts are set only when recovering master succeeded
my ( $dead_master_host, $new_master_host, $new_slave_hosts, $subject, $body );
my $smtp='smtp.qq.com';
my $mail_from='373370405@qq.com';
my $mail_user='373370405@qq.com';
my $mail_pass='*******'; #授权码
#my $mail_to=['to1@qq.com','to2@qq.com'];
my $mail_to='373370405@qq.com';
GetOptions(
'orig_master_host=s' => \$dead_master_host,
'new_master_host=s' => \$new_master_host,
'new_slave_hosts=s' => \$new_slave_hosts,
'subject=s' => \$subject,
'body=s' => \$body,
);
# Do whatever you want here
mailToContacts($smtp,$mail_from,$mail_user,$mail_pass,$mail_to,$subject,$body);
sub mailToContacts {
my ($smtp, $mail_from, $mail_user, $mail_pass, $mail_to, $subject, $msg ) = @_;
open my $DEBUG, ">/tmp/mail.log"
or die "Can't open the debug file:$!\n";
my $sender = new Mail::Sender {
ctype => 'text/plain;charset=utf-8',
encoding => 'utf-8',
smtp => $smtp,
from => $mail_from,
auth => 'LOGIN',
TLS_allowed => '0',
authid => $mail_user,
authpwd => $mail_pass,
to => $mail_to,
subject => $subject,
debug => $DEBUG
};
$sender->MailMsg(
{
msg => $msg,
debug => $DEBUG
}
) or print $Mail::Sender::Error;
return 1;
}
exit 0;
[root@170 bin]# chmod +x *
6.2 修改manager配置文件,调用邮件脚本
vim /etc/mha/app1.cnf
report_script=/usr/local/bin/send #新增一行
6.3 重新启动manager
[root@170 ~]# masterha_stop --conf=/etc/mha/app1.cnf
[root@170 ~]# nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null> /var/log/mha/app1/manager.log 2>&1 &
6.4 模拟主库故障,验证邮件提示
7、MHA binlog server
7.1 Manager额外参数介绍
说明:主库宕机谁来接管?
1. 所有从节点日志都是一致的,默认会以配置文件的顺序去选择一个新主。
2. 从节点日志不一致,自动选择最接近于主库的从库
3. 如果对于某节点设定了权重(candidate_master=1),权重节点会优先选择。但是此节点日志量落后主库100M日志的话,也不会被选择。可以配合check_repl_delay=0,关闭日志量的检查,强制选择候选节点。
(1) ping_interval=1#设置监控主库,发送ping包的时间间隔,尝试三次没有回应的时候自动进行failover
(2) candidate_master=1#设置为候选master,如果设置该参数以后,发生主从切换以后将会将此从库提升为主库,即使这个主库不是集群中事件最新的slave
(3)check_repl_delay=0#默认情况下如果一个slave落后master 100M的relay logs的话,MHA将不会选择该slave作为一个新的master,因为对于这个slave的恢复需要花费很长时间,通过设置check_repl_delay=0,MHA触发切换在选择一个新的master的时候将会忽略复制延时,这个参数对于设置了candidate_master=1的主机非常有用,因为这个候选主在切换的过程中一定是新的maste
7.2 配置MHA binlog server
binlogserver配置:找一台额外的机器,必须要和源库版本一致,支持gtid并开启,我们直接用的174主机
[root@170 ~]vim /etc/mha/app1.cnf
[binlog1]
no_master=1
hostname=192.168.1.174
master_binlog_dir=/data/mysql/binlog
//创建必要目录,这个目录不能和源binlog目录一致
[root@174 ~]# mkdir -p /data/mysql/binlog
[root@174 ~]# chown -R mysql.mysql /data/mysql/binlog/
修改完成后,将主库binlog拉过来(从000001开始拉,之后的binlog会自动按顺序过来)
拉取主库binlog日志
[root@174 ~]cd /data/mysql/binlog #必须进入到自己创建好的目录
[root@174 ~]mysqlbinlog -R --host=192.168.1.172 --user=mha --password=mha --raw --stop-never mysql-slave172.000001 & # 注意:拉取日志的起点,需要按照目前从库的已经获取到的二进制日志点mysql-slave172.000001,通过[root@171 ~]# ll /var/lib/mysql #master二进制日志路径
7.3 重启MHA
[root@170 ~]# masterha_stop --conf=/etc/mha/app1.cnf
[root@170 ~]#nohup masterha_manager --conf=/etc/mha/app1.cnf --remove_dead_master_conf --ignore_last_failover </dev/null> /var/log/mha/app1/manager.log 2>&1 &
7.4 主库flush logs验证 binlog server同步日志
flush logs;
show master status;
[root@174 ~]ll /data/mysql/binlog #验证binlog是否同步
7.5 故障处理
主库宕机,binlogserver 自动停掉,manager 也会自动停止。处理思路:
1、重新获取新主库的binlog到binlogserver中
2、重新配置文件binlog server信息
3、最后再启动MHA