MHA(Master High Availability)在MySQL高可用方面是一个相对成熟的解决方案,它可以实现MySQL在高可用性环境下的故障切换和主从转换。当主结点的MySQL服务器产生故障时,MHA能自动完成数据库的故障切换操作,而且在进行故障切换的过程中,MHA能在最大程度上保证数据的一致性
MHA组成部分
MHA由MHA Manager和MHA Node组成,MHA Node运行在后台MySQL服务器上,MHA Manager会定时监控master结点的存活状态,当master出现故障时,它可以实现将slave自动提升为新的master的功能,并将其他所有的slave的master指向新产生的master
MHA工作过程
MHA试图从宕机的主服务器上保存二进制日志,最大程度的保证数据的不丢失,但这并不总是可行的。当主服务器硬件故障或无法通过ssh访问的时候,MHA无法进行二进制日志的保存,仅能进行故障转移,因此有可能会造成最新数据的丢失。若使用半同步复制,可以使丢失数据的风险大大降低,当只有一个slave收到最新的二进制日志时,MHA可以将最新的二进制日志应用于其他所有的slave服务器上,因此可以保证结点的数据一致性
前期准备
准备三台Centos7虚拟机,关闭防火墙和selinux,配置IP地址和hostname,同步系统时间,配置IP地址和Hostname映射
hostname | ip |
---|---|
node1 | 192.168.29.143 |
node2 | 192.168.29.142 |
node3 | 192.168.29.144 |
其中node1结点为管理结点,其余node结点为从节点
本次部署需要用到VIP
VIP |
---|
192.168.29.121 |
下载压缩包
mha4mysql-manager-0.58.tar.gz
mha4mysql-node-0.58.tar.gz
配置SSH免密登录
[root@node1 ~]# ssh-keygen
[root@node2 ~]# ssh-keygen
[root@node3 ~]# ssh-keygen
[root@node1 ~]# ssh-copy-id root@192.168.29.143
[root@node1 ~]# ssh-copy-id root@192.168.29.142
[root@node1 ~]# ssh-copy-id root@192.168.29.144
[root@node2 ~]# ssh-copy-id root@192.168.29.143
[root@node2 ~]# ssh-copy-id root@192.168.29.142
[root@node2 ~]# ssh-copy-id root@192.168.29.144
[root@node3 ~]# ssh-copy-id root@192.168.29.143
[root@node3 ~]# ssh-copy-id root@192.168.29.142
[root@node3 ~]# ssh-copy-id root@192.168.29.144
#验证
[root@node1 ~]# ssh root@ip
[root@node2 ~]# ssh root@ip
[root@node3 ~]# ssh root@ip
搭建主从配置
角色 | IP |
---|---|
master | 192.168.29.143 |
slave | 192.168.29.142 |
slave | 192.168.29.144 |
步骤参考
https://blog.51cto.com/14832653/2500735
搭建完成后通过创建数据库和添加数据测试主从可用性
#在node1结点执行
[root@node1 ~]# mysql -u root -p
mysql> create database mydb charset utf8;
mysql> use mydb
mysql> create table test(id int primary key);
mysql> insert into test values(1);
mysql> insert into test values(2);
mysql> insert into test values(3;
mysql> insert into test values(4);
mysql> insert into test values(5);
#node2结点验证
[root@node2 ~]# mysql -u root -p -e "select * from mydb.test;"
Enter password:
+----+
| id |
+----+
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
+----+
#node3结点验证
[root@node3 ~]# mysql -u root -p -e "select * from mydb.test;"
Enter password:
+----+
| id |
+----+
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
+----+
开启半同步复制
mysql> INSTALL PLUGIN rpl_semi_sync_master SONAME ‘semisync_master.so‘;
mysql> SET GLOBAL rpl_semi_sync_master_enabled=ON;
三个结点均安装依赖包
[root@node1 ~]# yum install perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-CPAN perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes perl-DBI mysql-libs perl-Email-Date-Format perl-File-Remove perl-Mail-Sender perl-Mail-Sendmail perl-MIME-Lite perl-MIME-Types perl-Module-Install perl-Module-ScanDeps perl-YAML -y
[root@node2 ~]# yum install perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-CPAN perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes perl-DBI mysql-libs perl-Email-Date-Format perl-File-Remove perl-Mail-Sender perl-Mail-Sendmail perl-MIME-Lite perl-MIME-Types perl-Module-Install perl-Module-ScanDeps perl-YAML -y
[root@node3 ~]# yum install perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-CPAN perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes perl-DBI mysql-libs perl-Email-Date-Format perl-File-Remove perl-Mail-Sender perl-Mail-Sendmail perl-MIME-Lite perl-MIME-Types perl-Module-Install perl-Module-ScanDeps perl-YAML -y
部署MHA集群架构
全部结点部署mha4mysql-node
上传文件并解压
[root@node1 ~]# tar -zxvf mha4mysql-node-0.58.tar.gz
[root@node1 ~]# cd mha4mysql-node-0.58/
[root@node1 mha4mysql-node-0.58]# perl Makefile.PL
[root@node1 mha4mysql-node-0.58]# make && make install
#执行完成后会得到四个工具
[root@node1 ~]# ls -al /usr/local/bin/
-r-xr-xr-x 1 root root 17639 4月 30 15:05 apply_diff_relay_logs
-r-xr-xr-x 1 root root 4807 4月 30 15:05 filter_mysqlbinlog
-r-xr-xr-x 1 root root 8337 4月 30 15:05 purge_relay_logs
-r-xr-xr-x 1 root root 7525 4月 30 15:05 save_binary_logs
#在node2和node3结点执行相同的操作
node1结点部署mha4mysql-manager
上传文件并解压
[root@node1 ~]# tar -zxvf mha4mysql-manager-0.58.tar.gz
[root@node1 ~]# cd mha4mysql-manager-0.58/
[root@node1 mha4mysql-manager-0.58]# perl Makefile.PL
#执行完成后会得到manager工具
[root@node1 ~]# ls -al /usr/local/bin/
-r-xr-xr-x 1 root root 1995 4月 30 15:20 masterha_check_repl
-r-xr-xr-x 1 root root 1779 4月 30 15:20 masterha_check_ssh
-r-xr-xr-x 1 root root 1865 4月 30 15:20 masterha_check_status
-r-xr-xr-x 1 root root 3201 4月 30 15:20 masterha_conf_host
-r-xr-xr-x 1 root root 2517 4月 30 15:20 masterha_manager
-r-xr-xr-x 1 root root 2165 4月 30 15:20 masterha_master_monitor
-r-xr-xr-x 1 root root 2373 4月 30 15:20 masterha_master_switch
-r-xr-xr-x 1 root root 5172 4月 30 15:20 masterha_secondary_check
-r-xr-xr-x 1 root root 1739 4月 30 15:20 masterha_stop
node1结点配置MHA
编写配置文件
#创建目录
[root@node1 ~] mkdir /usr/local/mha
#编写配置文件
[root@node1 mha]# vi /usr/local/mha/mha.cnf
[server default]
#设置日志位置
manager_log=/usr/local/mha/manager.log
#设置工作目录
manager_workdir=/usr/local/mha
#设置ip自动飘移脚本路径
master_ip_failover_script="/usr/local/mha/scripts/master_ip_failover"
#设置ip手动飘移脚本路径
master_ip_online_change_script="/usr/local/mha/scripts/master_ip_online_change"
#设置监控mysql的用户
user=root
#设置监控用户对应密码
password=your_password
#设置ping包发送时间间隔
ping_interval=1
#设置远程mysql切换时binlog保存路径
remote_workdir=/usr/local/mha
#设置主从复制用户
repl_user=repl
#主从复制用户密码
repl_password=your_password
#发生漂移时报警脚本路径
report_script=""
#检查MySQL主服务器的可用性
secondary_check_script=/usr/local/bin/masterha_secondary_check -s 192.168.29.143 -s 192.168.29.142 -s 192.168.29.144
#发生故障后关闭故障主机的脚本路径
shutdown_script=""
#ssh的用户
ssh_user=root
[server1]
hostname=192.168.29.143
port=3306
[server2]
hostname=192.168.29.142
port=3306
[server3]
hostname=192.168.29.144
port=3306
#设置候选master
candidate_master=1
#设置当slave的relay_logs落后master达到100M时不会选择此slave作为新的master
check_repl_delay=0
编写飘移脚本
#创建脚本目录
[root@node1 ~]# mkdir /usr/loca/mha/scripts
#编写自动ip飘移脚本master_ip_failover
[root@node1 ~]#vi /usr/local/mha/scripts/master_ip_failover
#!/usr/bin/env perl
use strict;
use warnings FATAL => ‘all‘;
use Getopt::Long;
my (
$command, $ssh_user, $orig_master_host, $orig_master_ip,
$orig_master_port, $new_master_host, $new_master_ip, $new_master_port
);
my $vip = ‘192.168.29.121/24‘; # Virtual IP
my $key = "1";
my $int = "ens33";
my $ssh_start_vip = "/sbin/ifconfig $int:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig $int:$key down";
my $arp_effect = "/sbin/arping -Uq -s192.168.29.121 -I $int 192.168.29.254 -c 3"; # Virtual IP and gatway
GetOptions(
‘command=s‘ => \$command,
‘ssh_user=s‘ => \$ssh_user,
‘orig_master_host=s‘ => \$orig_master_host,
‘orig_master_ip=s‘ => \$orig_master_ip,
‘orig_master_port=i‘ => \$orig_master_port,
‘new_master_host=s‘ => \$new_master_host,
‘new_master_ip=s‘ => \$new_master_ip,
‘new_master_port=i‘ => \$new_master_port,
);
exit &main();
sub main {
print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";
if ( $command eq "stop" || $command eq "stopssh" ) {
# $orig_master_host, $orig_master_ip, $orig_master_port are passed.
# If you manage master ip address at global catalog database,
# invalidate orig_master_ip here.
my $exit_code = 1;
eval {
print "Disabling the VIP on old master: $orig_master_host \n";
&stop_vip();
$exit_code = 0;
};
if ($@) {
warn "Got Error: $@\n";
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "start" ) {
# all arguments are passed.
# If you manage master ip address at global catalog database,
# activate new_master_ip here.
# You can also grant write access (create user, set read_only=0, etc) here.
my $exit_code = 10;
eval {
print "Enabling the VIP - $vip on the new master - $new_master_host \n";
&start_vip();
$exit_code = 0;
};
if ($@) {
warn $@;
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "status" ) {
print "Checking the Status of the script.. OK \n";
#`ssh $ssh_user\@cluster1 \" $ssh_start_vip \"`;
exit 0;
}
else {
&usage();
exit 1;
}
}
sub start_vip() {
`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
sub stop_vip() {
return 0 unless ($ssh_user);
`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {
print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}
#编写手动ip飘移脚本master_ip_online_change
[root@node1 ~]#vi /usr/local/mha/scripts/master_ip_online_change
#!/bin/bash
source /root/.bash_profile
vip=`echo ‘192.168.29.121/24‘` #设置VIP
key=`echo ‘1‘`
command=`echo "$1" | awk -F = ‘{print $2}‘`
orig_master_host=`echo "$2" | awk -F = ‘{print $2}‘`
new_master_host=`echo "$7" | awk -F = ‘{print $2}‘`
orig_master_ssh_user=`echo "${12}" | awk -F = ‘{print $2}‘`
new_master_ssh_user=`echo "${13}" | awk -F = ‘{print $2}‘`
#要求服务的网卡识别名一样(本机为ens33)
stop_vip=`echo "ssh root@$orig_master_host /usr/sbin/ifconfig ens33:$key down"`
start_vip=`echo "ssh root@$new_master_host /usr/sbin/ifconfig ens33:$key $vip"`
if [ $command = ‘stop‘ ]
then
echo -e "\n\n\n****************************\n"
echo -e "Disabled thi VIP - $vip on old master: $orig_master_host \n"
$stop_vip
if [ $? -eq 0 ]
then
echo "Disabled the VIP successfully"
else
echo "Disabled the VIP failed"
fi
echo -e "***************************\n\n\n"
fi
if [ $command = ‘start‘ -o $command = ‘status‘ ]
then
echo -e "\n\n\n*************************\n"
echo -e "Enabling the VIP - $vip on new master: $new_master_host \n"
$start_vip
if [ $? -eq 0 ]
then
echo "Enabled the VIP successfully"
else
echo "Enabled the VIP failed"
fi
echo -e "***************************\n\n\n"
fi
#为脚本添加执行权限
[root@node1 scripts]# chmod +x master_ip_failover
[root@node1 scripts]# chmod +x master_ip_online_change
验证MHA状态
#验证SSH状态
[root@node1 ~]# masterha_check_ssh --conf=/usr/local/mha/mha.cnf
[info] All SSH connection tests passed successfully.
#验证数据库集群状态
[root@node1 ~]# masterha_check_repl --conf=/usr/local/mha/mha.cnf
MySQL Replication Health is OK.
启动MHA集群
node1结点添加虚拟IP
[root@node1 ~]# ifconfig ens33:1 192.168.29.121/24
启动MHA进程
[root@node1 ~]# masterha_manager --conf=/usr/local/mha/mha.cnf > /tmp/mha_manager.log < /dev/null 2>&1 &
检查MHA集群运行状态
[root@node1 ~]# masterha_check_status --conf=/usr/local/mha/mha.cnf
mha (pid:2702) is running(0:PING_OK), master:192.168.29.143
#启动成功
高可用测试
宿主机通过VIP访问数据库
>mysql -uroot -p -h192.168.29.121
>mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| mydb |
| mysql |
| performance_schema |
| sys |
+--------------------+
mysql> use mydb;
Database changed
mysql> select * from test;
+----+
| id |
+----+
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
+----+
5 rows in set (0.00 sec)
模拟node1(master)结点宕机
[root@node1 ~]# systemctl stop mysqld.service
#node3结点查看vip飘移情况
[root@node3 ~]# ip a
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
inet 192.168.29.121/24 brd 192.168.29.255 scope global secondary ens33:1
#可见VIP已经飘移到node3结点中,同时node2的master变成了node3
#宿主机再次通过VIP访问数据库
>mysql -u root -h 192.168.29.121 -p -e "select * from mydb.test;"
+----+
| id |
+----+
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
+----+
#高可用性可实现
node1(master)结点恢复正常
[root@node1 ~]# systemctl start mysqld.service
#需要把node1结点机器设置为node3的slave节点
[root@node3 ~]# mysql -u root -p -e "show master status\G;"
*************************** 1. row ***************************
File: binlog.000025
Position: 2225
Binlog_Do_DB: mydb
Binlog_Ignore_DB: mysql
Executed_Gtid_Set:
#node1结点配置
mysql> stop slave;
mysql> change master to
-> master_host=‘192.168.29.144‘,
-> master_user=‘repl‘,
-> master_password=‘your_password‘,
-> master_log_file=‘binlog.000025‘,
-> master_log_pos=2225;
mysql> start slave;
mysql> show slave status\G;
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
在线切换master
[root@node1 ~]# masterha_master_switch --conf=/usr/local/mha/mha.cnf --master_state=alive --new_master_host=192.168.29.143 --orig_master_is_new_slave --running_updates_limit=10000 --interactive=0
[info] Switching master to 192.168.29.143(192.168.29.143:3306) completed successfully.
#node1结点查看VIP
[root@node1 ~]# ip a
ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
inet 192.168.29.121/24 brd 192.168.29.255 scope global secondary ens33:1
#node2和node3的master也变回了node1
#宿主机再次通过VIP访问数据库
>mysql -u root -h 192.168.29.121 -p -e "select * from mydb.test;"
+----+
| id |
+----+
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
+----+
注意:
1)当node1结点宕机后,node2和node3结点会继续工作,此时若产生数据写入或删除则先不要执行在线切换master,要确保node1中的数据与node2、node3完全一致才可执行在线切换master,否则可能会导致最新的数据丢失
2)在线切换master完成后需要把workdir中的保存文件删除,以免影响MHA使用
saved_master_binlog_from_192.168.29.143_3306_20200608100840.binlog
mha.failover.complete
3)在线切换完成后,需要重新启动MHA进程
[root@node1 ~]# masterha_manager --conf=/usr/local/mha/mha.cnf > /tmp/mha_manager.log < /dev/null 2>&1 &
#检查状态
[root@node1 ~]# masterha_check_status --conf=/usr/local/mha/mha.cnf