实现MHA高可用
环境准备
MHA-manager:10.0.0.7
Master:10.0.0.18
Slave1:10.0.0.28
Slave2:10.0.0.38
#关闭防火墙
#禁用selinux
#时间同步
MHA部分
所有节点基于key验证
#在MHA-manager执行
[root@MHA-manager ~]#ssh-keygen
[root@MHA-manager ~]#ssh-copy-id 127.0.0.1
[root@MHA-manager ~]#rsync -av /root/.ssh 10.0.0.18:/root
[root@MHA-manager ~]#rsync -av /root/.ssh 10.0.0.28:/root
[root@MHA-manager ~]#rsync -av /root/.ssh 10.0.0.38:/root
在MHA-manager节点安装mha4mysql-manager和mha4mysql-node两个包
#注意:建议先装node包.亲自踩的坑,如果现状manager包,会报错提示缺少各种依赖包
[root@MHA-manager ~]#yum -y install mha4mysql-node-0.58-0.el7.centos.noarch.rpm
[root@MHA-manager ~]#yum -y install mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
在其他节点上安装mha4mysql-manager-0.58-0.el7.centos.noarch.rpm包
yum -y install mha4mysql-node-0.58-0.el7.centos.noarch.rpm
在管理节点建立配置文件
[root@MHA-manager ~]#mkdir /data/mastermha
[root@MHA-manager ~]#vi /etc/mastermha/app1.cnf
[server default]
user=mhauser ##用于远程连接MySQL所有节点的用户,需要有管理员的权限
password=magedu
manager_workdir=/data/mastermha/app1/ #目录会自动生成,无需手动创建
manager_log=/data/mastermha/app1/manager.log
remote_workdir=/data/mastermha/app1/
ssh_user=root #用于实现远程ssh基于KEY的连接,访问二进制日志
repl_user=repluser #主从复制的用户信息
repl_password=repluser #主从复制的用户信息
ping_interval=1 #健康性检查的时间间隔
master_ip_failover_script=/usr/local/bin/master_ip_failover #切换VIP的perl脚本
report_script=/usr/local/bin/sendmail.sh #当执行报警脚本
check_repl_delay=0 #默认如果slave中从库落后主库relaylog超过100M,主库不会选择这个从库为新的master,因为这个从库进行恢复需要很长的时间.通过这个参数,mha触发主从切换的时候会忽略复制的延时,通过check_repl_delay=0这个参数,mha触发主从切换时会忽略复制的延时,对于设置candidate_master=1的从库非常有用,这样确保这个从库一定能成为最新的master
master_binlog_dir=/data/mysql/ #指定二进制日志存放的目录,mha4mysql-manager-0.58必须指定,之前版本不需要指定
[server1]
hostname=10.0.0.18
candidate_master=1 #设置为优先候选master,即使不是集群中事件最新的slave,也会优先当master
[server2]
hostname=10.0.0.28
candidate_master=1
[server3]
hostname=10.0.0.38
脚本准备
#邮件报警脚本
[root@MHA-manager ~]# cat /usr/local/bin/sendmail.sh
#!/bin/bash
echo "mysql is down" | mail -s "MHA Warning"
[root@MHA-manager ~]#chmod +x /usr/local/bin/sendmail.sh
#虚拟ip漂移脚本
[root@MHA-manager ~]# cat /usr/local/bin/master_ip_failover
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
my (
$command, $ssh_user, $orig_master_host, $orig_master_ip,
$orig_master_port, $new_master_host, $new_master_ip, $new_master_port
);
my $vip = '10.0.0.100/24'; #设置Virtual IP
my $gateway = '10.0.0.100'; #网关Gateway IP
my $interface = 'eth0'; #指定VIP所在网卡
my $key = "1";
my $ssh_start_vip = "/sbin/ifconfig $interface:$key $vip;/sbin/arping -I
$interface -c 3 -s $vip $gateway >/dev/null 2>&1";
my $ssh_stop_vip = "/sbin/ifconfig $interface:$key down";
GetOptions(
'command=s' => \$command,
'ssh_user=s' => \$ssh_user,
'orig_master_host=s' => \$orig_master_host,
'orig_master_ip=s' => \$orig_master_ip,
'orig_master_port=i' => \$orig_master_port,
'new_master_host=s' => \$new_master_host,
'new_master_ip=s' => \$new_master_ip,
'new_master_port=i' => \$new_master_port,
);
exit &main();
sub main {
print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";
if ( $command eq "stop" || $command eq "stopssh" ) {
# $orig_master_host, $orig_master_ip, $orig_master_port are passed.
# If you manage master ip address at global catalog database,
# invalidate orig_master_ip here.
my $exit_code = 1;
eval {
print "Disabling the VIP on old master: $orig_master_host \n";
&stop_vip();
$exit_code = 0;
};
if ($@) {
warn "Got Error: $@\n";
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "start" ) {
# all arguments are passed.
# If you manage master ip address at global catalog database,
# activate new_master_ip here.
# You can also grant write access (create user, set read_only=0, etc) here.
my $exit_code = 10;
eval {
print "Enabling the VIP - $vip on the new master - $new_master_host \n";
&start_vip();
$exit_code = 0;
};
if ($@) {
warn $@;
exit $exit_code;
}
exit $exit_code;
}
elsif ( $command eq "status" ) {
print "Checking the Status of the script.. OK \n";
`ssh $ssh_user\@$orig_master_host \" $ssh_start_vip \"`;
exit 0;
}
else {
&usage();
exit 1;
}
}
# A simple system call that enable the VIP on the new master
sub start_vip() {
`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
# A simple system call that disable the VIP on the old_master
sub stop_vip() {
`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}
sub usage {
print
"Usage: master_ip_failover --command=start|stop|stopssh|status --
orig_master_host=host --orig_master_ip=ip --orig_master_port=port --
new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}
[root@MHA-manager ~]#chmod +x /usr/local/bin/master_ip_failover
主从架构
mysql5.7.30安装脚本
[root@centos8 ~]# vi install_mysql5.7or8.0_for_centos.sh
#!/bin/bash
#
#********************************************************************
#Author: wangxiaochun
#QQ: 29308620
#Date: 2020-02-12
#FileName: install_mysql5.7_for_centos.sh
#URL: http://www.magedu.com
#Description: The test script
#Copyright (C): 2020 All rights reserved
#********************************************************************
#MySQL Download URL: https://dev.mysql.com/get/Downloads/MySQL-5.7/mysql-5.7.29-linux-glibc2.12-x86_64.tar.gz
. /etc/init.d/functions
SRC_DIR=`pwd`
MYSQL='mysql-5.7.30-linux-glibc2.12-x86_64.tar.gz'
COLOR='echo -e \E[01;31m'
END='\E[0m'
MYSQL_ROOT_PASSWORD=magedu
check (){
if [ $UID -ne 0 ]; then
action "当前用户不是root,安装失败" false
exit 1
fi
cd $SRC_DIR
if [ ! -e $MYSQL ];then
$COLOR"缺少${MYSQL}文件"$END
$COLOR"请将相关软件放在${SRC_DIR}目录下"$END
exit
elif [ -e /usr/local/mysql ];then
action "数据库已存在,安装失败" false
exit
else
return
fi
}
install_mysql(){
$COLOR"开始安装MySQL数据库..."$END
yum -y -q install libaio numactl-libs libaio &> /dev/null
cd $SRC_DIR
tar xf $MYSQL -C /usr/local/
MYSQL_DIR=`echo $MYSQL| sed -nr 's/^(.*[0-9]).*/\1/p'`
ln -s /usr/local/$MYSQL_DIR /usr/local/mysql
chown -R root.root /usr/local/mysql/
id mysql &> /dev/null || { useradd -s /sbin/nologin -r mysql ; action "创建mysql用户"; }
echo 'PATH=/usr/local/mysql/bin/:$PATH' > /etc/profile.d/mysql.sh
. /etc/profile.d/mysql.sh
ln -s /usr/local/mysql/bin/* /usr/bin/
cat > /etc/my.cnf <<-EOF
[mysqld]
server-id=1
log-bin
datadir=/data/mysql
socket=/data/mysql/mysql.sock
log-error=/data/mysql/mysql.log
pid-file=/data/mysql/mysql.pid
[client]
socket=/data/mysql/mysql.sock
EOF
mysqld --initialize --user=mysql --datadir=/data/mysql
cp /usr/local/mysql/support-files/mysql.server /etc/init.d/mysqld
chkconfig --add mysqld
chkconfig mysqld on
service mysqld start
[ $? -ne 0 ] && { $COLOR"数据库启动失败,退出!"$END;exit; }
MYSQL_OLDPASSWORD=`awk '/A temporary password/{print $NF}' /data/mysql/mysql.log`
mysqladmin -uroot -p$MYSQL_OLDPASSWORD password $MYSQL_ROOT_PASSWORD &>/dev/null
action "数据库安装完成"
}
check
install_mysql
Master节点
[root@Master ~]#vi /etc/my.cnf
[mysqld]
server-id=18
log-bin
skip_name_resolve=1
[root@Master ~]#mysql -uroot -pmagedu
mysql>grant replication slave on *.* to repluser@'10.0.0.%' identified by 'repluser'; #主从复制账号
mysql>grant all on *.* to mhauser@'10.0.0.%' identified by 'magedu'; #MHA管理账号
mysql>show master logs;
+--------------------+-----------+
| Log_name | File_size |
+--------------------+-----------+
| centos8-bin.000001 | 177 |
| centos8-bin.000002 | 998 |
+--------------------+-----------+
6 rows in set (0.00 sec)
Slave1节点
[root@Slave1 ~]#vi /etc/my.cnf
[mysqld]
server-id=28
log-bin=/data/mysql/mysql-bin
read-only
relay_log_purge=0
skip_name_resolve=1
mysql>CHANGE MASTER TO
MASTER_HOST='10.0.0.18',
MASTER_USER='repluser',
MASTER_PASSWORD='repluser',
MASTER_PORT=3306,
MASTER_LOG_FILE='centos8-bin.000002',
MASTER_LOG_POS=998;
mysql>start slave;
mysql>show slave status\G;
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 10.0.0.18
Master_User: repluser
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000001
Read_Master_Log_Pos: 154
Relay_Log_File: centos8-relay-bin.000002
Relay_Log_Pos: 320
Relay_Master_Log_File: mysql-bin.000001
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 154
Relay_Log_Space: 529
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 28
Master_UUID: 4cb0ba9e-0ea8-11eb-9a2b-000c29542d1a
Master_Info_File: /data/mysql/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
Replicate_Rewrite_DB:
Channel_Name:
Master_TLS_Version:
1 row in set (0.00 sec)
Slave2节点
[root@Slave2 ~]#vi /etc/my.cnf
[mysqld]
server-id=38
log-bin=/data/mysql/mysql-bin
read-only
relay_log_purge=0
skip_name_resolve=1
mysql>CHANGE MASTER TO
MASTER_HOST='10.0.0.18',
MASTER_USER='repluser',
MASTER_PASSWORD='repluser',
MASTER_PORT=3306,
MASTER_LOG_FILE='centos8-bin.000002',
MASTER_LOG_POS=998;
mysql>start slave;
mysql>show slave status\G;
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 10.0.0.18
Master_User: repluser
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000001
Read_Master_Log_Pos: 154
Relay_Log_File: centos8-relay-bin.000002
Relay_Log_Pos: 320
Relay_Master_Log_File: mysql-bin.000001
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 154
Relay_Log_Space: 529
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 28
Master_UUID: 4cb0ba9e-0ea8-11eb-9a2b-000c29542d1a
Master_Info_File: /data/mysql/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
Replicate_Rewrite_DB:
Channel_Name:
Master_TLS_Version:
1 row in set (0.00 sec)
检差MHA环境
[root@MHA-manager ~]#masterha_check_ssh --conf=/etc/mastermha/app1.cnf
[root@MHA-manager ~]#masterha_check_repl --conf=/etc/mastermha/app1.cnf
#查看状态
[root@MHA-manager ~]#masterha_check_status --conf=/etc/mastermha/app1.cnf
模拟故障
当master宕机后,MHA会自动退出
#前台方式运行MHA
[root@MHA-manager ~]#masterha_manager --conf=/etc/mastermha/app1.cnf
Wed Jun 17 10:02:58 2020 - [warning] Global configuration file
/etc/masterha_default.cnf not found. Skipping.
Wed Jun 17 10:02:58 2020 - [info] Reading application default configuration
from /etc/mastermha/app1.cnf..
Wed Jun 17 10:02:58 2020 - [info] Reading server configuration from
/etc/mastermha/app1.cnf..
Wed Jun 17 10:06:37 2020 - [warning] Global configuration file
/etc/masterha_default.cnf not found. Skipping.
Wed Jun 17 10:06:37 2020 - [info] Reading application default configuration
from /etc/mastermha/app1.cnf..
Wed Jun 17 10:06:37 2020 - [info] Reading server configuration from
/etc/mastermha/app1.cnf..
#查看日志
[root@MHA-manager ~]#tail -f /data/mastermha/app1/manager.log
10.0.0.28(10.0.0.28:3306): OK: Applying all logs succeeded.
10.0.0.28(10.0.0.28:3306): OK: Activated master IP address.
10.0.0.38(10.0.0.38:3306): This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
10.0.0.38(10.0.0.38:3306): OK: Applying all logs succeeded. Slave started, replicating from 10.0.0.28(10.0.0.28:3306)
10.0.0.28(10.0.0.28:3306): Resetting slave info succeeded.
Master failover to 10.0.0.28(10.0.0.28:3306) completed successfully.
验证VIP漂移到新的Master上
[root@Slave1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:54:2d:1a brd ff:ff:ff:ff:ff:ff
inet 10.0.0.28/24 brd 10.0.0.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet 10.0.0.100/24 brd 10.0.0.255 scope global secondary eth0:1 #vip漂移成功
valid_lft forever preferred_lft forever
inet6 fe80::c8d7:3b5b:24f5:9ad9/64 scope link dadfailed tentative noprefixroute
valid_lft forever preferred_lft forever
inet6 fe80::bfa2:9260:f5b8:fae2/64 scope link dadfailed tentative noprefixroute
valid_lft forever preferred_lft forever
inet6 fe80::5ba4:2bc1:c4a1:2eb0/64 scope link noprefixroute
valid_lft forever preferred_lft forever
显示从节点复制线程
mysql> show slave status\G;
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 10.0.0.28 #现在由10.0.0.28当Master节点
Master_User: repluser
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql-bin.000001
Read_Master_Log_Pos: 154
Relay_Log_File: centos8-relay-bin.000002
Relay_Log_Pos: 320
Relay_Master_Log_File: mysql-bin.000001
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 154
Relay_Log_Space: 529
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 28
Master_UUID: 4cb0ba9e-0ea8-11eb-9a2b-000c29542d1a
Master_Info_File: /data/mysql/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
Replicate_Rewrite_DB:
Channel_Name:
Master_TLS_Version:
1 row in set (0.00 sec)
如果想要再次运行MHA,需要删除下面的文件
[root@MHA-manager ~]# ls /data/mastermha/app1/app1.failover.complete -l
-rw-r--r--. 1 root root 0 Oct 16 01:15 /data/mastermha/app1/app1.failover.complete
[root@MHA-manager ~]# rm -rf /data/mastermha/app1/app1.failover.complete