Linux - MHA 配置实验

# 本次实验四台机器

# 10.0.0.21 -  centos 7 - MHA服务器

# 10.0.0.15,16,17 - Centos 8 - Mysql Master, Slave1, Slave2

# 先从github大神那里下载mha manager包和node包

Release mha4mysql-manager-0.58 · yoshinorim/mha4mysql-manager · GitHub

Release mha4mysql-node-0.58 · yoshinorim/mha4mysql-node · GitHub

# MHA服务器,需要都安装这两个

# mysql服务器需要安装node

# 实现基于key验证,可用脚本实现(参考其他大神作品写的)

[23:31:39 root@centos8 ~]#cat /data/push_ssh_key.sh 
#!/bin/bash
#
#**************************************************************************************
#Author:                                   Noise Lys
#QQ:                                       578110218
#Date:                                     2021-05-27
#Filename:                                 push_ssh_key.sh
#URL:                                      https://www.cnblogs.com/noise/
#Description:                              The test script
#Copyright (C):                            2021 All rights reserved
#**************************************************************************************
# Purpose: ping-scan the local /24 network, generate a fresh root SSH key pair,
# and copy the whole /root/.ssh directory (key pair, authorized_keys and
# known_hosts) to every host that answered, so that all of them can reach each
# other with key-based authentication.

# Root password used by sshpass on every host.
PASS=123456
# Last host number to scan; a smaller value makes the scan finish sooner.
END=254
# File that collects the addresses of hosts that answered the ping.
SCAN_LOG=SCANIP.log

# IPv4 address of eth0: third line of `ip a s eth0`, split on spaces and slashes.
IP=$(ip a s eth0 | awk -F'[ /]+' 'NR==3{print $3}')
if [ -z "$IP" ]; then
    echo "Cannot determine the IPv4 address of eth0" >&2
    exit 1
fi
# First three octets of that address, with a trailing dot.
NET=${IP%.*}.

# Remove the old private key and start from an empty scan log, so the log
# exists even when no host answers.
rm -f /root/.ssh/id_rsa
: > "$SCAN_LOG"

# Ping ${NET}3 .. ${NET}${END} in parallel; record every host that answers.
for ((i = 3; i <= END; i++)); do
    ping -c 1 -w 1 "${NET}${i}" &> /dev/null && echo "${NET}${i}" >> "$SCAN_LOG" &
done
wait

# Generate a new key pair with an empty passphrase.
ssh-keygen -P "" -f /root/.ssh/id_rsa
# sshpass feeds the password to ssh/scp non-interactively; install it if missing.
rpm -q sshpass || yum -y install sshpass
# Authorize the new key on this host itself.
sshpass -p "$PASS" ssh-copy-id -o StrictHostKeyChecking=no "$IP"

# Load the reachable addresses, one array element per line of the scan log.
mapfile -t AliveIP < "$SCAN_LOG"

# Push the whole .ssh directory to root's home directory on every host.
for n in "${AliveIP[@]}"; do
    sshpass -p "$PASS" scp -o StrictHostKeyChecking=no -r /root/.ssh "root@${n}:"
done

# Distribute the resulting known_hosts so that the hosts do not prompt for
# host-key confirmation the first time they connect to each other.
for n in "${AliveIP[@]}"; do
    scp /root/.ssh/known_hosts "${n}:.ssh/"
done

 

# 编辑配置文件

[23:33:11 root@centos7 ~]#cat -A /etc/mastermha/app1.cnf (需要提前建立/etc/mastermha文件夹)
[server default]$
user=mhauser$
password=123456$
manager_workdir=/data/mastermha/app1/$
manager_log=/data/mastermha/app1/manager.log$
remote_workdir=/data/mastermha/app1/$
ssh_user=root$
repl_user=repluser$
repl_password=123456$
ping_interval=1$
$
master_ip_failover_script=/usr/local/bin/master_ip_failover$
report_script=/usr/local/bin/sendmail.sh$
master_binlog_dir=/data/mysql/$
$
[server1]$
hostname=10.0.0.15$
candidate_master=1$
$
[server2]$
hostname=10.0.0.16$
candidate_master=1$
$
[server3]$
hostname=10.0.0.17$

# 邮件服务

[23:33:22 root@centos7 ~]#cat /usr/local/bin/sendmail.sh 
#!/bin/bash
#
#**************************************************************************************
#Author:                                   Noise Lys
#QQ:                                       578110218
#Date:                                     2021-06-27
#Filename:                                 /usr/local/bin/sendmail.sh
#URL:                                      https://www.cnblogs.com/noise/
#Description:                              The test script
#Copyright (C):                            2021 All rights reserved
#**************************************************************************************
# Mails a fixed one-line warning with the subject "MHA Warning".
# Any command-line arguments passed to this script are ignored.
printf '%s\n' "MySQL is down" | mail -s "MHA Warning" root@centos720091.com

[23:36:20 root@centos7 ~]#chmod +x /usr/local/bin/sendmail.sh

 

# vip漂移脚本(类似keepalived)

#!/usr/bin/env perl
# master_ip_failover - moves a virtual IP (VIP) between hosts over SSH.
#
# Behavior by --command value:
#   stop | stopssh  take the VIP alias down on --orig_master_host
#   start           bring the VIP alias up on --new_master_host
#   status          (re)assert the VIP on --orig_master_host
# Any other value prints the usage text and exits with status 1.
use strict;
use warnings FATAL => 'all';
use Getopt::Long;

my (
    $command,          $ssh_user,        $orig_master_host, $orig_master_ip,
    $orig_master_port, $new_master_host, $new_master_ip,    $new_master_port,
    $new_master_user,  $new_master_password,
);

my $vip       = '10.0.0.100/24';    # address/prefix, as passed to ifconfig
my $gateway   = '10.0.0.254';       # arping target
my $interface = 'eth0';
my $key       = "1";                # alias number: the VIP lives on eth0:1

# arping -s needs a bare source address, so drop the "/prefix" suffix.
( my $vip_addr = $vip ) =~ s{/\d+\z}{};

my $ssh_start_vip = "/sbin/ifconfig ${interface}:$key $vip;/sbin/arping -I $interface -c 3 -s $vip_addr $gateway >/dev/null 2>&1";
my $ssh_stop_vip  = "/sbin/ifconfig ${interface}:$key down";

GetOptions(
    'command=s'          => \$command,
    'ssh_user=s'         => \$ssh_user,
    'orig_master_host=s' => \$orig_master_host,
    'orig_master_ip=s'   => \$orig_master_ip,
    'orig_master_port=i' => \$orig_master_port,
    'new_master_host=s'  => \$new_master_host,
    'new_master_ip=s'    => \$new_master_ip,
    'new_master_port=i'  => \$new_master_port,

    # Accepted so the caller's extra options are not reported as unknown;
    # the values are not used by this script.
    'new_master_user=s'     => \$new_master_user,
    'new_master_password=s' => \$new_master_password,
);

# A missing --command must reach the usage branch instead of dying on an
# "uninitialized value" warning (warnings are fatal in this script).
$command = '' unless defined $command;

exit main();

# Dispatches on $command and returns the process exit code.
sub main {
    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

    if ( $command eq "stop" || $command eq "stopssh" ) {
        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            stop_vip();
            $exit_code = 0;
        };
        warn "Got Error: $@\n" if $@;
        return $exit_code;
    }

    if ( $command eq "start" ) {
        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            start_vip();
            $exit_code = 0;
        };
        warn $@ if $@;
        return $exit_code;
    }

    if ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";

        # Note: the status check runs the start command on the current master.
        run_remote( $orig_master_host, $ssh_start_vip );
        return 0;
    }

    usage();
    return 1;
}

# Runs $remote_cmd on $host over SSH as $ssh_user. Best effort: a non-zero
# exit status is reported on STDERR but does not abort the script.
sub run_remote {
    my ( $host, $remote_cmd ) = @_;

    my $output = `ssh $ssh_user\@$host \" $remote_cmd \"`;
    warn "ssh to $host returned status $?\n" if $? != 0;
    return;
}

# Brings the VIP alias up on the new master.
sub start_vip {
    run_remote( $new_master_host, $ssh_start_vip );
    return;
}

# Takes the VIP alias down on the original master.
sub stop_vip {
    run_remote( $orig_master_host, $ssh_stop_vip );
    return;
}

# Prints the command-line synopsis.
sub usage {
    print "Usage: master_ip_failover --command=start|stop|stopssh|status "
        . "--ssh_user=user "
        . "--orig_master_host=host --orig_master_ip=ip --orig_master_port=port "
        . "--new_master_host=host --new_master_ip=ip --new_master_port=port\n";
    return;
}
[23:38:46 root@centos7 ~]#chmod +x /usr/local/bin/master_ip_failover

 

# mysql master 配置文件

[23:20:04 root@centos8 ~]#cat /etc/my.cnf.d/mysql-server.cnf 
#
# This group are read by MySQL server.
# Use it for options that only the server (but not clients) should see
#
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/en/server-configuration-defaults.html

# Settings user and group are ignored when systemd is used.
# If you need to run mysqld under a different user or group,
# customize your systemd unit file for mysqld according to the
# instructions in http://fedoraproject.org/wiki/Systemd

[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
log-error=/var/log/mysql/mysqld.log
pid-file=/run/mysqld/mysqld.pid
server_id=15
log-bin=/data/mysql/mysql-bin
skip_name_resolve=1
general_log

# 并创建user和记录下当前logbin (主从需要);密码需与app1.cnf中的password/repl_password一致

mysql> create user repluser@'10.0.0.%' identified by '123456';
mysql> grant replication slave on *.* to repluser@'10.0.0.%';
mysql> create user mhauser@'10.0.0.%' identified by '123456';
mysql> grant all on *.* to mhauser@'10.0.0.%';

 

# mysql slave 配置文件

[23:40:04 root@centos8 ~]#cat /etc/my.cnf.d/mysql-server.cnf 
#
# This group are read by MySQL server.
# Use it for options that only the server (but not clients) should see
#
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/en/server-configuration-defaults.html

# Settings user and group are ignored when systemd is used.
# If you need to run mysqld under a different user or group,
# customize your systemd unit file for mysqld according to the
# instructions in http://fedoraproject.org/wiki/Systemd

[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
log-error=/var/log/mysql/mysqld.log
pid-file=/run/mysqld/mysqld.pid
server_id=17 
log-bin=/data/mysql/mysql-bin
read_only
relay_log_purge=0
skip_name_resolve=1    
general_log

 

# 给mysql master设立vip

[22:38:12 root@centos8 ~]#ifconfig eth0:1 10.0.0.100/24
[22:39:48 root@centos8 ~]#ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 00:0c:29:b7:3e:e4 brd ff:ff:ff:ff:ff:ff
    inet 10.0.0.15/24 brd 10.0.0.255 scope global noprefixroute eth0
       valid_lft forever preferred_lft forever
    inet 10.0.0.100/24 brd 10.0.0.255 scope global secondary eth0:1
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:feb7:3ee4/64 scope link 
       valid_lft forever preferred_lft forever

#在manager上检查mha配置

masterha_check_ssh --conf=/etc/mastermha/app1.cnf
masterha_check_repl --conf=/etc/mastermha/app1.cnf
 
#开启MHA,默认是前台运行
nohup masterha_manager --conf=/etc/mastermha/app1.cnf &> /dev/null
#查看状态
masterha_check_status --conf=/etc/mastermha/app1.cnf  
 
# master 上面检查,会不断有SELECT 1 As Value
tail -f /var/lib/mysql/centos8.log

2021-06-28T15:19:37.345677Z 13 Query SELECT 1 As Value
2021-06-28T15:19:38.339499Z 13 Query SELECT 1 As Value
2021-06-28T15:19:39.340015Z 13 Query SELECT 1 As Value
2021-06-28T15:19:40.340974Z 13 Query SELECT 1 As Value
2021-06-28T15:19:41.340748Z 13 Query SELECT 1 As Value
2021-06-28T15:19:42.341860Z 13 Query SELECT 1 As Value
2021-06-28T15:19:43.343077Z 13 Query SELECT 1 As Value
2021-06-28T15:19:44.343118Z 13 Query SELECT 1 As Value
2021-06-28T15:19:45.344224Z 13 Query SELECT 1 As Value
2021-06-28T15:19:46.344526Z 13 Query SELECT 1 As Value
2021-06-28T15:19:47.345494Z 13 Query SELECT 1 As Value
2021-06-28T15:19:48.347683Z 13 Query SELECT 1 As Value
2021-06-28T15:19:48.801340Z 8 Quit
2021-06-28T15:19:49.348587Z 13 Query SELECT 1 As Value
2021-06-28T15:19:50.348986Z 13 Query SELECT 1 As Value
2021-06-28T15:19:51.349671Z 13 Query SELECT 1 As Value
2021-06-28T15:19:52.351619Z 13 Query SELECT 1 As Value
2021-06-28T15:19:53.351102Z 13 Query SELECT 1 As Value
2021-06-28T15:19:54.351510Z 13 Query SELECT 1 As Value
2021-06-28T15:19:55.353596Z 13 Query SELECT 1 As Value
2021-06-28T15:19:56.361825Z 13 Query SELECT 1 As Value

#当mha运行后,down掉master

[23:19:07 root@centos7 ~]#tail -f /data/mastermha/app1/manager.log
Mon Jun 28 23:19:37 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes.
Mon Jun 28 23:19:37 2021 - [info] Starting ping health check on 10.0.0.15(10.0.0.15:3306)..
Mon Jun 28 23:19:37 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesnt respond..
Mon Jun 28 23:19:56 2021 - [warning] Got error on MySQL select ping: 1053 (Server shutdown in progress)
Mon Jun 28 23:19:56 2021 - [info] Executing SSH check script: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//save_binary_logs_test --manager_version=0.58 --binlog_prefix=mysql-bin
Mon Jun 28 23:19:56 2021 - [info] HealthCheck: SSH to 10.0.0.15 is reachable.
Mon Jun 28 23:19:57 2021 - [warning] Got error on MySQL connect: 2003 (Cant connect to MySQL server on 10.0.0.15 (111))
Mon Jun 28 23:19:57 2021 - [warning] Connection failed 2 time(s)..
Mon Jun 28 23:19:58 2021 - [warning] Got error on MySQL connect: 2003 (Cant connect to MySQL server on 10.0.0.15 (111))
Mon Jun 28 23:19:58 2021 - [warning] Connection failed 3 time(s)..
Mon Jun 28 23:19:59 2021 - [warning] Got error on MySQL connect: 2003 (Cant connect to MySQL server on 10.0.0.15 (111))
Mon Jun 28 23:19:59 2021 - [warning] Connection failed 4 time(s)..
Mon Jun 28 23:19:59 2021 - [warning] Master is not reachable from health checker!
Mon Jun 28 23:19:59 2021 - [warning] Master 10.0.0.15(10.0.0.15:3306) is not reachable!
Mon Jun 28 23:19:59 2021 - [warning] SSH is reachable.
Mon Jun 28 23:19:59 2021 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mastermha/app1.cnf again, and trying to connect to all servers to check server status..
Mon Jun 28 23:19:59 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jun 28 23:19:59 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf..
Mon Jun 28 23:19:59 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf..
Mon Jun 28 23:20:00 2021 - [info] GTID failover mode = 0
Mon Jun 28 23:20:00 2021 - [info] Dead Servers:
Mon Jun 28 23:20:00 2021 - [info]   10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:00 2021 - [info] Alive Servers:
Mon Jun 28 23:20:00 2021 - [info]   10.0.0.16(10.0.0.16:3306)
Mon Jun 28 23:20:00 2021 - [info]   10.0.0.17(10.0.0.17:3306)
Mon Jun 28 23:20:00 2021 - [info] Alive Slaves:
Mon Jun 28 23:20:00 2021 - [info]   10.0.0.16(10.0.0.16:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:00 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:00 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Mon Jun 28 23:20:00 2021 - [info]   10.0.0.17(10.0.0.17:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:00 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:00 2021 - [info] Checking slave configurations..
Mon Jun 28 23:20:00 2021 - [info]  read_only=1 is not set on slave 10.0.0.16(10.0.0.16:3306).
Mon Jun 28 23:20:00 2021 - [info] Checking replication filtering settings..
Mon Jun 28 23:20:00 2021 - [info]  Replication filtering check ok.
Mon Jun 28 23:20:00 2021 - [info] Master is down!
Mon Jun 28 23:20:00 2021 - [info] Terminating monitoring script.
Mon Jun 28 23:20:00 2021 - [info] Got exit code 20 (Master dead).
Mon Jun 28 23:20:00 2021 - [info] MHA::MasterFailover version 0.58.
Mon Jun 28 23:20:00 2021 - [info] Starting master failover.
Mon Jun 28 23:20:00 2021 - [info] 
Mon Jun 28 23:20:00 2021 - [info] * Phase 1: Configuration Check Phase..
Mon Jun 28 23:20:00 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] GTID failover mode = 0
Mon Jun 28 23:20:01 2021 - [info] Dead Servers:
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:01 2021 - [info] Checking master reachability via MySQL(double check)...
Mon Jun 28 23:20:01 2021 - [info]  ok.
Mon Jun 28 23:20:01 2021 - [info] Alive Servers:
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.16(10.0.0.16:3306)
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.17(10.0.0.17:3306)
Mon Jun 28 23:20:01 2021 - [info] Alive Slaves:
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.16(10.0.0.16:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:01 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:01 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.17(10.0.0.17:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:01 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:01 2021 - [info] Starting Non-GTID based failover.
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] ** Phase 1: Configuration Check Phase completed.
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] * Phase 2: Dead Master Shutdown Phase..
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] Forcing shutdown so that applications never connect to the current master..
Mon Jun 28 23:20:01 2021 - [info] Executing master IP deactivation script:
Mon Jun 28 23:20:01 2021 - [info]   /usr/local/bin/master_ip_failover --orig_master_host=10.0.0.15 --orig_master_ip=10.0.0.15 --orig_master_port=3306 --command=stopssh --ssh_user=root  


IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.254 >/dev/null 2>&1===

Disabling the VIP on old master: 10.0.0.15 
Mon Jun 28 23:20:01 2021 - [info]  done.
Mon Jun 28 23:20:01 2021 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Mon Jun 28 23:20:01 2021 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] * Phase 3: Master Recovery Phase..
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] The latest binary log file/position on all slaves is mysql-bin.000005:156
Mon Jun 28 23:20:01 2021 - [info] Latest slaves (Slaves that received relay log files to the latest):
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.16(10.0.0.16:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:01 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:01 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.17(10.0.0.17:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:01 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:01 2021 - [info] The oldest binary log file/position on all slaves is mysql-bin.000005:156
Mon Jun 28 23:20:01 2021 - [info] Oldest slaves:
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.16(10.0.0.16:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:01 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:01 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Mon Jun 28 23:20:01 2021 - [info]   10.0.0.17(10.0.0.17:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:01 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:01 2021 - [info] * Phase 3.2: Saving Dead Masters Binlog Phase..
Mon Jun 28 23:20:01 2021 - [info] 
Mon Jun 28 23:20:02 2021 - [info] Fetching dead masters binary logs..
Mon Jun 28 23:20:02 2021 - [info] Executing command on the dead master 10.0.0.15(10.0.0.15:3306): save_binary_logs --command=save --start_file=mysql-bin.000005  --start_pos=156 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//saved_master_binlog_from_10.0.0.15_3306_20210628232000.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.58
  Creating /data/mastermha/app1 if not exists..    ok.
 Concat binary/relay logs from mysql-bin.000005 pos 156 to mysql-bin.000005 EOF into /data/mastermha/app1//saved_master_binlog_from_10.0.0.15_3306_20210628232000.binlog ..
 Binlog Checksum enabled
  Dumping binlog format description event, from position 0 to 156.. ok.
  No need to dump effective binlog data from /data/mysql//mysql-bin.000005 (pos starts 156, filesize 156). Skipping.
 Binlog Checksum enabled
 /data/mastermha/app1//saved_master_binlog_from_10.0.0.15_3306_20210628232000.binlog has no effective data events.
Event not exists.
Mon Jun 28 23:20:03 2021 - [info] Additional events were not found from the orig master. No need to save.
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] * Phase 3.3: Determining New Master Phase..
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] Finding the latest slave that has all relay logs for recovering other slaves..
Mon Jun 28 23:20:03 2021 - [info] All slaves received relay logs to the same position. No need to resync each other.
Mon Jun 28 23:20:03 2021 - [info] Searching new master from slaves..
Mon Jun 28 23:20:03 2021 - [info]  Candidate masters from the configuration file:
Mon Jun 28 23:20:03 2021 - [info]   10.0.0.16(10.0.0.16:3306)  Version=8.0.21 (oldest major version between slaves) log-bin:enabled
Mon Jun 28 23:20:03 2021 - [info]     Replicating from 10.0.0.15(10.0.0.15:3306)
Mon Jun 28 23:20:03 2021 - [info]     Primary candidate for the new Master (candidate_master is set)
Mon Jun 28 23:20:03 2021 - [info]  Non-candidate masters:
Mon Jun 28 23:20:03 2021 - [info]  Searching from candidate_master slaves which have received the latest relay log events..
Mon Jun 28 23:20:03 2021 - [info] New master is 10.0.0.16(10.0.0.16:3306)
Mon Jun 28 23:20:03 2021 - [info] Starting master failover..
Mon Jun 28 23:20:03 2021 - [info] 
From:
10.0.0.15(10.0.0.15:3306) (current master)
 +--10.0.0.16(10.0.0.16:3306)
 +--10.0.0.17(10.0.0.17:3306)

To:
10.0.0.16(10.0.0.16:3306) (new master)
 +--10.0.0.17(10.0.0.17:3306)
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] * Phase 3.4: New Master Diff Log Generation Phase..
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info]  This server has all relay logs. No need to generate diff files from the latest slave.
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] * Phase 3.5: Master Log Apply Phase..
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed.
Mon Jun 28 23:20:03 2021 - [info] Starting recovery on 10.0.0.16(10.0.0.16:3306)..
Mon Jun 28 23:20:03 2021 - [info]  This server has all relay logs. Waiting all logs to be applied.. 
Mon Jun 28 23:20:03 2021 - [info]   done.
Mon Jun 28 23:20:03 2021 - [info]  All relay logs were successfully applied.
Mon Jun 28 23:20:03 2021 - [info] Getting new masters binlog name and position..
Mon Jun 28 23:20:03 2021 - [info]  mysql-bin.000002:1229
Mon Jun 28 23:20:03 2021 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST=10.0.0.16, MASTER_PORT=3306, MASTER_LOG_FILE=mysql-bin.000002, MASTER_LOG_POS=1229, MASTER_USER=repluser, MASTER_PASSWORD=xxx;
Mon Jun 28 23:20:03 2021 - [info] Executing master IP activate script:
Mon Jun 28 23:20:03 2021 - [info]   /usr/local/bin/master_ip_failover --command=start --ssh_user=root --orig_master_host=10.0.0.15 --orig_master_ip=10.0.0.15 --orig_master_port=3306 --new_master_host=10.0.0.16 --new_master_ip=10.0.0.16 --new_master_port=3306 --new_master_user=mhauser   --new_master_password=xxx
Unknown option: new_master_user
Unknown option: new_master_password


IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.254 >/dev/null 2>&1===

Enabling the VIP - 10.0.0.100/24 on the new master - 10.0.0.16 
Mon Jun 28 23:20:03 2021 - [info]  OK.
Mon Jun 28 23:20:03 2021 - [info] ** Finished master recovery successfully.
Mon Jun 28 23:20:03 2021 - [info] * Phase 3: Master Recovery Phase completed.
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] * Phase 4: Slaves Recovery Phase..
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase..
Mon Jun 28 23:20:03 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info] -- Slave diff file generation on host 10.0.0.17(10.0.0.17:3306) started, pid: 66090. Check tmp log /data/mastermha/app1//10.0.0.17_3306_20210628232000.log if it takes time..
Mon Jun 28 23:20:04 2021 - [info] 
Mon Jun 28 23:20:04 2021 - [info] Log messages from 10.0.0.17 ...
Mon Jun 28 23:20:04 2021 - [info] 
Mon Jun 28 23:20:03 2021 - [info]  This server has all relay logs. No need to generate diff files from the latest slave.
Mon Jun 28 23:20:04 2021 - [info] End of log messages from 10.0.0.17.
Mon Jun 28 23:20:04 2021 - [info] -- 10.0.0.17(10.0.0.17:3306) has the latest relay log events.
Mon Jun 28 23:20:04 2021 - [info] Generating relay diff files from the latest slave succeeded.
Mon Jun 28 23:20:04 2021 - [info] 
Mon Jun 28 23:20:04 2021 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase..
Mon Jun 28 23:20:04 2021 - [info] 
Mon Jun 28 23:20:04 2021 - [info] -- Slave recovery on host 10.0.0.17(10.0.0.17:3306) started, pid: 66092. Check tmp log /data/mastermha/app1//10.0.0.17_3306_20210628232000.log if it takes time..
Mon Jun 28 23:20:05 2021 - [info] 
Mon Jun 28 23:20:05 2021 - [info] Log messages from 10.0.0.17 ...
Mon Jun 28 23:20:05 2021 - [info] 
Mon Jun 28 23:20:04 2021 - [info] Starting recovery on 10.0.0.17(10.0.0.17:3306)..
Mon Jun 28 23:20:04 2021 - [info]  This server has all relay logs. Waiting all logs to be applied.. 
Mon Jun 28 23:20:04 2021 - [info]   done.
Mon Jun 28 23:20:04 2021 - [info]  All relay logs were successfully applied.
Mon Jun 28 23:20:04 2021 - [info]  Resetting slave 10.0.0.17(10.0.0.17:3306) and starting replication from the new master 10.0.0.16(10.0.0.16:3306)..
Mon Jun 28 23:20:04 2021 - [info]  Executed CHANGE MASTER.
Mon Jun 28 23:20:04 2021 - [info]  Slave started.
Mon Jun 28 23:20:05 2021 - [info] End of log messages from 10.0.0.17.
Mon Jun 28 23:20:05 2021 - [info] -- Slave recovery on host 10.0.0.17(10.0.0.17:3306) succeeded.
Mon Jun 28 23:20:05 2021 - [info] All new slave servers recovered successfully.
Mon Jun 28 23:20:05 2021 - [info] 
Mon Jun 28 23:20:05 2021 - [info] * Phase 5: New master cleanup phase..
Mon Jun 28 23:20:05 2021 - [info] 
Mon Jun 28 23:20:05 2021 - [info] Resetting slave info on the new master..
Mon Jun 28 23:20:05 2021 - [info]  10.0.0.16: Resetting slave info succeeded.
Mon Jun 28 23:20:05 2021 - [info] Master failover to 10.0.0.16(10.0.0.16:3306) completed successfully.
Mon Jun 28 23:20:05 2021 - [info] 

----- Failover Report -----

app1: MySQL Master failover 10.0.0.15(10.0.0.15:3306) to 10.0.0.16(10.0.0.16:3306) succeeded

Master 10.0.0.15(10.0.0.15:3306) is down!

Check MHA Manager logs at centos7.2009:/data/mastermha/app1/manager.log for details.

Started automated(non-interactive) failover.
Invalidated master IP address on 10.0.0.15(10.0.0.15:3306)
The latest slave 10.0.0.16(10.0.0.16:3306) has all relay logs for recovery.
Selected 10.0.0.16(10.0.0.16:3306) as a new master.
10.0.0.16(10.0.0.16:3306): OK: Applying all logs succeeded.
10.0.0.16(10.0.0.16:3306): OK: Activated master IP address.
10.0.0.17(10.0.0.17:3306): This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
10.0.0.17(10.0.0.17:3306): OK: Applying all logs succeeded. Slave started, replicating from 10.0.0.16(10.0.0.16:3306)
10.0.0.16(10.0.0.16:3306): Resetting slave info succeeded.
Master failover to 10.0.0.16(10.0.0.16:3306) completed successfully.
Mon Jun 28 23:20:05 2021 - [info] Sending mail..
并且有邮件

Linux - MHA 配置实验

 

 

# 此时在新的master(原slave1,10.0.0.16)上查看,vip已经漂移过来,说明成功

[23:20:22 root@centos8 ~]#ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:fa:59:6e brd ff:ff:ff:ff:ff:ff
inet 10.0.0.16/24 brd 10.0.0.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet 10.0.0.100/24 brd 10.0.0.255 scope global secondary eth0:1
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fefa:596e/64 scope link
valid_lft forever preferred_lft forever

############################

ERROR:

1> 每部机器防火墙必须关闭,net-tools等请必须安装

不然会出现mysql无法启动、vip无法漂移(虽然manager.log无报错)等错误

2> 需要重新启动MHA时,请删掉下面几个file,然后重新配置mysql的主从(change master to..)

[23:16:31 root@centos7 ~]#rm -f /data/mastermha/app1//app1.failover.complete (此情况适合要恢复master)
[23:18:07 root@centos7 ~]#rm -rf /data/mastermha/app1/manager.log
[23:18:10 root@centos7 ~]#rm -f /data/mastermha/app1/app1.failover.error

3> 一切问题都可以从manager.log中查找,如果其他问题,请qq我 578110218(能帮就帮)

 

Linux - MHA 配置实验

上一篇:ubuntu16.04安装docker-compose报错及解决


下一篇:04 Linux终端命令01