# 本次实验四台机器
# 10.0.0.21 - centos 7 - MHA服务器
# 10.0.0.15,16,17 - Centos 8 - Mysql Master, Slave1, Slave2
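# 先从github大神那里下载mha manager包和node包
Release mha4mysql-manager-0.58 · yoshinorim/mha4mysql-manager · GitHub (https://github.com/yoshinorim/mha4mysql-manager/releases/tag/v0.58)
Release mha4mysql-node-0.58 · yoshinorim/mha4mysql-node · GitHub (https://github.com/yoshinorim/mha4mysql-node/releases/tag/v0.58)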
# MHA服务器,需要都安装这两个
# mysql服务器需要安装node
# 实现基于key验证,可用脚本实现(参考其他大神作品写的)
[23:31:39 root@centos8 ~]#cat /data/push_ssh_key.sh
#!/bin/bash
#
#**************************************************************************************
#Author: Noise Lys
#QQ: 578110218
#Date: 2021-05-27
#Filename: push_ssh_key.sh
#URL: https://www.cnblogs.com/noise/
#Description: The test script
#Copyright (C): 2021 All rights reserved
#**************************************************************************************
#
# Generates a fresh root SSH key pair on this host and copies the whole
# /root/.ssh directory to every host in the local /24 that answers a ping,
# so that all of them can reach each other with key-based authentication.
# Assumes every host accepts the root password stored in PASS.

PASS=123456
# Last host number of the subnet to scan (4-255); a smaller value scans faster.
END=254
# IPv4 address configured on eth0 (third line of `ip a s eth0`).
IP=$(ip a s eth0 | awk -F'[ /]+' 'NR==3{print $3}')
# First three octets of that address, with a trailing dot.
NET=${IP%.*}.

# Remove the previous key pair and the previous scan log.
rm -f /root/.ssh/id_rsa /root/.ssh/id_rsa.pub
rm -f ./SCANIP.log

# Ping hosts .3 through .$END in parallel; every host that answers is
# appended to SCANIP.log.
for ((i = 3; i <= END; i++)); do
    ping -c 1 -w 1 "${NET}${i}" &> /dev/null && echo "${NET}${i}" >> SCANIP.log &
done
wait

# Create the new key pair without a passphrase.
ssh-keygen -P "" -f /root/.ssh/id_rsa

# sshpass feeds the password to ssh/scp non-interactively; install it if missing.
rpm -q sshpass || yum -y install sshpass

# Authorize the new key on this host first, so the copied .ssh directory
# already contains authorized_keys.
sshpass -p "$PASS" ssh-copy-id -o StrictHostKeyChecking=no "$IP"

# Read the reachable addresses, one per line, into an array.
mapfile -t AliveIP < SCANIP.log

# Push the complete .ssh directory (key pair + authorized_keys) to each host.
for n in "${AliveIP[@]}"; do
    sshpass -p "$PASS" scp -o StrictHostKeyChecking=no -r /root/.ssh "root@${n}:"
done

# Push the now fully populated known_hosts to each host so that their first
# connection to one another does not stop at the host-key prompt.
for n in "${AliveIP[@]}"; do
    scp /root/.ssh/known_hosts "${n}:.ssh/"
done
# 编辑配置文件
[23:33:11 root@centos7 ~]#cat -A /etc/mastermha/app1.cnf (需要提前建立/etc/mastermha文件夹)
[server default]$
user=mhauser$
password=123456$
manager_workdir=/data/mastermha/app1/$
manager_log=/data/mastermha/app1/manager.log$
remote_workdir=/data/mastermha/app1/$
ssh_user=root$
repl_user=repluser$
repl_password=123456$
ping_interval=1$
$
master_ip_failover_script=/usr/local/bin/master_ip_failover$
report_script=/usr/local/bin/sendmail.sh$
master_binlog_dir=/data/mysql/$
$
[server1]$
hostname=10.0.0.15$
candidate_master=1$
$
[server2]$
hostname=10.0.0.16$
candidate_master=1$
$
[server3]$
hostname=10.0.0.17$
# 邮件服务
[23:33:22 root@centos7 ~]#cat /usr/local/bin/sendmail.sh
#!/bin/bash
#
#**************************************************************************************
#Author: Noise Lys
#QQ: 578110218
#Date: 2021-06-27
#Filename: /usr/local/bin/sendmail.sh
#URL: https://www.cnblogs.com/noise/
#Description: The test script
#Copyright (C): 2021 All rights reserved
#**************************************************************************************
#
# Referenced as report_script in app1.cnf: sends a fixed warning mail
# through the local `mail` command.
echo "MySQL is down" | mail -s "MHA Warning" root@centos720091.com
[23:36:20 root@centos7 ~]#chmod +x /usr/local/bin/sendmail.sh
# vip漂移脚本 /usr/local/bin/master_ip_failover (类似keepalived)
#!/usr/bin/env perl
# master_ip_failover - moves the writer VIP between MySQL hosts for MHA.
#
# The script is driven by --command:
#   stop | stopssh  take the VIP off the old master (--orig_master_host)
#   start           bring the VIP up on the new master (--new_master_host)
#   status          (re)assert the VIP on the current master and report OK
# All work is done by running ifconfig/arping on the target host over ssh
# as --ssh_user.
#
# Exit codes: 0 success; 1 bad usage or an error while handling stop;
#             10 the VIP could not be enabled on the new master.
use strict;
use warnings FATAL => 'all';
use Getopt::Long;

# ssh writes straight to our stdout, so keep our own messages unbuffered
# to preserve their order in the manager log.
$| = 1;

my (
    $command,          $ssh_user,         $orig_master_host,
    $orig_master_ip,   $orig_master_port, $new_master_host,
    $new_master_ip,    $new_master_port,  $new_master_user,
    $new_master_password,
);

my $vip       = '10.0.0.100/24';
my $gateway   = '10.0.0.254';
my $interface = 'eth0';
my $key       = "1";

# ifconfig accepts address/prefix, but arping -s needs a bare address.
( my $vip_addr = $vip ) =~ s{/\d+\z}{};

# The remote exit status reflects only whether the address was configured;
# the gratuitous ARP towards the gateway is best effort and must not turn a
# working VIP into a reported failure.
my $ssh_start_vip =
    "/sbin/ifconfig $interface:$key $vip || exit 1; "
  . "/sbin/arping -I $interface -c 3 -s $vip_addr $gateway >/dev/null 2>&1; "
  . "exit 0";
my $ssh_stop_vip = "/sbin/ifconfig $interface:$key down";

# new_master_user / new_master_password are passed by MHA 0.58 on
# --command=start; they are accepted (and unused) so that GetOptions does
# not print "Unknown option" into the manager log.
GetOptions(
    'command=s'             => \$command,
    'ssh_user=s'            => \$ssh_user,
    'orig_master_host=s'    => \$orig_master_host,
    'orig_master_ip=s'      => \$orig_master_ip,
    'orig_master_port=i'    => \$orig_master_port,
    'new_master_host=s'     => \$new_master_host,
    'new_master_ip=s'       => \$new_master_ip,
    'new_master_port=i'     => \$new_master_port,
    'new_master_user=s'     => \$new_master_user,
    'new_master_password=s' => \$new_master_password,
);

exit main();

# Dispatches on --command and terminates the process with the exit code
# described in the header. Never returns normally.
sub main {
    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

    # Without this guard the string comparisons below would die on an
    # "uninitialized value" warning instead of printing the usage text.
    if ( !defined $command ) {
        usage();
        exit 1;
    }

    if ( $command eq "stop" || $command eq "stopssh" ) {
        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            stop_vip();
            $exit_code = 0;
        };
        warn "Got Error: $@\n" if $@;
        exit $exit_code;
    }
    elsif ( $command eq "start" ) {
        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            start_vip();
            $exit_code = 0;
        };
        warn $@ if $@;
        exit $exit_code;
    }
    elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        # Same as the original behaviour: status also (re)configures the VIP
        # on the current master; a failure here is reported but not fatal.
        run_remote( $orig_master_host, $ssh_start_vip )
          or warn "Could not assert VIP $vip on $orig_master_host\n";
        exit 0;
    }
    else {
        usage();
        exit 1;
    }
}

# Runs $remote_cmd on $host over ssh as $ssh_user.
# Returns true when ssh and the remote command both exited with status 0.
# The list form of system() avoids a local shell; ssh hands the command
# string to the remote shell unchanged.
sub run_remote {
    my ( $host, $remote_cmd ) = @_;
    my $status = system( 'ssh', "$ssh_user\@$host", $remote_cmd );
    return $status == 0;
}

# Brings the VIP up on the new master; dies if that fails so that main()
# exits with 10 and the failure is visible to the caller.
sub start_vip {
    run_remote( $new_master_host, $ssh_start_vip )
      or die "Failed to enable VIP $vip on $new_master_host\n";
    return;
}

# Takes the VIP down on the old master. Deliberately best effort: the old
# master may be unreachable, and that must not abort the failover.
sub stop_vip {
    run_remote( $orig_master_host, $ssh_stop_vip )
      or warn "Could not disable the VIP on $orig_master_host (continuing)\n";
    return;
}

# Prints the command-line synopsis.
sub usage {
    print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
    return;
}
[23:38:46 root@centos7 ~]#chmod +x /usr/local/bin/master_ip_failover
# mysql master 配置文件
[23:20:04 root@centos8 ~]#cat /etc/my.cnf.d/mysql-server.cnf
#
# This group are read by MySQL server.
# Use it for options that only the server (but not clients) should see
#
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/en/server-configuration-defaults.html
# Settings user and group are ignored when systemd is used.
# If you need to run mysqld under a different user or group,
# customize your systemd unit file for mysqld according to the
# instructions in http://fedoraproject.org/wiki/Systemd
[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
log-error=/var/log/mysql/mysqld.log
pid-file=/run/mysqld/mysqld.pid
server_id=15
log-bin=/data/mysql/mysql-bin
skip_name_resolve=1
general_log
# 并创建user和记录下当前logbin (主从需要); 密码必须与app1.cnf中的password/repl_password(123456)一致
mysql> create user repluser@'10.0.0.%' identified by '123456';
mysql> grant replication slave on *.* to repluser@'10.0.0.%';
mysql> create user mhauser@'10.0.0.%' identified by '123456';
mysql> grant all on *.* to mhauser@'10.0.0.%';
# mysql slave 配置文件
[23:40:04 root@centos8 ~]#cat /etc/my.cnf.d/mysql-server.cnf
#
# This group are read by MySQL server.
# Use it for options that only the server (but not clients) should see
#
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/en/server-configuration-defaults.html
# Settings user and group are ignored when systemd is used.
# If you need to run mysqld under a different user or group,
# customize your systemd unit file for mysqld according to the
# instructions in http://fedoraproject.org/wiki/Systemd
[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
log-error=/var/log/mysql/mysqld.log
pid-file=/run/mysqld/mysqld.pid
server_id=17
log-bin=/data/mysql/mysql-bin
read_only
relay_log_purge=0
skip_name_resolve=1
general_log
# 给mysql master设立vip
[22:38:12 root@centos8 ~]#ifconfig eth0:1 10.0.0.100/24
[22:39:48 root@centos8 ~]#ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:b7:3e:e4 brd ff:ff:ff:ff:ff:ff
inet 10.0.0.15/24 brd 10.0.0.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet 10.0.0.100/24 brd 10.0.0.255 scope global secondary eth0:1
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:feb7:3ee4/64 scope link
valid_lft forever preferred_lft forever
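#在manager上检查mha配置并启动MHA
masterha_check_ssh --conf=/etc/mastermha/app1.cnf
masterha_check_repl --conf=/etc/mastermha/app1.cnf
nohup masterha_manager --conf=/etc/mastermha/app1.cnf &> /dev/null &
masterha_check_status --conf=/etc/mastermha/app1.cnf
#MHA启动后,master的general_log中每秒(ping_interval=1)会出现一次健康检查查询: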
2021-06-28T15:19:37.345677Z 13 Query SELECT 1 As Value
2021-06-28T15:19:38.339499Z 13 Query SELECT 1 As Value
2021-06-28T15:19:39.340015Z 13 Query SELECT 1 As Value
2021-06-28T15:19:40.340974Z 13 Query SELECT 1 As Value
2021-06-28T15:19:41.340748Z 13 Query SELECT 1 As Value
2021-06-28T15:19:42.341860Z 13 Query SELECT 1 As Value
2021-06-28T15:19:43.343077Z 13 Query SELECT 1 As Value
2021-06-28T15:19:44.343118Z 13 Query SELECT 1 As Value
2021-06-28T15:19:45.344224Z 13 Query SELECT 1 As Value
2021-06-28T15:19:46.344526Z 13 Query SELECT 1 As Value
2021-06-28T15:19:47.345494Z 13 Query SELECT 1 As Value
2021-06-28T15:19:48.347683Z 13 Query SELECT 1 As Value
2021-06-28T15:19:48.801340Z 8 Quit
2021-06-28T15:19:49.348587Z 13 Query SELECT 1 As Value
2021-06-28T15:19:50.348986Z 13 Query SELECT 1 As Value
2021-06-28T15:19:51.349671Z 13 Query SELECT 1 As Value
2021-06-28T15:19:52.351619Z 13 Query SELECT 1 As Value
2021-06-28T15:19:53.351102Z 13 Query SELECT 1 As Value
2021-06-28T15:19:54.351510Z 13 Query SELECT 1 As Value
2021-06-28T15:19:55.353596Z 13 Query SELECT 1 As Value
2021-06-28T15:19:56.361825Z 13 Query SELECT 1 As Value
#当mha运行后,down掉master
[23:19:07 root@centos7 ~]#tail -f /data/mastermha/app1/manager.log Mon Jun 28 23:19:37 2021 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes. Mon Jun 28 23:19:37 2021 - [info] Starting ping health check on 10.0.0.15(10.0.0.15:3306).. Mon Jun 28 23:19:37 2021 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn‘t respond.. Mon Jun 28 23:19:56 2021 - [warning] Got error on MySQL select ping: 1053 (Server shutdown in progress) Mon Jun 28 23:19:56 2021 - [info] Executing SSH check script: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//save_binary_logs_test --manager_version=0.58 --binlog_prefix=mysql-bin Mon Jun 28 23:19:56 2021 - [info] HealthCheck: SSH to 10.0.0.15 is reachable. Mon Jun 28 23:19:57 2021 - [warning] Got error on MySQL connect: 2003 (Can‘t connect to MySQL server on ‘10.0.0.15‘ (111)) Mon Jun 28 23:19:57 2021 - [warning] Connection failed 2 time(s).. Mon Jun 28 23:19:58 2021 - [warning] Got error on MySQL connect: 2003 (Can‘t connect to MySQL server on ‘10.0.0.15‘ (111)) Mon Jun 28 23:19:58 2021 - [warning] Connection failed 3 time(s).. Mon Jun 28 23:19:59 2021 - [warning] Got error on MySQL connect: 2003 (Can‘t connect to MySQL server on ‘10.0.0.15‘ (111)) Mon Jun 28 23:19:59 2021 - [warning] Connection failed 4 time(s).. Mon Jun 28 23:19:59 2021 - [warning] Master is not reachable from health checker! Mon Jun 28 23:19:59 2021 - [warning] Master 10.0.0.15(10.0.0.15:3306) is not reachable! Mon Jun 28 23:19:59 2021 - [warning] SSH is reachable. Mon Jun 28 23:19:59 2021 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mastermha/app1.cnf again, and trying to connect to all servers to check server status.. Mon Jun 28 23:19:59 2021 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. 
Mon Jun 28 23:19:59 2021 - [info] Reading application default configuration from /etc/mastermha/app1.cnf.. Mon Jun 28 23:19:59 2021 - [info] Reading server configuration from /etc/mastermha/app1.cnf.. Mon Jun 28 23:20:00 2021 - [info] GTID failover mode = 0 Mon Jun 28 23:20:00 2021 - [info] Dead Servers: Mon Jun 28 23:20:00 2021 - [info] 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:00 2021 - [info] Alive Servers: Mon Jun 28 23:20:00 2021 - [info] 10.0.0.16(10.0.0.16:3306) Mon Jun 28 23:20:00 2021 - [info] 10.0.0.17(10.0.0.17:3306) Mon Jun 28 23:20:00 2021 - [info] Alive Slaves: Mon Jun 28 23:20:00 2021 - [info] 10.0.0.16(10.0.0.16:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:00 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:00 2021 - [info] Primary candidate for the new Master (candidate_master is set) Mon Jun 28 23:20:00 2021 - [info] 10.0.0.17(10.0.0.17:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:00 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:00 2021 - [info] Checking slave configurations.. Mon Jun 28 23:20:00 2021 - [info] read_only=1 is not set on slave 10.0.0.16(10.0.0.16:3306). Mon Jun 28 23:20:00 2021 - [info] Checking replication filtering settings.. Mon Jun 28 23:20:00 2021 - [info] Replication filtering check ok. Mon Jun 28 23:20:00 2021 - [info] Master is down! Mon Jun 28 23:20:00 2021 - [info] Terminating monitoring script. Mon Jun 28 23:20:00 2021 - [info] Got exit code 20 (Master dead). Mon Jun 28 23:20:00 2021 - [info] MHA::MasterFailover version 0.58. Mon Jun 28 23:20:00 2021 - [info] Starting master failover. Mon Jun 28 23:20:00 2021 - [info] Mon Jun 28 23:20:00 2021 - [info] * Phase 1: Configuration Check Phase.. 
Mon Jun 28 23:20:00 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] GTID failover mode = 0 Mon Jun 28 23:20:01 2021 - [info] Dead Servers: Mon Jun 28 23:20:01 2021 - [info] 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:01 2021 - [info] Checking master reachability via MySQL(double check)... Mon Jun 28 23:20:01 2021 - [info] ok. Mon Jun 28 23:20:01 2021 - [info] Alive Servers: Mon Jun 28 23:20:01 2021 - [info] 10.0.0.16(10.0.0.16:3306) Mon Jun 28 23:20:01 2021 - [info] 10.0.0.17(10.0.0.17:3306) Mon Jun 28 23:20:01 2021 - [info] Alive Slaves: Mon Jun 28 23:20:01 2021 - [info] 10.0.0.16(10.0.0.16:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:01 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:01 2021 - [info] Primary candidate for the new Master (candidate_master is set) Mon Jun 28 23:20:01 2021 - [info] 10.0.0.17(10.0.0.17:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:01 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:01 2021 - [info] Starting Non-GTID based failover. Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] ** Phase 1: Configuration Check Phase completed. Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] * Phase 2: Dead Master Shutdown Phase.. Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] Forcing shutdown so that applications never connect to the current master.. Mon Jun 28 23:20:01 2021 - [info] Executing master IP deactivation script: Mon Jun 28 23:20:01 2021 - [info] /usr/local/bin/master_ip_failover --orig_master_host=10.0.0.15 --orig_master_ip=10.0.0.15 --orig_master_port=3306 --command=stopssh --ssh_user=root IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.254 >/dev/null 2>&1=== Disabling the VIP on old master: 10.0.0.15 Mon Jun 28 23:20:01 2021 - [info] done. 
Mon Jun 28 23:20:01 2021 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Mon Jun 28 23:20:01 2021 - [info] * Phase 2: Dead Master Shutdown Phase completed. Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] * Phase 3: Master Recovery Phase.. Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] * Phase 3.1: Getting Latest Slaves Phase.. Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] The latest binary log file/position on all slaves is mysql-bin.000005:156 Mon Jun 28 23:20:01 2021 - [info] Latest slaves (Slaves that received relay log files to the latest): Mon Jun 28 23:20:01 2021 - [info] 10.0.0.16(10.0.0.16:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:01 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:01 2021 - [info] Primary candidate for the new Master (candidate_master is set) Mon Jun 28 23:20:01 2021 - [info] 10.0.0.17(10.0.0.17:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:01 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:01 2021 - [info] The oldest binary log file/position on all slaves is mysql-bin.000005:156 Mon Jun 28 23:20:01 2021 - [info] Oldest slaves: Mon Jun 28 23:20:01 2021 - [info] 10.0.0.16(10.0.0.16:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:01 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:01 2021 - [info] Primary candidate for the new Master (candidate_master is set) Mon Jun 28 23:20:01 2021 - [info] 10.0.0.17(10.0.0.17:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:01 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:01 2021 - [info] * Phase 3.2: Saving Dead Master‘s Binlog Phase.. 
Mon Jun 28 23:20:01 2021 - [info] Mon Jun 28 23:20:02 2021 - [info] Fetching dead master‘s binary logs.. Mon Jun 28 23:20:02 2021 - [info] Executing command on the dead master 10.0.0.15(10.0.0.15:3306): save_binary_logs --command=save --start_file=mysql-bin.000005 --start_pos=156 --binlog_dir=/data/mysql/ --output_file=/data/mastermha/app1//saved_master_binlog_from_10.0.0.15_3306_20210628232000.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.58 Creating /data/mastermha/app1 if not exists.. ok. Concat binary/relay logs from mysql-bin.000005 pos 156 to mysql-bin.000005 EOF into /data/mastermha/app1//saved_master_binlog_from_10.0.0.15_3306_20210628232000.binlog .. Binlog Checksum enabled Dumping binlog format description event, from position 0 to 156.. ok. No need to dump effective binlog data from /data/mysql//mysql-bin.000005 (pos starts 156, filesize 156). Skipping. Binlog Checksum enabled /data/mastermha/app1//saved_master_binlog_from_10.0.0.15_3306_20210628232000.binlog has no effective data events. Event not exists. Mon Jun 28 23:20:03 2021 - [info] Additional events were not found from the orig master. No need to save. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] * Phase 3.3: Determining New Master Phase.. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] Finding the latest slave that has all relay logs for recovering other slaves.. Mon Jun 28 23:20:03 2021 - [info] All slaves received relay logs to the same position. No need to resync each other. Mon Jun 28 23:20:03 2021 - [info] Searching new master from slaves.. 
Mon Jun 28 23:20:03 2021 - [info] Candidate masters from the configuration file: Mon Jun 28 23:20:03 2021 - [info] 10.0.0.16(10.0.0.16:3306) Version=8.0.21 (oldest major version between slaves) log-bin:enabled Mon Jun 28 23:20:03 2021 - [info] Replicating from 10.0.0.15(10.0.0.15:3306) Mon Jun 28 23:20:03 2021 - [info] Primary candidate for the new Master (candidate_master is set) Mon Jun 28 23:20:03 2021 - [info] Non-candidate masters: Mon Jun 28 23:20:03 2021 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Mon Jun 28 23:20:03 2021 - [info] New master is 10.0.0.16(10.0.0.16:3306) Mon Jun 28 23:20:03 2021 - [info] Starting master failover.. Mon Jun 28 23:20:03 2021 - [info] From: 10.0.0.15(10.0.0.15:3306) (current master) +--10.0.0.16(10.0.0.16:3306) +--10.0.0.17(10.0.0.17:3306) To: 10.0.0.16(10.0.0.16:3306) (new master) +--10.0.0.17(10.0.0.17:3306) Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] * Phase 3.4: New Master Diff Log Generation Phase.. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] This server has all relay logs. No need to generate diff files from the latest slave. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] * Phase 3.5: Master Log Apply Phase.. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed. Mon Jun 28 23:20:03 2021 - [info] Starting recovery on 10.0.0.16(10.0.0.16:3306).. Mon Jun 28 23:20:03 2021 - [info] This server has all relay logs. Waiting all logs to be applied.. Mon Jun 28 23:20:03 2021 - [info] done. Mon Jun 28 23:20:03 2021 - [info] All relay logs were successfully applied. Mon Jun 28 23:20:03 2021 - [info] Getting new master‘s binlog name and position.. Mon Jun 28 23:20:03 2021 - [info] mysql-bin.000002:1229 Mon Jun 28 23:20:03 2021 - [info] All other slaves should start replication from here. 
Statement should be: CHANGE MASTER TO MASTER_HOST=‘10.0.0.16‘, MASTER_PORT=3306, MASTER_LOG_FILE=‘mysql-bin.000002‘, MASTER_LOG_POS=1229, MASTER_USER=‘repluser‘, MASTER_PASSWORD=‘xxx‘; Mon Jun 28 23:20:03 2021 - [info] Executing master IP activate script: Mon Jun 28 23:20:03 2021 - [info] /usr/local/bin/master_ip_failover --command=start --ssh_user=root --orig_master_host=10.0.0.15 --orig_master_ip=10.0.0.15 --orig_master_port=3306 --new_master_host=10.0.0.16 --new_master_ip=10.0.0.16 --new_master_port=3306 --new_master_user=‘mhauser‘ --new_master_password=xxx Unknown option: new_master_user Unknown option: new_master_password IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 10.0.0.100/24;/sbin/arping -I eth0 -c 3 -s 10.0.0.100/24 10.0.0.254 >/dev/null 2>&1=== Enabling the VIP - 10.0.0.100/24 on the new master - 10.0.0.16 Mon Jun 28 23:20:03 2021 - [info] OK. Mon Jun 28 23:20:03 2021 - [info] ** Finished master recovery successfully. Mon Jun 28 23:20:03 2021 - [info] * Phase 3: Master Recovery Phase completed. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] * Phase 4: Slaves Recovery Phase.. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase.. Mon Jun 28 23:20:03 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] -- Slave diff file generation on host 10.0.0.17(10.0.0.17:3306) started, pid: 66090. Check tmp log /data/mastermha/app1//10.0.0.17_3306_20210628232000.log if it takes time.. Mon Jun 28 23:20:04 2021 - [info] Mon Jun 28 23:20:04 2021 - [info] Log messages from 10.0.0.17 ... Mon Jun 28 23:20:04 2021 - [info] Mon Jun 28 23:20:03 2021 - [info] This server has all relay logs. No need to generate diff files from the latest slave. Mon Jun 28 23:20:04 2021 - [info] End of log messages from 10.0.0.17. Mon Jun 28 23:20:04 2021 - [info] -- 10.0.0.17(10.0.0.17:3306) has the latest relay log events. 
Mon Jun 28 23:20:04 2021 - [info] Generating relay diff files from the latest slave succeeded. Mon Jun 28 23:20:04 2021 - [info] Mon Jun 28 23:20:04 2021 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase.. Mon Jun 28 23:20:04 2021 - [info] Mon Jun 28 23:20:04 2021 - [info] -- Slave recovery on host 10.0.0.17(10.0.0.17:3306) started, pid: 66092. Check tmp log /data/mastermha/app1//10.0.0.17_3306_20210628232000.log if it takes time.. Mon Jun 28 23:20:05 2021 - [info] Mon Jun 28 23:20:05 2021 - [info] Log messages from 10.0.0.17 ... Mon Jun 28 23:20:05 2021 - [info] Mon Jun 28 23:20:04 2021 - [info] Starting recovery on 10.0.0.17(10.0.0.17:3306).. Mon Jun 28 23:20:04 2021 - [info] This server has all relay logs. Waiting all logs to be applied.. Mon Jun 28 23:20:04 2021 - [info] done. Mon Jun 28 23:20:04 2021 - [info] All relay logs were successfully applied. Mon Jun 28 23:20:04 2021 - [info] Resetting slave 10.0.0.17(10.0.0.17:3306) and starting replication from the new master 10.0.0.16(10.0.0.16:3306).. Mon Jun 28 23:20:04 2021 - [info] Executed CHANGE MASTER. Mon Jun 28 23:20:04 2021 - [info] Slave started. Mon Jun 28 23:20:05 2021 - [info] End of log messages from 10.0.0.17. Mon Jun 28 23:20:05 2021 - [info] -- Slave recovery on host 10.0.0.17(10.0.0.17:3306) succeeded. Mon Jun 28 23:20:05 2021 - [info] All new slave servers recovered successfully. Mon Jun 28 23:20:05 2021 - [info] Mon Jun 28 23:20:05 2021 - [info] * Phase 5: New master cleanup phase.. Mon Jun 28 23:20:05 2021 - [info] Mon Jun 28 23:20:05 2021 - [info] Resetting slave info on the new master.. Mon Jun 28 23:20:05 2021 - [info] 10.0.0.16: Resetting slave info succeeded. Mon Jun 28 23:20:05 2021 - [info] Master failover to 10.0.0.16(10.0.0.16:3306) completed successfully. Mon Jun 28 23:20:05 2021 - [info] ----- Failover Report ----- app1: MySQL Master failover 10.0.0.15(10.0.0.15:3306) to 10.0.0.16(10.0.0.16:3306) succeeded Master 10.0.0.15(10.0.0.15:3306) is down! 
Check MHA Manager logs at centos7.2009:/data/mastermha/app1/manager.log for details. Started automated(non-interactive) failover. Invalidated master IP address on 10.0.0.15(10.0.0.15:3306) The latest slave 10.0.0.16(10.0.0.16:3306) has all relay logs for recovery. Selected 10.0.0.16(10.0.0.16:3306) as a new master. 10.0.0.16(10.0.0.16:3306): OK: Applying all logs succeeded. 10.0.0.16(10.0.0.16:3306): OK: Activated master IP address. 10.0.0.17(10.0.0.17:3306): This host has the latest relay log events. Generating relay diff files from the latest slave succeeded. 10.0.0.17(10.0.0.17:3306): OK: Applying all logs succeeded. Slave started, replicating from 10.0.0.16(10.0.0.16:3306) 10.0.0.16(10.0.0.16:3306): Resetting slave info succeeded. Master failover to 10.0.0.16(10.0.0.16:3306) completed successfully. Mon Jun 28 23:20:05 2021 - [info] Sending mail..
并且有邮件
# 此时在新master(10.0.0.16)上查看,vip 10.0.0.100已经漂移过来,说明成功
[23:20:22 root@centos8 ~]#ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:fa:59:6e brd ff:ff:ff:ff:ff:ff
inet 10.0.0.16/24 brd 10.0.0.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet 10.0.0.100/24 brd 10.0.0.255 scope global secondary eth0:1
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fefa:596e/64 scope link
valid_lft forever preferred_lft forever
############################
ERROR:
1> 每台机器的防火墙必须关闭,net-tools等必须安装
不然会出现mysql无法启动,vip无法漂移(虽然manager.log无报错)等错误
2> 需要重新启动MHA时,请删掉下面几个file,然后重新配置mysql的主从(change master to..)
[23:16:31 root@centos7 ~]#rm -f /data/mastermha/app1//app1.failover.complete (此情况适合要恢复master)
[23:18:07 root@centos7 ~]#rm -rf /data/mastermha/app1/manager.log
[23:18:10 root@centos7 ~]#rm -f /data/mastermha/app1/app1.failover.error
3> 一切问题都可以从manager.log中查找,如果有其他问题,请qq我 578110218(能帮就帮)