附件下载地址:https://down.51cto.com/data/2464809
1、orchestrator 配置文件PostFailoverProcesses中调用配置脚本
{ "Debug": true, "EnableSyslog": false, "ListenAddress": ":3000", "MySQLTopologyUser": "orchestrator", "MySQLTopologyPassword": "123456", "MySQLTopologyCredentialsConfigFile": "", "MySQLTopologySSLPrivateKeyFile": "", "MySQLTopologySSLCertFile": "", "MySQLTopologySSLCAFile": "", "MySQLTopologySSLSkipVerify": true, "MySQLTopologyUseMutualTLS": false, "BackendDB": "sqlite", "SQLite3DataFile": "/usr/local/orchestrator/orchestrator.sqlite3", "MySQLConnectTimeoutSeconds": 1, "DefaultInstancePort": 3306, "DiscoverByShowSlaveHosts": true, "InstancePollSeconds": 5, "DiscoveryIgnoreReplicaHostnameFilters": [ "a_host_i_want_to_ignore[.]example[.]com", ".*[.]ignore_all_hosts_from_this_domain[.]example[.]com" ], "UnseenInstanceForgetHours": 240, "SnapshotTopologiesIntervalHours": 0, "InstanceBulkOperationsWaitTimeoutSeconds": 10, "HostnameResolveMethod": "default", "MySQLHostnameResolveMethod": "@@hostname", "SkipBinlogServerUnresolveCheck": true, "ExpiryHostnameResolvesMinutes": 60, "RejectHostnameResolvePattern": "", "ReasonableReplicationLagSeconds": 10, "ProblemIgnoreHostnameFilters": [], "VerifyReplicationFilters": false, "ReasonableMaintenanceReplicationLagSeconds": 20, "CandidateInstanceExpireMinutes": 60, "AuditLogFile": "", "AuditToSyslog": false, "RemoveTextFromHostnameDisplay": ".mydomain.com:3306", "ReadOnly": false, "AuthenticationMethod": "", "HTTPAuthUser": "", "HTTPAuthPassword": "", "AuthUserHeader": "", "PowerAuthUsers": [ "*" ], "ClusterNameToAlias": { "127.0.0.1": "test suite" }, "SlaveLagQuery": "", "DetectClusterAliasQuery": "SELECT SUBSTRING_INDEX(@@hostname, '.', 1)", "DetectClusterDomainQuery": "", "DetectInstanceAliasQuery": "", "DetectPromotionRuleQuery": "", "DataCenterPattern": "[.]([^.]+)[.][^.]+[.]mydomain[.]com", "PhysicalEnvironmentPattern": "[.]([^.]+[.][^.]+)[.]mydomain[.]com", "PromotionIgnoreHostnameFilters": [], "DetectSemiSyncEnforcedQuery": "", "ServeAgentsHttp": false, "AgentsServerPort": ":3001", "AgentsUseSSL": false, "AgentsUseMutualTLS": false, "AgentSSLSkipVerify": false, "AgentSSLPrivateKeyFile": "", "AgentSSLCertFile": "", "AgentSSLCAFile": "", "AgentSSLValidOUs": [], "UseSSL": false, "UseMutualTLS": false, "SSLSkipVerify": false, "SSLPrivateKeyFile": "", "SSLCertFile": "", "SSLCAFile": "", "SSLValidOUs": [], "URLPrefix": "", "StatusEndpoint": "/api/status", "StatusSimpleHealth": true, "StatusOUVerify": false, "AgentPollMinutes": 60, "UnseenAgentForgetHours": 6, "StaleSeedFailMinutes": 60, "SeedAcceptableBytesDiff": 8192, "PseudoGTIDPattern": "", "PseudoGTIDPatternIsFixedSubstring": false, "PseudoGTIDMonotonicHint": "asc:", "DetectPseudoGTIDQuery": "", "BinlogEventsChunkSize": 10000, "SkipBinlogEventsContaining": [], "ReduceReplicationAnalysisCount": true, "FailureDetectionPeriodBlockMinutes": 1, "RecoveryPeriodBlockSeconds": 0, "RecoveryIgnoreHostnameFilters": [], "RecoverMasterClusterFilters": [ "*" ], "RecoverIntermediateMasterClusterFilters": [ "*" ], "OnFailureDetectionProcesses": [ "echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log" ], "PreFailoverProcesses": [ "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/recovery.log" ], "PostFailoverProcesses": [ "echo '(for all types) Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log", "/usr/local/bin/orch_hook.sh {failureType} {failureClusterAlias} {failedHost} {successorHost} >> /tmp/orch.log" ], "PostUnsuccessfulFailoverProcesses": [], "PostMasterFailoverProcesses": [ "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Promoted: {successorHost}:{successorPort}' >> /tmp/recovery.log" ], "PostIntermediateMasterFailoverProcesses": [ "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log" ], "CoMasterRecoveryMustPromoteOtherCoMaster": true, "DetachLostSlavesAfterMasterFailover": true, "ApplyMySQLPromotionAfterMasterFailover": true, "PreventCrossDataCenterMasterFailover": false, "PreventCro***egionMasterFailover": false, "MasterFailoverDetachSlaveMasterHost": false, "MasterFailoverLostInstancesDowntimeMinutes": 0, "PostponeSlaveRecoveryOnLagMinutes": 0, "OSCIgnoreHostnameFilters": [], "GraphiteAddr": "", "GraphitePath": "", "GraphiteConvertHostnameDotsToUnderscores": true }
2、orch_hook.sh 注意修改vip和网卡信息,MYSQL_PWD并不需要配置正确的密码,因为我使用的是5.7版本GTID复制,这个是为pseudo_gtid使用的
#!/bin/bash isitdead=$1 cluster=$2 oldmaster=$3 newmaster=$4 mysqluser="orchestrator" export MYSQL_PWD="xxxpassxxx" logfile="/var/log/orch_hook.log" # list of clusternames clusternames=(rep blea lajos) # clustername=( interface IP user Inter_IP) rep=( enp0s8 "192.168.56.121" root "192.168.56.125") if [[ $isitdead == "DeadMaster" ]]; then array=( enp0s8 "192.168.56.121" root "192.168.56.125") interface=${array[0]} IP=${array[1]} user=${array[2]} if [ ! -z ${IP} ] ; then echo $(date) echo "Revocering from: $isitdead" echo "New master is: $newmaster" echo "/usr/local/bin/orch_vip.sh -d 1 -n $newmaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster" | tee $logfile /usr/local/bin/orch_vip.sh -d 1 -n $newmaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster mysql -h$newmaster -u$mysqluser < /usr/local/bin/orch_event.sql else echo "Cluster does not exist!" | tee $logfile fi elif [[ $isitdead == "DeadIntermediateMasterWithSingleSlaveFailingToConnect" ]]; then array=( enp0s8 "192.168.56.121" root "192.168.56.125") interface=${array[0]} IP=${array[3]} user=${array[2]} slavehost=`echo $5 | cut -d":" -f1` echo $(date) echo "Revocering from: $isitdead" echo "New intermediate master is: $slavehost" echo "/usr/local/bin/orch_vip.sh -d 1 -n $slavehost -i ${interface} -I ${IP} -u ${user} -o $oldmaster" | tee $logfile /usr/local/bin/orch_vip.sh -d 1 -n $slavehost -i ${interface} -I ${IP} -u ${user} -o $oldmaster elif [[ $isitdead == "DeadIntermediateMaster" ]]; then array=( enp0s8 "192.168.56.121" root "192.168.56.125") interface=${array[0]} IP=${array[3]} user=${array[2]} slavehost=`echo $5 | sed -E "s/:[0-9]+//g" | sed -E "s/,/ /g"` showslave=`mysql -h$newmaster -u$mysqluser -sN -e "SHOW SLAVE HOSTS;" | awk '{print $2}'` newintermediatemaster=`echo $slavehost $showslave | tr ' ' '\n' | sort | uniq -d` echo $(date) echo "Revocering from: $isitdead" echo "New intermediate master is: $newintermediatemaster" echo "/usr/local/bin/orch_vip.sh -d 1 -n $newintermediatemaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster" | tee $logfile /usr/local/bin/orch_vip.sh -d 1 -n $newintermediatemaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster fi
3、orch_vip.sh 脚本
#!/bin/bash emailaddress="email@example.com" sendmail=0 function usage { cat << EOF usage: $0 [-h] [-d master is dead] [-o old master ] [-s ssh options] [-n new master] [-i interface] [-I] [-u SSH user] OPTIONS: -h Show this message -o string Old master hostname or IP address -d int If master is dead should be 1 otherweise it is 0 -s string SSH options -n string New master hostname or IP address -i string Interface exmple eth0:1 -I string Virtual IP -u string SSH user EOF } while getopts ho:d:s:n:i:I:u: flag; do case $flag in o) orig_master="$OPTARG"; ;; d) isitdead="${OPTARG}"; ;; s) ssh_options="${OPTARG}"; ;; n) new_master="$OPTARG"; ;; i) interface="$OPTARG"; ;; I) vip="$OPTARG"; ;; u) ssh_user="$OPTARG"; ;; h) usage; exit 0; ;; *) usage; exit 1; ;; esac done if [ $OPTIND -eq 1 ]; then echo "No options were passed"; usage; fi shift $(( OPTIND - 1 )); # discover commands from our path ssh=$(which ssh) arping=$(which arping) ip2util=$(which ip) # command for adding our vip cmd_vip_add="sudo -n $ip2util address add ${vip} dev ${interface}" # command for deleting our vip cmd_vip_del="sudo -n $ip2util address del ${vip}/32 dev ${interface}" # command for discovering if our vip is enabled cmd_vip_chk="sudo -n $ip2util address show dev ${interface} to ${vip%/*}/32" # command for sending gratuitous arp to announce ip move cmd_arp_fix="sudo -n $arping -c 1 -I ${interface} ${vip%/*} " # command for sending gratuitous arp to announce ip move on current server cmd_local_arp_fix="sudo -n $arping -c 1 -I ${interface} ${vip%/*} " vip_stop() { rc=0 # ensure the vip is removed $ssh ${ssh_options} -tt ${ssh_user}@${orig_master} \ "[ -n \"\$(${cmd_vip_chk})\" ] && ${cmd_vip_del} && sudo ${ip2util} route flush cache || [ -z \"\$(${cmd_vip_chk})\" ]" rc=$? return $rc } vip_start() { rc=0 # ensure the vip is added # this command should exit with failure if we are unable to add the vip # if the vip already exists always exit 0 (whether or not we added it) $ssh ${ssh_options} -tt ${ssh_user}@${new_master} \ "[ -z \"\$(${cmd_vip_chk})\" ] && ${cmd_vip_add} && ${cmd_arp_fix} || [ -n \"\$(${cmd_vip_chk})\" ]" rc=$? $cmd_local_arp_fix return $rc } vip_status() { $arping -c 1 -I ${interface} ${vip%/*} if ping -c 1 -W 1 "$vip"; then return 0 else return 1 fi } if [[ $isitdead == 0 ]]; then echo "Online failover" if vip_stop; then if vip_start; then echo "$vip is moved to $new_master." if [ $sendmail -eq 1 ]; then mail -s "$vip is moved to $new_master." "$emailaddress" < /dev/null &> /dev/null ; fi else echo "Can't add $vip on $new_master!" if [ $sendmail -eq 1 ]; then mail -s "Can't add $vip on $new_master!" "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi else echo $rc echo "Can't remove the $vip from orig_master!" if [ $sendmail -eq 1 ]; then mail -s "Can't remove the $vip from orig_master!" "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi elif [[ $isitdead == 1 ]]; then echo "Master is dead, failover" # make sure the vip is not available if vip_status; then if vip_stop; then if [ $sendmail -eq 1 ]; then mail -s "$vip is removed from orig_master." "$emailaddress" < /dev/null &> /dev/null ; fi else if [ $sendmail -eq 1 ]; then mail -s "Couldn't remove $vip from orig_master." "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi fi if vip_start; then echo "$vip is moved to $new_master." if [ $sendmail -eq 1 ]; then mail -s "$vip is moved to $new_master." "$emailaddress" < /dev/null &> /dev/null ; fi else echo "Can't add $vip on $new_master!" if [ $sendmail -eq 1 ]; then mail -s "Can't add $vip on $new_master!" "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi else echo "Wrong argument, the master is dead or live?" fi
4、orch_event.sql 我并没有使用
create database if not exists meta; use meta; create table if not exists pseudo_gtid_status ( anchor int unsigned not null, originating_mysql_host varchar(128) charset ascii not null, originating_mysql_port int unsigned not null, originating_server_id int unsigned not null, time_generated timestamp not null default current_timestamp, pseudo_gtid_uri varchar(255) charset ascii not null, pseudo_gtid_hint varchar(255) charset ascii not null, PRIMARY KEY (anchor) ); drop event if exists create_pseudo_gtid_event; delimiter $$ create event if not exists create_pseudo_gtid_event on schedule every 5 second starts current_timestamp on completion preserve enable do begin DECLARE lock_result INT; DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; set @connection_id := connection_id(); set @now := now(); set @rand := floor(rand()*(1 << 32)); set @pseudo_gtid_hint := concat_ws(':', lpad(hex(unix_timestamp(@now)), 8, '0'), lpad(hex(@connection_id), 16, '0'), lpad(hex(@rand), 8, '0')); set @_create_statement := concat('drop ', 'view if exists `meta`.`_pseudo_gtid_', 'hint__asc:', @pseudo_gtid_hint, '`'); PREPARE st FROM @_create_statement; EXECUTE st; DEALLOCATE PREPARE st; /*!50600 SET innodb_lock_wait_timeout = 1; */ SET lock_result = GET_LOCK('pseudo_gtid_status', 0); IF lock_result = 1 THEN set @serverid := @@server_id; set @hostname := @@hostname; set @port := @@port; set @pseudo_gtid := concat('pseudo-gtid://', @hostname, ':', @port, '/', @serverid, '/', date(@now), '/', time(@now), '/', @rand); insert into pseudo_gtid_status ( anchor, originating_mysql_host, originating_mysql_port, originating_server_id, time_generated, pseudo_gtid_uri, pseudo_gtid_hint ) values (1, @hostname, @port, @serverid, @now, @pseudo_gtid, @pseudo_gtid_hint) on duplicate key update originating_mysql_host = values(originating_mysql_host), originating_mysql_port = values(originating_mysql_port), originating_server_id = values(originating_server_id), time_generated = values(time_generated), pseudo_gtid_uri = values(pseudo_gtid_uri), pseudo_gtid_hint = values(pseudo_gtid_hint) ; SET lock_result = RELEASE_LOCK('pseudo_gtid_status'); END IF; end $$ delimiter ; set global event_scheduler := 1;
5、集群信息
[root@es3 bin]# cat /etc/hosts 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4 ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 192.168.56.14 es1 192.168.56.15 es2 192.168.56.16 es3 [root@es3 bin]# cat /etc/my.cnf |grep -v ^# [mysqld] datadir=/var/lib/mysql socket=/var/lib/mysql/mysql.sock symbolic-links=0 log-error=/var/log/mysqld.log pid-file=/var/run/mysqld/mysqld.pid gtid-mode = ON enforce-gtid-consistency = ON log-slave-updates = ON log-bin=mysqlbin server-id=3 report_host=es3 [root@es3 bin]#
附记:
orchestrator 本身能完成主从切换满足条件
Requirements
To run any kind of failovers, your topologies must support either:
Oracle GTID (with
MASTER_AUTO_POSITION=1
)MariaDB GTID
Binlog Servers
See MySQL Configuration for more details.
Automated recovery is opt in. Please consider recovery configuration.