简介
一般etcd集群需要3个或3个以上的奇数节点,我们这里使用两个节点作为测试
node1 10.180.11.3,node2 10.180.11.2
ETCD、coredns安装
略
ETCD配置
1 [root@node1 ~]# cat /etc/etcd/etcd.conf 2 #[Member] 3 #ETCD_CORS="" 4 #ETCD_DATA_DIR="/var/lib/etcd/default.etcd" 5 ETCD_DATA_DIR="/home/etcd/data.etcd" 6 #ETCD_WAL_DIR="" 7 #ETCD_LISTEN_PEER_URLS="http://localhost:2380" 8 ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380" 9 ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379" 10 #ETCD_MAX_SNAPSHOTS="5" 11 #ETCD_MAX_WALS="5" 12 ETCD_NAME="etcd_dns_node1" 13 #ETCD_SNAPSHOT_COUNT="100000" 14 #ETCD_HEARTBEAT_INTERVAL="100" 15 #ETCD_ELECTION_TIMEOUT="1000" 16 #ETCD_QUOTA_BACKEND_BYTES="0" 17 #ETCD_MAX_REQUEST_BYTES="1572864" 18 #ETCD_GRPC_KEEPALIVE_MIN_TIME="5s" 19 #ETCD_GRPC_KEEPALIVE_INTERVAL="2h0m0s" 20 #ETCD_GRPC_KEEPALIVE_TIMEOUT="20s" 21 # 22 #[Clustering] 23 #ETCD_INITIAL_ADVERTISE_PEER_URLS="http://localhost:2380" 24 ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.3:2380" 25 #ETCD_ADVERTISE_CLIENT_URLS="http://localhost:2379" 26 ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.3:2379" 27 #ETCD_DISCOVERY="" 28 #ETCD_DISCOVERY_FALLBACK="proxy" 29 #ETCD_DISCOVERY_PROXY="" 30 #ETCD_DISCOVERY_SRV="" 31 ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380" 32 ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster" 33 ETCD_INITIAL_CLUSTER_STATE="new" 34 #ETCD_STRICT_RECONFIG_CHECK="true" 35 #ETCD_ENABLE_V2="true" 36 # 37 #[Proxy] 38 #ETCD_PROXY="off" 39 #ETCD_PROXY_FAILURE_WAIT="5000" 40 #ETCD_PROXY_REFRESH_INTERVAL="30000" 41 #ETCD_PROXY_DIAL_TIMEOUT="1000" 42 #ETCD_PROXY_WRITE_TIMEOUT="5000" 43 #ETCD_PROXY_READ_TIMEOUT="0" 44 # 45 #[Security] 46 #ETCD_CERT_FILE="" 47 #ETCD_KEY_FILE="" 48 #ETCD_CLIENT_CERT_AUTH="false" 49 #ETCD_TRUSTED_CA_FILE="" 50 #ETCD_AUTO_TLS="false" 51 #ETCD_PEER_CERT_FILE="" 52 #ETCD_PEER_KEY_FILE="" 53 #ETCD_PEER_CLIENT_CERT_AUTH="false" 54 #ETCD_PEER_TRUSTED_CA_FILE="" 55 #ETCD_PEER_AUTO_TLS="false" 56 # 57 #[Logging] 58 #ETCD_DEBUG="false" 59 #ETCD_LOG_PACKAGE_LEVELS="" 60 #ETCD_LOG_OUTPUT="default" 61 # 62 #[Unsafe] 63 #ETCD_FORCE_NEW_CLUSTER="false" 64 # 65 
#[Version] 66 #ETCD_VERSION="false" 67 #ETCD_AUTO_COMPACTION_RETENTION="0" 68 # 69 #[Profiling] 70 #ETCD_ENABLE_PPROF="false" 71 #ETCD_METRICS="basic" 72 # 73 #[Auth] 74 #ETCD_AUTH_TOKEN="simple" 75 [root@node1 ~]# 76 77 78 [root@node1 ~]# cat /etc/etcd/etcd.conf 79 #[Member] 80 ETCD_DATA_DIR="/home/etcd/data.etcd" 81 ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380" 82 ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379" 83 ETCD_NAME="etcd_dns_node2" 84 # 85 #[Clustering] 86 ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380" 87 ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379" 88 ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380" 89 ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster" 90 ETCD_INITIAL_CLUSTER_STATE="existing" 91 # 92 #[Member] 93 #ETCD_CORS="" 94 #ETCD_DATA_DIR="/var/lib/etcd/default.etcd" 95 #ETCD_DATA_DIR="/home/etcd/data.etcd" 96 #ETCD_WAL_DIR="" 97 #ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380" 98 #ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379" 99 #ETCD_MAX_SNAPSHOTS="5" 100 #ETCD_MAX_WALS="5" 101 #ETCD_NAME="etcd_dns_node2" 102 #ETCD_SNAPSHOT_COUNT="100000" 103 #ETCD_HEARTBEAT_INTERVAL="100" 104 #ETCD_ELECTION_TIMEOUT="1000" 105 #ETCD_QUOTA_BACKEND_BYTES="0" 106 #ETCD_MAX_REQUEST_BYTES="1572864" 107 #ETCD_GRPC_KEEPALIVE_MIN_TIME="5s" 108 #ETCD_GRPC_KEEPALIVE_INTERVAL="2h0m0s" 109 #ETCD_GRPC_KEEPALIVE_TIMEOUT="20s" 110 # 111 #[Clustering] 112 #ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380" 113 #ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379" 114 #ETCD_DISCOVERY="" 115 #ETCD_DISCOVERY_FALLBACK="proxy" 116 #ETCD_DISCOVERY_PROXY="" 117 #ETCD_DISCOVERY_SRV="" 118 #ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380" 119 #ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster" 120 #ETCD_INITIAL_CLUSTER_STATE="existing" 121 #ETCD_STRICT_RECONFIG_CHECK="true" 122 #ETCD_ENABLE_V2="true" 123 # 124 #[Proxy] 125 #ETCD_PROXY="off" 126 
#ETCD_PROXY_FAILURE_WAIT="5000" 127 #ETCD_PROXY_REFRESH_INTERVAL="30000" 128 #ETCD_PROXY_DIAL_TIMEOUT="1000" 129 #ETCD_PROXY_WRITE_TIMEOUT="5000" 130 #ETCD_PROXY_READ_TIMEOUT="0" 131 # 132 #[Security] 133 #ETCD_CERT_FILE="" 134 #ETCD_KEY_FILE="" 135 #ETCD_CLIENT_CERT_AUTH="false" 136 #ETCD_TRUSTED_CA_FILE="" 137 #ETCD_AUTO_TLS="false" 138 #ETCD_PEER_CERT_FILE="" 139 #ETCD_PEER_KEY_FILE="" 140 #ETCD_PEER_CLIENT_CERT_AUTH="false" 141 #ETCD_PEER_TRUSTED_CA_FILE="" 142 #ETCD_PEER_AUTO_TLS="false" 143 # 144 #[Logging] 145 #ETCD_DEBUG="false" 146 #ETCD_LOG_PACKAGE_LEVELS="" 147 #ETCD_LOG_OUTPUT="default" 148 # 149 #[Unsafe] 150 #ETCD_FORCE_NEW_CLUSTER="false" 151 # 152 #[Version] 153 #ETCD_VERSION="false" 154 #ETCD_AUTO_COMPACTION_RETENTION="0" 155 # 156 #[Profiling] 157 #ETCD_ENABLE_PPROF="false" 158 #ETCD_METRICS="basic" 159 # 160 #[Auth] 161 #ETCD_AUTH_TOKEN="simple" 162 [root@node1 ~]#
coredns配置
[root@dns-1 ~]# cat /etc/coredns/Corefile .:53 { etcd { stubzones path /skydns endpoint http://10.180.11.3:2379 upstream 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf fallthrough } #cache 160 forward . 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf log errors } [root@dns-1 ~]#
[root@dns-2 ~]# cat /etc/coredns/Corefile .:53 { etcd { stubzones path /skydns endpoint http://10.180.11.3:2379 upstream 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf fallthrough } #cache 160 forward . 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf log errors } [root@dns-2 ~]#
coredns注册成系统服务
在两个节点将coredns注册成服务
[root@dns-1 ~]# cat /usr/lib/systemd/system/coredns.service [Unit] Description=CoreDNS DNS server Documentation=https://coredns.io After=network.target [Service] PermissionsStartOnly=true LimitNOFILE=1048576 LimitNPROC=512 CapabilityBoundingSet=CAP_NET_BIND_SERVICE AmbientCapabilities=CAP_NET_BIND_SERVICE NoNewPrivileges=true User=root ExecStart=/usr/sbin/coredns -quiet -conf /etc/coredns/Corefile ExecReload=/bin/kill -SIGUSR1 $MAINPID Restart=on-failure [Install] WantedBy=multi-user.target [root@dns-1 ~]#
配置开机启动
systemctl enable etcd
systemctl enable coredns
配置etcd API 版本为3
[root@dns-1 ~]# cat .bash_profile export ETCDCTL_API=3
启动etcd 和coredns
systemctl start etcd
systemctl start coredns
etcdctl 命令帮助
[root@dns-1 ~]# etcdctl -h NAME: etcdctl - A simple command line client for etcd3. USAGE: etcdctl VERSION: 3.3.11 API VERSION: 3.3 COMMANDS: get Gets the key or a range of keys put Puts the given key into the store del Removes the specified key or range of keys [key, range_end) txn Txn processes all the requests in one transaction compaction Compacts the event history in etcd alarm disarm Disarms all alarms alarm list Lists all alarms defrag Defragments the storage of the etcd members with given endpoints endpoint health Checks the healthiness of endpoints specified in `--endpoints` flag endpoint status Prints out the status of endpoints specified in `--endpoints` flag endpoint hashkv Prints the KV history hash for each endpoint in --endpoints move-leader Transfers leadership to another etcd cluster member. watch Watches events stream on keys or prefixes version Prints the version of etcdctl lease grant Creates leases lease revoke Revokes leases lease timetolive Get lease information lease list List all active leases lease keep-alive Keeps leases alive (renew) member add Adds a member into the cluster member remove Removes a member from the cluster member update Updates a member in the cluster member list Lists all members in the cluster snapshot save Stores an etcd node backend snapshot to a given file snapshot restore Restores an etcd member snapshot to an etcd directory snapshot status Gets backend snapshot status of a given file make-mirror Makes a mirror at the destination etcd cluster migrate Migrates keys in a v2 store to a mvcc store lock Acquires a named lock elect Observes and participates in leader election auth enable Enables authentication auth disable Disables authentication user add Adds a new user user delete Deletes a user user get Gets detailed information of a user user list Lists all users user passwd Changes password of user user grant-role Grants a role to a user user revoke-role Revokes a role from a user role add Adds a new role role delete 
Deletes a role role get Gets detailed information of a role role list Lists all roles role grant-permission Grants a key to a role role revoke-permission Revokes a key from a role check perf Check the performance of the etcd cluster help Help about any command OPTIONS: --cacert="" verify certificates of TLS-enabled secure servers using this CA bundle --cert="" identify secure client using this TLS certificate file --command-timeout=5s timeout for short running command (excluding dial timeout) --debug[=false] enable client-side debug logging --dial-timeout=2s dial timeout for client connections -d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints --endpoints=[127.0.0.1:2379] gRPC endpoints --hex[=false] print byte strings as hex encoded strings --insecure-discovery[=true] accept insecure SRV records describing cluster endpoints --insecure-skip-tls-verify[=false] skip server certificate verification --insecure-transport[=true] disable transport security for client connections --keepalive-time=2s keepalive time for client connections --keepalive-timeout=6s keepalive timeout for client connections --key="" identify secure client using this TLS key file --user="" username[:password] for authentication (prompt if password is not supplied) -w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)
通过etcdctl设置域名解析:
(1)A记录
删除记录
[root@dns-1 ~]# etcdctl get --prefix Error: range command needs arguments. [root@dns-1 ~]# etcdctl get / --prefix /skydns/com/test1/www/v4 {"host":"1.1.2.3"} /skydns/migu/test/www/v4 {"host":"1.1.1.1"} /skydns/migu/test1/www/v4 {"host":"1.1.2.2"} [root@dns-1 ~]# etcdctl del /skydns/com/ 0 [root@dns-1 ~]# etcdctl get / --prefix /skydns/com/test1/www/v4 {"host":"1.1.2.3"} /skydns/migu/test/www/v4 {"host":"1.1.1.1"} /skydns/migu/test1/www/v4 {"host":"1.1.2.2"} [root@dns-1 ~]# [root@dns-1 ~]# etcdctl del /skydns/com/ -p Error: unknown shorthand flag: 'p' in -p NAME: del - Removes the specified key or range of keys [key, range_end) USAGE: etcdctl del [options] <key> [range_end] OPTIONS: --from-key[=false] delete keys that are greater than or equal to the given key using byte compare --prefix[=false] delete keys with matching prefix --prev-kv[=false] return deleted key-value pairs GLOBAL OPTIONS: --cacert="" verify certificates of TLS-enabled secure servers using this CA bundle --cert="" identify secure client using this TLS certificate file --command-timeout=5s timeout for short running command (excluding dial timeout) --debug[=false] enable client-side debug logging --dial-timeout=2s dial timeout for client connections -d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints --endpoints=[127.0.0.1:2379] gRPC endpoints --hex[=false] print byte strings as hex encoded strings --insecure-discovery[=true] accept insecure SRV records describing cluster endpoints --insecure-skip-tls-verify[=false] skip server certificate verification --insecure-transport[=true] disable transport security for client connections --keepalive-time=2s keepalive time for client connections --keepalive-timeout=6s keepalive timeout for client connections --key="" identify secure client using this TLS key file --user="" username[:password] for authentication (prompt if password is not supplied) -w, --write-out="simple" set the output format (fields, json, protobuf, 
simple, table) Error: unknown shorthand flag: 'p' in -p [root@dns-1 ~]# [root@dns-1 ~]# etcdctl del /skydns/com/ --prefix 1 [root@dns-1 ~]# [root@dns-1 ~]# etcdctl get / --prefix /skydns/migu/test/www/v4 {"host":"1.1.1.1"} /skydns/migu/test1/www/v4 {"host":"1.1.2.2"} [root@dns-1 ~]# [root@dns-1 ~]# etcdctl del / --prefix 2 [root@dns-1 ~]# [root@dns-1 ~]# etcdctl get / --prefix [root@dns-1 ~]#
非常奇怪,两个节点是集群,在一个节点进行数据的增加删除,其他节点应该会同步进行的,但在节点2删除数据后节点1上的数据还在。在之前集群状态是正常的,数据也是在一个节点添加的,后来为了测试 snapshot restore的功能,在做了 snapshot save后删除了源数据,并进行了 snapshot restore,修改数据文件属主属组为etcd,并成功在两个节点启动etcd,测试数据已经恢复,所以当时并没有查看集群的同步状态。需要注意的是,etcd集群需要超过半数的节点存活,否则整个集群将不可用。
[root@dns-2 ~]# etcdctl get / --prefix /skydns/com/test1/www/v4 {"host":"1.1.2.3"} /skydns/migu/test/www/v4 {"host":"1.1.1.1"} /skydns/migu/test1/www/v4 {"host":"1.1.2.2"} [root@dns-2 ~]# etcdctl member list 8e9e05c52164694d, started, etcd_dns_node1, http://localhost:2380, http://10.180.11.3:2379 [root@dns-2 ~]# etcdctl endpoint status 127.0.0.1:2379, 8e9e05c52164694d, 3.3.11, 16 kB, true, 2, 4 [root@dns-2 ~]# etcdctl endpoint health 127.0.0.1:2379 is healthy: successfully committed proposal: took = 500.505µs [root@dns-2 ~]# etcdctl endpoint health --write-out="table" 127.0.0.1:2379 is healthy: successfully committed proposal: took = 503.821µs [root@dns-2 ~]# etcdctl endpoint health --write-out="table" 127.0.0.1:2379 is healthy: successfully committed proposal: took = 1.066859ms [root@dns-2 ~]# [root@dns-2 ~]# etcdctl --write-out="table" endpoint health 127.0.0.1:2379 is healthy: successfully committed proposal: took = 492.012µs [root@dns-2 ~]# [root@dns-2 ~]# etcdctl --write-out="table" endpoint status +----------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +----------------+------------------+---------+---------+-----------+-----------+------------+ | 127.0.0.1:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 | +----------------+------------------+---------+---------+-----------+-----------+------------+ [root@dns-2 ~]# [root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2,10.180.11.3 endpoint status Error: dial tcp: address 10.180.11.3: missing port in address [root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 
10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 7 | | 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 | +------------------+------------------+---------+---------+-----------+-----------+------------+ [root@dns-2 ~]# [root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health 10.180.11.3:2379 is healthy: successfully committed proposal: took = 529.95µs 10.180.11.2:2379 is healthy: successfully committed proposal: took = 472.66
好吧,那我们重启一下etcd看看会是什么情况
[root@dns-2 ~]# systemctl restart etcd [root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health 10.180.11.2:2379 is healthy: successfully committed proposal: took = 513.726µs 10.180.11.3:2379 is healthy: successfully committed proposal: took = 613.431µs [root@dns-1 ~]# systemctl restart etcd [root@dns-1 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 9 | | 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 | +------------------+------------------+---------+---------+-----------+-----------+------------+ [root@dns-1 ~]# [root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 7 | | 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 | +------------------+------------------+---------+---------+-----------+-----------+------------+ [root@dns-2 ~]# etcdctl member list 8e9e05c52164694d, started, etcd_dns_node1, http://localhost:2380, http://10.180.11.3:2379
可以正常重启,但集群还是分裂的,那我们尝试在节点2将节点1加入集群,因为两边数据不一致,估计不会成功
[root@dns-2 ~]# etcdctl member add 8e9e05c52164694d Error: member peer urls not provided. [root@dns-2 ~]# etcdctl member add -h NAME: member add - Adds a member into the cluster USAGE: etcdctl member add <memberName> [options] OPTIONS: --peer-urls="" comma separated peer URLs for the new member. GLOBAL OPTIONS: --cacert="" verify certificates of TLS-enabled secure servers using this CA bundle --cert="" identify secure client using this TLS certificate file --command-timeout=5s timeout for short running command (excluding dial timeout) --debug[=false] enable client-side debug logging --dial-timeout=2s dial timeout for client connections -d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints --endpoints=[127.0.0.1:2379] gRPC endpoints --hex[=false] print byte strings as hex encoded strings --insecure-discovery[=true] accept insecure SRV records describing cluster endpoints --insecure-skip-tls-verify[=false] skip server certificate verification --insecure-transport[=true] disable transport security for client connections --keepalive-time=2s keepalive time for client connections --keepalive-timeout=6s keepalive timeout for client connections --key="" identify secure client using this TLS key file --user="" username[:password] for authentication (prompt if password is not supplied) -w, --write-out="simple" set the output format (fields, json, protobuf, simple, table) [root@dns-2 ~]# etcdctl member add http://10.180.11.2:2380 Error: member peer urls not provided. [root@dns-2 ~]# etcdctl member add --peer-urls="http://10.180.11.2:2380" Error: member name not provided. 
[root@dns-2 ~]# etcdctl member add etcd-dns-cluster --peer-urls="http://10.180.11.2:2380" Member 5d81d60b777579ed added to cluster cdf818194e3a8c32 ETCD_NAME="etcd-dns-cluster" ETCD_INITIAL_CLUSTER="etcd-dns-cluster=http://10.180.11.2:2380,etcd_dns_node1=http://localhost:2380" ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380" ETCD_INITIAL_CLUSTER_STATE="existing" [root@dns-2 ~]# [root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 9 | | 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | false | 3 | 7 | +------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 ~]# etcdctl get / --prefix Error: context deadline exceeded [root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health 10.180.11.2:2379 is healthy: successfully committed proposal: took = 552.875µs 10.180.11.3:2379 is unhealthy: failed to commit proposal: context deadline exceeded Error: unhealthy cluster [root@dns-2 ~]# [root@dns-1 ~]# etcdctl get / --prefix [root@dns-1 ~]#
关闭两个节点etcd,先启节点1
[root@dns-1 ~]# systemctl stop etcd [root@dns-1 ~]# systemctl start etcd [root@dns-1 ~]# etcdctl get / --prefix [root@dns-1 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status Failed to get the status of endpoint 10.180.11.3:2379 (context deadline exceeded) +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 5 | 13 | +------------------+------------------+---------+---------+-----------+-----------+------------+
再启节点2
[root@dns-2 ~]# systemctl start etcd Job for etcd.service failed because a timeout was exceeded. See "systemctl status etcd.service" and "journalctl -xe" for details.
节点1使用之前创建的快照恢复到和节点2一样,并重启etcd
[root@dns-1 ~]# systemctl stop etcd [root@dns-1 ~]# cd /home/etcd/ [root@dns-1 etcd]# ll total 0 drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup [root@dns-1 etcd]# mv data.etcd data.etcd.20200528.bak
[root@dns-1 etcd]# etcdctl snapshot restor -h NAME: snapshot restore - Restores an etcd member snapshot to an etcd directory USAGE: etcdctl snapshot restore <filename> [options] OPTIONS: --data-dir="" Path to the data directory --initial-advertise-peer-urls="http://localhost:2380" List of this member's peer URLs to advertise to the rest of the cluster --initial-cluster="default=http://localhost:2380" Initial cluster configuration for restore bootstrap --initial-cluster-token="etcd-cluster" Initial cluster token for the etcd cluster during restore bootstrap --name="default" Human-readable name for this member --skip-hash-check[=false] Ignore snapshot integrity hash value (required if copied from data directory) --wal-dir="" Path to the WAL directory (use --data-dir if none given) GLOBAL OPTIONS: --cacert="" verify certificates of TLS-enabled secure servers using this CA bundle --cert="" identify secure client using this TLS certificate file --command-timeout=5s timeout for short running command (excluding dial timeout) --debug[=false] enable client-side debug logging --dial-timeout=2s dial timeout for client connections -d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints --endpoints=[127.0.0.1:2379] gRPC endpoints --hex[=false] print byte strings as hex encoded strings --insecure-discovery[=true] accept insecure SRV records describing cluster endpoints --insecure-skip-tls-verify[=false] skip server certificate verification --insecure-transport[=true] disable transport security for client connections --keepalive-time=2s keepalive time for client connections --keepalive-timeout=6s keepalive timeout for client connections --key="" identify secure client using this TLS key file --user="" username[:password] for authentication (prompt if password is not supplied) -w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)
[root@dns-1 etcd]# etcdctl snapshot restor /home/etcd/etcd_backup/etcd_20200527113646.db --data-dir="/home/etcd/data.etcd" 2020-05-28 11:09:15.687417 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32 [root@dns-1 etcd]# [root@dns-1 etcd]# ll total 0 drwx------ 3 root root 20 May 28 11:09 data.etcd drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd.20200528.bak drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup [root@dns-1 etcd]# chown etcd:etcd data.etcd [root@dns-1 etcd]# ll total 0 drwx------ 3 etcd etcd 20 May 28 11:09 data.etcd drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd.20200528.bak drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup [root@dns-1 etcd]# [root@dns-1 etcd]# systemctl start etcd Job for etcd.service failed because the control process exited with error code. See "systemctl status etcd.service" and "journalctl -xe" for details. [root@dns-1 etcd]# journalctl -xe -- The result is failed. May 28 11:09:56 dns-1.novalocal systemd[1]: Unit etcd.service entered failed state. May 28 11:09:56 dns-1.novalocal systemd[1]: etcd.service failed. May 28 11:09:57 dns-1.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart. May 28 11:09:57 dns-1.novalocal systemd[1]: start request repeated too quickly for etcd.service May 28 11:09:57 dns-1.novalocal systemd[1]: Failed to start Etcd Server. -- Subject: Unit etcd.service has failed -- Defined-By: systemd -- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel -- -- Unit etcd.service has failed. -- -- The result is failed. May 28 11:09:57 dns-1.novalocal systemd[1]: Unit etcd.service entered failed state. May 28 11:09:57 dns-1.novalocal systemd[1]: etcd.service failed. May 28 11:10:01 dns-1.novalocal systemd[1]: Started Session 5722 of user root. 
-- Subject: Unit session-5722.scope has finished start-up -- Defined-By: systemd -- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel -- -- Unit session-5722.scope has finished starting up. -- -- The start-up result is done. May 28 11:10:01 dns-1.novalocal systemd[1]: Starting Session 5722 of user root. -- Subject: Unit session-5722.scope has begun start-up -- Defined-By: systemd -- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel -- -- Unit session-5722.scope has begun starting up. May 28 11:10:01 dns-1.novalocal CROND[18120]: (root) CMD (/usr/lib64/sa/sa1 1 1) May 28 11:10:15 dns-1.novalocal sshd[18126]: Connection closed by 127.0.0.1 port 54794 [preauth] [root@dns-1 etcd]# [root@dns-1 etcd]# chown etcd:etcd data.etcd -R [root@dns-1 etcd]# [root@dns-1 etcd]# systemctl start etcd [root@dns-1 etcd]# [root@dns-1 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status Failed to get the status of endpoint 10.180.11.3:2379 (context deadline exceeded) +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 | +------------------+------------------+---------+---------+-----------+-----------+------------+ [root@dns-1 etcd]#
再启节点2
[root@dns-2 ~]# systemctl start etcd Job for etcd.service failed because a timeout was exceeded. See "systemctl status etcd.service" and "journalctl -xe" for details.
关掉node1,节点2 恢复快照重启
[root@dns-1 etcd]# systemctl stop etcd [root@dns-2 etcd]# rm -rf data.etcd [root@dns-2 etcd]# [root@dns-2 etcd]# ll total 20 drwx------. 3 etcd etcd 20 May 26 19:14 data.etcd.bak -rw-r-----. 1 root root 16416 May 27 11:45 etcd_20200527113646.db [root@dns-2 etcd]# [root@dns-2 etcd]# etcdctl snapshot restor /home/etcd/etcd_20200527113646.db --data-dir="/home/etcd/data.etcd" 2020-05-28 11:22:06.237276 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32 [root@dns-2 etcd]# [root@dns-2 etcd]# ll total 20 drwx------. 3 root root 20 May 28 11:22 data.etcd drwx------. 3 etcd etcd 20 May 26 19:14 data.etcd.bak -rw-r-----. 1 root root 16416 May 27 11:45 etcd_20200527113646.db [root@dns-2 etcd]# chown etcd:etcd data.etcd -R [root@dns-2 etcd]# systemctl start etcd
再启节点1
[root@dns-1 etcd]# systemctl start etcd [root@dns-1 etcd]# [root@dns-1 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 | | 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 | +------------------+------------------+---------+---------+-----------+-----------+------------+ [root@dns-1 etcd]#
[root@dns-2 etcd]# etcdctl member add etcd-dns-cluster --peer-urls="http://10.180.11.2:2380" Member 871040fd87e765c5 added to cluster cdf818194e3a8c32 ETCD_NAME="etcd-dns-cluster" ETCD_INITIAL_CLUSTER="etcd-dns-cluster=http://10.180.11.2:2380,etcd_dns_node1=http://localhost:2380" ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380" ETCD_INITIAL_CLUSTER_STATE="existing" [root@dns-2 etcd]# [root@dns-2 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status +------------------+------------------+---------+---------+-----------+-----------+------------+ | ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX | +------------------+------------------+---------+---------+-----------+-----------+------------+ | 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 | | 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | false | 2 | 5 | +------------------+------------------+---------+---------+-----------+-----------+------------+ [root@dns-2 etcd]#
etcd集群添加节点
[root@dns-1 ~]# cat /etc/etcd/etcd.conf |grep -v "#" ETCD_DATA_DIR="/home/etcd/data.etcd" ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380" ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379" ETCD_NAME="etcd_dns_node2" ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380" ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379" ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380,etcd_dns_node3=http://10.180.11.17:2380" ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster" ETCD_INITIAL_CLUSTER_STATE="new" [root@dns-1 ~]# [root@dns-1 ~]# [root@dns-1 ~]# etcdctl member add --peer-urls="http://10.180.11.17:2380" Error: member name not provided. [root@dns-1 ~]# etcdctl member add etcd_dns_node3 --peer-urls="http://10.180.11.17:2380" Member 13db2c9d1758b274 added to cluster b45f8b203d965968 ETCD_NAME="etcd_dns_node3" ETCD_INITIAL_CLUSTER="etcd_dns_node3=http://10.180.11.17:2380,etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380" ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.17:2380" ETCD_INITIAL_CLUSTER_STATE="existing" [root@dns-1 ~]# [root@dns-1 ~]# [root@dns-1 ~]# etcdctl member list 13db2c9d1758b274, unstarted, , http://10.180.11.17:2380, 2820e5fdaeb09ba6, started, etcd_dns_node1, http://10.180.11.3:2380, http://10.180.11.3:2379 523a2b4083effb13, started, etcd_dns_node2, http://10.180.11.2:2380, http://10.180.11.2:2379
启动新节点
[root@dns-3 ~]# systemctl start etcd Job for etcd.service failed because the control process exited with error code. See "systemctl status etcd.service" and "journalctl -xe" for details. [root@dns-3 ~]# [root@dns-3 ~]# [root@dns-3 ~]# journalctl -xe Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server. -- Subject: Unit etcd.service has failed -- Defined-By: systemd -- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel -- -- Unit etcd.service has failed. -- -- The result is failed. Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state. Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed. Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart. Jul 13 22:23:41 dns-3.novalocal systemd[1]: Starting Etcd Server... -- Subject: Unit etcd.service has begun start-up -- Defined-By: systemd -- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel -- -- Unit etcd.service has begun starting up. 
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_ADVERTISE_CLIENT_URLS=http://10.180.11.17:2379 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_ADVERTISE_PEER_URLS=http://10.180.11.17:2380 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER=etcd_dns_node1=http://10.180.11.3:2380,etc Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER_STATE=existing Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER_TOKEN=etcd-dns-cluster Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_NAME, but unused: shadowed by corresponding flag Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_DATA_DIR, but unused: shadowed by corresponding flag Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_LISTEN_CLIENT_URLS, but unused: shadowed by corresponding flag Jul 13 22:23:41 dns-3.novalocal etcd[12609]: etcd Version: 3.3.11 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Git SHA: 2cf9e51 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Go Version: go1.10.3 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Go OS/Arch: linux/amd64 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: setting maximum number of CPUs to 4, total number of available CPUs is 4 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: listening for peers on http://0.0.0.0:2380 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: listening for client requests on 0.0.0.0:2379 Jul 13 22:23:41 dns-3.novalocal etcd[12609]: cannot access data directory: mkdir /home/etcd: permission denied Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service: main process exited, code=exited, 
status=1/FAILURE Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server. -- Subject: Unit etcd.service has failed -- Defined-By: systemd -- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel -- -- Unit etcd.service has failed. -- -- The result is failed. Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state. Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed. Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart. Jul 13 22:23:41 dns-3.novalocal systemd[1]: start request repeated too quickly for etcd.service Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server. -- Subject: Unit etcd.service has failed -- Defined-By: systemd -- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel -- -- Unit etcd.service has failed. -- -- The result is failed. Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state. Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed. 
[root@dns-3 ~]# [root@dns-3 ~]# grep -v "#" /etc/etcd/etcd.conf ETCD_DATA_DIR="/home/etcd/data.etcd" ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380" ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379" ETCD_NAME="etcd_dns_node3" ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.17:2380" ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.17:2379" ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380,etcd_dns_node3=http://10.180.11.17:2380" ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster" ETCD_INITIAL_CLUSTER_STATE="existing" [root@dns-3 ~]# mkdir -p /home/etcd/data.etcd [root@dns-3 ~]# id etcd uid=997(etcd) gid=994(etcd) groups=994(etcd) [root@dns-3 ~]# chown etcd:etcd /home/etcd -R [root@dns-3 ~]# systemctl start etcd [root@dns-3 ~]# systemctl status etcd ● etcd.service - Etcd Server Loaded: loaded (/usr/lib/systemd/system/etcd.service; disabled; vendor preset: disabled) Active: active (running) since Mon 2020-07-13 22:25:46 CST; 18s ago Main PID: 12635 (etcd) CGroup: /system.slice/etcd.service └─12635 /usr/bin/etcd --name=etcd_dns_node3 --data-dir=/home/etcd/data.etcd --listen-client-urls=http://0.0.0.0:2379 Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 2820e5fdaeb09ba6 [http://10.180.11.3:2380] to cluster b45f8b203d965968 Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 523a2b4083effb13 [http://10.180.11.2:2380] to cluster b45f8b203d965968 Jul 13 22:25:46 dns-3.novalocal etcd[12635]: set the initial cluster version to 3.3 Jul 13 22:25:46 dns-3.novalocal etcd[12635]: enabled capabilities for version 3.3 Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 13db2c9d1758b274 [http://10.180.11.17:2380] to cluster b45f8b203d965968 Jul 13 22:25:46 dns-3.novalocal etcd[12635]: ready to serve client requests Jul 13 22:25:46 dns-3.novalocal etcd[12635]: published {Name:etcd_dns_node3 ClientURLs:[http://10.180.11.17:2379]} to cluster b45f8b203d965968 Jul 13 22:25:46 dns-3.novalocal etcd[12635]: serving insecure client 
requests on [::]:2379, this is strongly discouraged! Jul 13 22:25:46 dns-3.novalocal systemd[1]: Started Etcd Server. Jul 13 22:25:46 dns-3.novalocal etcd[12635]: 13db2c9d1758b274 initialzed peer connection; fast-forwarding 8 ticks (election ticks 10) with... peer(s) Hint: Some lines were ellipsized, use -l to show in full. [root@dns-3 ~]# etcdctl member list 13db2c9d1758b274, started, etcd_dns_node3, http://10.180.11.17:2380, http://10.180.11.17:2379 2820e5fdaeb09ba6, started, etcd_dns_node1, http://10.180.11.3:2380, http://10.180.11.3:2379 523a2b4083effb13, started, etcd_dns_node2, http://10.180.11.2:2380, http://10.180.11.2:2379 [root@dns-3 ~]#