etcd+coredns

简介

一般etcd集群需要3个或3个以上的奇数节点,我们这里使用两个节点作为测试

node1 10.180.11.3,node2 10.180.11.2

ETCD、coredns安装


ETCD配置

  1 [root@node1 ~]# cat /etc/etcd/etcd.conf
  2 #[Member]
  3 #ETCD_CORS=""
  4 #ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
  5 ETCD_DATA_DIR="/home/etcd/data.etcd"
  6 #ETCD_WAL_DIR=""
  7 #ETCD_LISTEN_PEER_URLS="http://localhost:2380"
  8 ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
  9 ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
 10 #ETCD_MAX_SNAPSHOTS="5"
 11 #ETCD_MAX_WALS="5"
 12 ETCD_NAME="etcd_dns_node1"
 13 #ETCD_SNAPSHOT_COUNT="100000"
 14 #ETCD_HEARTBEAT_INTERVAL="100"
 15 #ETCD_ELECTION_TIMEOUT="1000"
 16 #ETCD_QUOTA_BACKEND_BYTES="0"
 17 #ETCD_MAX_REQUEST_BYTES="1572864"
 18 #ETCD_GRPC_KEEPALIVE_MIN_TIME="5s"
 19 #ETCD_GRPC_KEEPALIVE_INTERVAL="2h0m0s"
 20 #ETCD_GRPC_KEEPALIVE_TIMEOUT="20s"
 21 #
 22 #[Clustering]
 23 #ETCD_INITIAL_ADVERTISE_PEER_URLS="http://localhost:2380"
 24 ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.3:2380"
 25 #ETCD_ADVERTISE_CLIENT_URLS="http://localhost:2379"
 26 ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.3:2379"
 27 #ETCD_DISCOVERY=""
 28 #ETCD_DISCOVERY_FALLBACK="proxy"
 29 #ETCD_DISCOVERY_PROXY=""
 30 #ETCD_DISCOVERY_SRV=""
 31 ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
 32 ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
 33 ETCD_INITIAL_CLUSTER_STATE="new"
 34 #ETCD_STRICT_RECONFIG_CHECK="true"
 35 #ETCD_ENABLE_V2="true"
 36 #
 37 #[Proxy]
 38 #ETCD_PROXY="off"
 39 #ETCD_PROXY_FAILURE_WAIT="5000"
 40 #ETCD_PROXY_REFRESH_INTERVAL="30000"
 41 #ETCD_PROXY_DIAL_TIMEOUT="1000"
 42 #ETCD_PROXY_WRITE_TIMEOUT="5000"
 43 #ETCD_PROXY_READ_TIMEOUT="0"
 44 #
 45 #[Security]
 46 #ETCD_CERT_FILE=""
 47 #ETCD_KEY_FILE=""
 48 #ETCD_CLIENT_CERT_AUTH="false"
 49 #ETCD_TRUSTED_CA_FILE=""
 50 #ETCD_AUTO_TLS="false"
 51 #ETCD_PEER_CERT_FILE=""
 52 #ETCD_PEER_KEY_FILE=""
 53 #ETCD_PEER_CLIENT_CERT_AUTH="false"
 54 #ETCD_PEER_TRUSTED_CA_FILE=""
 55 #ETCD_PEER_AUTO_TLS="false"
 56 #
 57 #[Logging]
 58 #ETCD_DEBUG="false"
 59 #ETCD_LOG_PACKAGE_LEVELS=""
 60 #ETCD_LOG_OUTPUT="default"
 61 #
 62 #[Unsafe]
 63 #ETCD_FORCE_NEW_CLUSTER="false"
 64 #
 65 #[Version]
 66 #ETCD_VERSION="false"
 67 #ETCD_AUTO_COMPACTION_RETENTION="0"
 68 #
 69 #[Profiling]
 70 #ETCD_ENABLE_PPROF="false"
 71 #ETCD_METRICS="basic"
 72 #
 73 #[Auth]
 74 #ETCD_AUTH_TOKEN="simple"
 75 [root@node1 ~]#
 76 
 77 
 78 [root@node1 ~]# cat /etc/etcd/etcd.conf
 79 #[Member]
 80 ETCD_DATA_DIR="/home/etcd/data.etcd"
 81 ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
 82 ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
 83 ETCD_NAME="etcd_dns_node2"
 84 #
 85 #[Clustering]
 86 ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
 87 ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379"
 88 ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
 89 ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
 90 ETCD_INITIAL_CLUSTER_STATE="existing"
 91 #
 92 #[Member]
 93 #ETCD_CORS=""
 94 #ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
 95 #ETCD_DATA_DIR="/home/etcd/data.etcd"
 96 #ETCD_WAL_DIR=""
 97 #ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
 98 #ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
 99 #ETCD_MAX_SNAPSHOTS="5"
100 #ETCD_MAX_WALS="5"
101 #ETCD_NAME="etcd_dns_node2"
102 #ETCD_SNAPSHOT_COUNT="100000"
103 #ETCD_HEARTBEAT_INTERVAL="100"
104 #ETCD_ELECTION_TIMEOUT="1000"
105 #ETCD_QUOTA_BACKEND_BYTES="0"
106 #ETCD_MAX_REQUEST_BYTES="1572864"
107 #ETCD_GRPC_KEEPALIVE_MIN_TIME="5s"
108 #ETCD_GRPC_KEEPALIVE_INTERVAL="2h0m0s"
109 #ETCD_GRPC_KEEPALIVE_TIMEOUT="20s"
110 #
111 #[Clustering]
112 #ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
113 #ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379"
114 #ETCD_DISCOVERY=""
115 #ETCD_DISCOVERY_FALLBACK="proxy"
116 #ETCD_DISCOVERY_PROXY=""
117 #ETCD_DISCOVERY_SRV=""
118 #ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
119 #ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
120 #ETCD_INITIAL_CLUSTER_STATE="existing"
121 #ETCD_STRICT_RECONFIG_CHECK="true"
122 #ETCD_ENABLE_V2="true"
123 #
124 #[Proxy]
125 #ETCD_PROXY="off"
126 #ETCD_PROXY_FAILURE_WAIT="5000"
127 #ETCD_PROXY_REFRESH_INTERVAL="30000"
128 #ETCD_PROXY_DIAL_TIMEOUT="1000"
129 #ETCD_PROXY_WRITE_TIMEOUT="5000"
130 #ETCD_PROXY_READ_TIMEOUT="0"
131 #
132 #[Security]
133 #ETCD_CERT_FILE=""
134 #ETCD_KEY_FILE=""
135 #ETCD_CLIENT_CERT_AUTH="false"
136 #ETCD_TRUSTED_CA_FILE=""
137 #ETCD_AUTO_TLS="false"
138 #ETCD_PEER_CERT_FILE=""
139 #ETCD_PEER_KEY_FILE=""
140 #ETCD_PEER_CLIENT_CERT_AUTH="false"
141 #ETCD_PEER_TRUSTED_CA_FILE=""
142 #ETCD_PEER_AUTO_TLS="false"
143 #
144 #[Logging]
145 #ETCD_DEBUG="false"
146 #ETCD_LOG_PACKAGE_LEVELS=""
147 #ETCD_LOG_OUTPUT="default"
148 #
149 #[Unsafe]
150 #ETCD_FORCE_NEW_CLUSTER="false"
151 #
152 #[Version]
153 #ETCD_VERSION="false"
154 #ETCD_AUTO_COMPACTION_RETENTION="0"
155 #
156 #[Profiling]
157 #ETCD_ENABLE_PPROF="false"
158 #ETCD_METRICS="basic"
159 #
160 #[Auth]
161 #ETCD_AUTH_TOKEN="simple"
162 [root@node1 ~]#

 


coredns配置

[root@dns-1 ~]# cat /etc/coredns/Corefile
.:53 {
etcd {
stubzones
path /skydns
endpoint http://10.180.11.3:2379
upstream 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf 
fallthrough
}
#cache 160
forward . 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf
log
errors
}
[root@dns-1 ~]#

 

[root@dns-2 ~]# cat /etc/coredns/Corefile
.:53 {
etcd {
stubzones
path /skydns
endpoint http://10.180.11.3:2379
upstream 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf 
fallthrough
}
#cache 160
forward . 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf
log
errors
}
[root@dns-2 ~]#

 

coredns注册成系统服务

在两个节点将coredns注册成服务

[root@dns-1 ~]# cat /usr/lib/systemd/system/coredns.service
[Unit]
Description=CoreDNS DNS server
Documentation=https://coredns.io
After=network.target

[Service]
PermissionsStartOnly=true
LimitNOFILE=1048576
LimitNPROC=512
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
AmbientCapabilities=CAP_NET_BIND_SERVICE
NoNewPrivileges=true
User=root
ExecStart=/usr/sbin/coredns -quiet -conf /etc/coredns/Corefile
ExecReload=/bin/kill -SIGUSR1 $MAINPID
Restart=on-failure

[Install]
WantedBy=multi-user.target
[root@dns-1 ~]#

 

配置开机启动

systemctl enable etcd
systemctl enable coredns

配置etcd API 版本为3

 

[root@dns-1 ~]# cat .bash_profile
export ETCDCTL_API=3

 

启动etcd 和coredns

 

systemctl start etcd
systemctl start coredns

 

etcdctl 命令帮助

 

[root@dns-1 ~]# etcdctl -h
NAME:
etcdctl - A simple command line client for etcd3.

USAGE:
etcdctl

VERSION:
3.3.11

API VERSION:
3.3


COMMANDS:
get Gets the key or a range of keys
put Puts the given key into the store
del Removes the specified key or range of keys [key, range_end)
txn Txn processes all the requests in one transaction
compaction Compacts the event history in etcd
alarm disarm Disarms all alarms
alarm list Lists all alarms
defrag Defragments the storage of the etcd members with given endpoints
endpoint health Checks the healthiness of endpoints specified in `--endpoints` flag
endpoint status Prints out the status of endpoints specified in `--endpoints` flag
endpoint hashkv Prints the KV history hash for each endpoint in --endpoints
move-leader Transfers leadership to another etcd cluster member.
watch Watches events stream on keys or prefixes
version Prints the version of etcdctl
lease grant Creates leases
lease revoke Revokes leases
lease timetolive Get lease information
lease list List all active leases
lease keep-alive Keeps leases alive (renew)
member add Adds a member into the cluster
member remove Removes a member from the cluster
member update Updates a member in the cluster
member list Lists all members in the cluster
snapshot save Stores an etcd node backend snapshot to a given file
snapshot restore Restores an etcd member snapshot to an etcd directory
snapshot status Gets backend snapshot status of a given file
make-mirror Makes a mirror at the destination etcd cluster
migrate Migrates keys in a v2 store to a mvcc store
lock Acquires a named lock
elect Observes and participates in leader election
auth enable Enables authentication
auth disable Disables authentication
user add Adds a new user
user delete Deletes a user
user get Gets detailed information of a user
user list Lists all users
user passwd Changes password of user
user grant-role Grants a role to a user
user revoke-role Revokes a role from a user
role add Adds a new role
role delete Deletes a role
role get Gets detailed information of a role
role list Lists all roles
role grant-permission Grants a key to a role
role revoke-permission Revokes a key from a role
check perf Check the performance of the etcd cluster
help Help about any command

OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)

 

通过etcdctl设置域名解析:


(1)A记录


删除记录

 

[root@dns-1 ~]# etcdctl get --prefix
Error: range command needs arguments.
[root@dns-1 ~]# etcdctl get / --prefix
/skydns/com/test1/www/v4
{"host":"1.1.2.3"}
/skydns/migu/test/www/v4
{"host":"1.1.1.1"}
/skydns/migu/test1/www/v4
{"host":"1.1.2.2"}
[root@dns-1 ~]# etcdctl del /skydns/com/
0
[root@dns-1 ~]# etcdctl get / --prefix
/skydns/com/test1/www/v4
{"host":"1.1.2.3"}
/skydns/migu/test/www/v4
{"host":"1.1.1.1"}
/skydns/migu/test1/www/v4
{"host":"1.1.2.2"}
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl del /skydns/com/ -p
Error: unknown shorthand flag: 'p' in -p
NAME:
del - Removes the specified key or range of keys [key, range_end)

USAGE:
etcdctl del [options] <key> [range_end]

OPTIONS:
--from-key[=false] delete keys that are greater than or equal to the given key using byte compare
--prefix[=false] delete keys with matching prefix
--prev-kv[=false] return deleted key-value pairs

GLOBAL OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)


Error: unknown shorthand flag: 'p' in -p
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl del /skydns/com/ --prefix
1
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl get / --prefix
/skydns/migu/test/www/v4
{"host":"1.1.1.1"}
/skydns/migu/test1/www/v4
{"host":"1.1.2.2"}
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl del / --prefix
2
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl get / --prefix
[root@dns-1 ~]#

 


非常奇怪:两个节点组成集群,在一个节点上进行数据的增加、删除,其他节点本应同步进行,但在节点2删除数据后,节点1上的数据还在。此前集群状态是正常的,数据也是在一个节点上添加的;后来为了测试 snapshot restore 的功能,在做了 snapshot save 后删除了源数据,并进行了 snapshot restore,修改数据文件属主属组为 etcd,并成功在两个节点启动 etcd,测试数据已经恢复,所以当时并没有查看集群的同步状态。从下面 endpoint status 的输出可以看到,两个 endpoint 的 ID 相同且都是 leader,说明两个节点实际上各自成了单节点集群。另外,etcd 集群需要超过半数的节点存活,否则整个集群将不可用。

[root@dns-2 ~]# etcdctl get / --prefix
/skydns/com/test1/www/v4
{"host":"1.1.2.3"}
/skydns/migu/test/www/v4
{"host":"1.1.1.1"}
/skydns/migu/test1/www/v4
{"host":"1.1.2.2"}
[root@dns-2 ~]# etcdctl member list
8e9e05c52164694d, started, etcd_dns_node1, http://localhost:2380, http://10.180.11.3:2379
[root@dns-2 ~]# etcdctl endpoint status
127.0.0.1:2379, 8e9e05c52164694d, 3.3.11, 16 kB, true, 2, 4
[root@dns-2 ~]# etcdctl endpoint health
127.0.0.1:2379 is healthy: successfully committed proposal: took = 500.505µs
[root@dns-2 ~]# etcdctl endpoint health --write-out="table"
127.0.0.1:2379 is healthy: successfully committed proposal: took = 503.821µs
[root@dns-2 ~]# etcdctl endpoint health --write-out="table"
127.0.0.1:2379 is healthy: successfully committed proposal: took = 1.066859ms
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" endpoint health
127.0.0.1:2379 is healthy: successfully committed proposal: took = 492.012µs
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" endpoint status
+----------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+----------------+------------------+---------+---------+-----------+-----------+------------+
| 127.0.0.1:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+----------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2,10.180.11.3 endpoint status
Error: dial tcp: address 10.180.11.3: missing port in address
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 7 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health
10.180.11.3:2379 is healthy: successfully committed proposal: took = 529.95µs
10.180.11.2:2379 is healthy: successfully committed proposal: took = 472.66

 


好吧,那我们重启一下etcd看看会是什么情况

[root@dns-2 ~]# systemctl restart etcd
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health
10.180.11.2:2379 is healthy: successfully committed proposal: took = 513.726µs
10.180.11.3:2379 is healthy: successfully committed proposal: took = 613.431µs

[root@dns-1 ~]# systemctl restart etcd
[root@dns-1 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 9 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-1 ~]#

[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 7 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 ~]# etcdctl member list
8e9e05c52164694d, started, etcd_dns_node1, http://localhost:2380, http://10.180.11.3:2379

 

可以正常重启,但集群还是分裂的。那我们尝试在节点2将节点1加入集群;因为两边数据不一致,估计不会成功。

 

[root@dns-2 ~]# etcdctl member add 8e9e05c52164694d
Error: member peer urls not provided.
[root@dns-2 ~]# etcdctl member add -h
NAME:
member add - Adds a member into the cluster

USAGE:
etcdctl member add <memberName> [options]

OPTIONS:
--peer-urls="" comma separated peer URLs for the new member.

GLOBAL OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)

[root@dns-2 ~]# etcdctl member add http://10.180.11.2:2380
Error: member peer urls not provided.
[root@dns-2 ~]# etcdctl member add --peer-urls="http://10.180.11.2:2380"
Error: member name not provided.
[root@dns-2 ~]# etcdctl member add etcd-dns-cluster --peer-urls="http://10.180.11.2:2380"
Member 5d81d60b777579ed added to cluster cdf818194e3a8c32

ETCD_NAME="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER="etcd-dns-cluster=http://10.180.11.2:2380,etcd_dns_node1=http://localhost:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
ETCD_INITIAL_CLUSTER_STATE="existing"
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 9 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | false | 3 | 7 |
+------------------+------------------+---------+---------+-----------+-----------+------------+

 



[root@dns-2 ~]# etcdctl get / --prefix
Error: context deadline exceeded
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health
10.180.11.2:2379 is healthy: successfully committed proposal: took = 552.875µs
10.180.11.3:2379 is unhealthy: failed to commit proposal: context deadline exceeded
Error: unhealthy cluster
[root@dns-2 ~]#
[root@dns-1 ~]# etcdctl get / --prefix
[root@dns-1 ~]#


关闭两个节点etcd,先启节点1

[root@dns-1 ~]# systemctl stop etcd
[root@dns-1 ~]# systemctl start etcd
[root@dns-1 ~]# etcdctl get / --prefix
[root@dns-1 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
Failed to get the status of endpoint 10.180.11.3:2379 (context deadline exceeded)
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 5 | 13 |
+------------------+------------------+---------+---------+-----------+-----------+------------+

 


再启节点2

[root@dns-2 ~]# systemctl start etcd
Job for etcd.service failed because a timeout was exceeded. See "systemctl status etcd.service" and "journalctl -xe" for details.

 


节点1使用之前创建的快照恢复到和节点2一样,并重启etcd

[root@dns-1 ~]# systemctl stop etcd
[root@dns-1 ~]# cd /home/etcd/
[root@dns-1 etcd]# ll
total 0
drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd
drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak
drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup
[root@dns-1 etcd]# mv data.etcd data.etcd.20200528.bak

 

 

[root@dns-1 etcd]# etcdctl snapshot restor -h
NAME:
snapshot restore - Restores an etcd member snapshot to an etcd directory

USAGE:
etcdctl snapshot restore <filename> [options]

OPTIONS:
--data-dir="" Path to the data directory
--initial-advertise-peer-urls="http://localhost:2380" List of this member's peer URLs to advertise to the rest of the cluster
--initial-cluster="default=http://localhost:2380" Initial cluster configuration for restore bootstrap
--initial-cluster-token="etcd-cluster" Initial cluster token for the etcd cluster during restore bootstrap
--name="default" Human-readable name for this member
--skip-hash-check[=false] Ignore snapshot integrity hash value (required if copied from data directory)
--wal-dir="" Path to the WAL directory (use --data-dir if none given)

GLOBAL OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)

 




[root@dns-1 etcd]# etcdctl snapshot restor /home/etcd/etcd_backup/etcd_20200527113646.db --data-dir="/home/etcd/data.etcd"
2020-05-28 11:09:15.687417 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32
[root@dns-1 etcd]#
[root@dns-1 etcd]# ll
total 0
drwx------ 3 root root 20 May 28 11:09 data.etcd
drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd.20200528.bak
drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak
drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup
[root@dns-1 etcd]# chown etcd:etcd data.etcd
[root@dns-1 etcd]# ll
total 0
drwx------ 3 etcd etcd 20 May 28 11:09 data.etcd
drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd.20200528.bak
drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak
drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup
[root@dns-1 etcd]#
[root@dns-1 etcd]# systemctl start etcd
Job for etcd.service failed because the control process exited with error code. See "systemctl status etcd.service" and "journalctl -xe" for details.
[root@dns-1 etcd]# journalctl -xe
-- The result is failed.
May 28 11:09:56 dns-1.novalocal systemd[1]: Unit etcd.service entered failed state.
May 28 11:09:56 dns-1.novalocal systemd[1]: etcd.service failed.
May 28 11:09:57 dns-1.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart.
May 28 11:09:57 dns-1.novalocal systemd[1]: start request repeated too quickly for etcd.service
May 28 11:09:57 dns-1.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
May 28 11:09:57 dns-1.novalocal systemd[1]: Unit etcd.service entered failed state.
May 28 11:09:57 dns-1.novalocal systemd[1]: etcd.service failed.
May 28 11:10:01 dns-1.novalocal systemd[1]: Started Session 5722 of user root.
-- Subject: Unit session-5722.scope has finished start-up
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit session-5722.scope has finished starting up.
--
-- The start-up result is done.
May 28 11:10:01 dns-1.novalocal systemd[1]: Starting Session 5722 of user root.
-- Subject: Unit session-5722.scope has begun start-up
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit session-5722.scope has begun starting up.
May 28 11:10:01 dns-1.novalocal CROND[18120]: (root) CMD (/usr/lib64/sa/sa1 1 1)
May 28 11:10:15 dns-1.novalocal sshd[18126]: Connection closed by 127.0.0.1 port 54794 [preauth]
[root@dns-1 etcd]#
[root@dns-1 etcd]# chown etcd:etcd data.etcd -R
[root@dns-1 etcd]#
[root@dns-1 etcd]# systemctl start etcd
[root@dns-1 etcd]#
[root@dns-1 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
Failed to get the status of endpoint 10.180.11.3:2379 (context deadline exceeded)
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-1 etcd]#

 

再启节点2


[root@dns-2 ~]# systemctl start etcd
Job for etcd.service failed because a timeout was exceeded. See "systemctl status etcd.service" and "journalctl -xe" for details.

 


关掉node1,节点2 恢复快照重启

 

[root@dns-1 etcd]# systemctl stop etcd
[root@dns-2 etcd]# rm -rf data.etcd
[root@dns-2 etcd]#
[root@dns-2 etcd]# ll
total 20
drwx------. 3 etcd etcd 20 May 26 19:14 data.etcd.bak
-rw-r-----. 1 root root 16416 May 27 11:45 etcd_20200527113646.db
[root@dns-2 etcd]#
[root@dns-2 etcd]# etcdctl snapshot restor /home/etcd/etcd_20200527113646.db --data-dir="/home/etcd/data.etcd"
2020-05-28 11:22:06.237276 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32
[root@dns-2 etcd]#
[root@dns-2 etcd]# ll
total 20
drwx------. 3 root root 20 May 28 11:22 data.etcd
drwx------. 3 etcd etcd 20 May 26 19:14 data.etcd.bak
-rw-r-----. 1 root root 16416 May 27 11:45 etcd_20200527113646.db
[root@dns-2 etcd]# chown etcd:etcd data.etcd -R
[root@dns-2 etcd]# systemctl start etcd

 




再启节点1


[root@dns-1 etcd]# systemctl start etcd
[root@dns-1 etcd]#
[root@dns-1 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-1 etcd]#

 


[root@dns-2 etcd]# etcdctl member add etcd-dns-cluster --peer-urls="http://10.180.11.2:2380"
Member 871040fd87e765c5 added to cluster cdf818194e3a8c32

ETCD_NAME="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER="etcd-dns-cluster=http://10.180.11.2:2380,etcd_dns_node1=http://localhost:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
ETCD_INITIAL_CLUSTER_STATE="existing"
[root@dns-2 etcd]#
[root@dns-2 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | false | 2 | 5 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 etcd]#

 

etcd集群添加节点

 

[root@dns-1 ~]# cat /etc/etcd/etcd.conf |grep -v "#"
ETCD_DATA_DIR="/home/etcd/data.etcd"
ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
ETCD_NAME="etcd_dns_node2"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379"
ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380,etcd_dns_node3=http://10.180.11.17:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
[root@dns-1 ~]#
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl member add --peer-urls="http://10.180.11.17:2380"
Error: member name not provided.
[root@dns-1 ~]# etcdctl member add etcd_dns_node3 --peer-urls="http://10.180.11.17:2380"
Member 13db2c9d1758b274 added to cluster b45f8b203d965968

ETCD_NAME="etcd_dns_node3"
ETCD_INITIAL_CLUSTER="etcd_dns_node3=http://10.180.11.17:2380,etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.17:2380"
ETCD_INITIAL_CLUSTER_STATE="existing"
[root@dns-1 ~]#
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl member list
13db2c9d1758b274, unstarted, , http://10.180.11.17:2380,
2820e5fdaeb09ba6, started, etcd_dns_node1, http://10.180.11.3:2380, http://10.180.11.3:2379
523a2b4083effb13, started, etcd_dns_node2, http://10.180.11.2:2380, http://10.180.11.2:2379

 



启动新节点

[root@dns-3 ~]# systemctl start etcd
Job for etcd.service failed because the control process exited with error code. See "systemctl status etcd.service" and "journalctl -xe" for details.
[root@dns-3 ~]#
[root@dns-3 ~]#
[root@dns-3 ~]# journalctl -xe
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Starting Etcd Server...
-- Subject: Unit etcd.service has begun start-up
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has begun starting up.
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_ADVERTISE_CLIENT_URLS=http://10.180.11.17:2379
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_ADVERTISE_PEER_URLS=http://10.180.11.17:2380
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER=etcd_dns_node1=http://10.180.11.3:2380,etc
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER_STATE=existing
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER_TOKEN=etcd-dns-cluster
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_NAME, but unused: shadowed by corresponding flag
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_DATA_DIR, but unused: shadowed by corresponding flag
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_LISTEN_CLIENT_URLS, but unused: shadowed by corresponding flag
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: etcd Version: 3.3.11
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Git SHA: 2cf9e51
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Go Version: go1.10.3
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Go OS/Arch: linux/amd64
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: setting maximum number of CPUs to 4, total number of available CPUs is 4
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: listening for peers on http://0.0.0.0:2380
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: listening for client requests on 0.0.0.0:2379
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: cannot access data directory: mkdir /home/etcd: permission denied
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service: main process exited, code=exited, status=1/FAILURE
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: start request repeated too quickly for etcd.service
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed.
[root@dns-3 ~]#
[root@dns-3 ~]# grep -v "#" /etc/etcd/etcd.conf
ETCD_DATA_DIR="/home/etcd/data.etcd"
ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
ETCD_NAME="etcd_dns_node3"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.17:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.17:2379"
ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380,etcd_dns_node3=http://10.180.11.17:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER_STATE="existing"
[root@dns-3 ~]# mkdir -p /home/etcd/data.etcd
[root@dns-3 ~]# id etcd
uid=997(etcd) gid=994(etcd) groups=994(etcd)
[root@dns-3 ~]# chown etcd:etcd /home/etcd -R
[root@dns-3 ~]# systemctl start etcd
[root@dns-3 ~]# systemctl status etcd
● etcd.service - Etcd Server
Loaded: loaded (/usr/lib/systemd/system/etcd.service; disabled; vendor preset: disabled)
Active: active (running) since Mon 2020-07-13 22:25:46 CST; 18s ago
Main PID: 12635 (etcd)
CGroup: /system.slice/etcd.service
└─12635 /usr/bin/etcd --name=etcd_dns_node3 --data-dir=/home/etcd/data.etcd --listen-client-urls=http://0.0.0.0:2379

Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 2820e5fdaeb09ba6 [http://10.180.11.3:2380] to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 523a2b4083effb13 [http://10.180.11.2:2380] to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: set the initial cluster version to 3.3
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: enabled capabilities for version 3.3
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 13db2c9d1758b274 [http://10.180.11.17:2380] to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: ready to serve client requests
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: published {Name:etcd_dns_node3 ClientURLs:[http://10.180.11.17:2379]} to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: serving insecure client requests on [::]:2379, this is strongly discouraged!
Jul 13 22:25:46 dns-3.novalocal systemd[1]: Started Etcd Server.
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: 13db2c9d1758b274 initialzed peer connection; fast-forwarding 8 ticks (election ticks 10) with... peer(s)
Hint: Some lines were ellipsized, use -l to show in full.
[root@dns-3 ~]# etcdctl member list
13db2c9d1758b274, started, etcd_dns_node3, http://10.180.11.17:2380, http://10.180.11.17:2379
2820e5fdaeb09ba6, started, etcd_dns_node1, http://10.180.11.3:2380, http://10.180.11.3:2379
523a2b4083effb13, started, etcd_dns_node2, http://10.180.11.2:2380, http://10.180.11.2:2379
[root@dns-3 ~]#

 

上一篇:sys&faker&jsonpath模块、异常处理、多线程、多进程


下一篇:luogu p2367 语文成绩