- 先按node的方式初始化master1节点,使用 kubeadm join 命令
- 拷贝master上的配置到master1上
[root@master1 ~]# scp root@192.168.0.250:/etc/kubernetes/kubelet.conf /etc/kubernetes/kubelet.conf
[root@master1 ~]# scp -r root@192.168.0.250:/etc/kubernetes/pki /etc/kubernetes
[root@master1 ~]# scp root@192.168.0.250:/etc/kubernetes/manifests/etcd.yaml ./
对etcd.yaml 做些修改:
- etcd
- --name=etcd-master1
- --initial-advertise-peer-urls=http://192.168.0.249:2380
- --listen-peer-urls=http://192.168.0.249:2380
- --listen-client-urls=http://192.168.0.249:2379,http://127.0.0.1:2379
- --advertise-client-urls=http://192.168.0.249:2379
- --initial-cluster=etcd-master1=http://192.168.0.249:2380
- --initial-cluster-state=new
- --data-dir=/var/lib/etcd
[root@master1 ~]# cp etcd.yaml /etc/kubernetes/manifests/etcd.yaml
[root@master1 ~]# systemctl daemon-reload
[root@master1 ~]# systemctl restart kubelet
kubectl exec -it etcd-master1.k8s sh -n kube-system
export ETCDCTL_API=3
etcdctl member list
如果没有形成集群,则删除member目录后重启kubelet:
rm -rf /var/lib/etcd/member/
systemctl restart kubelet
[root@master ~]# kubectl get pods --namespace=kube-system
NAME READY STATUS RESTARTS AGE
etcd-master.k8s 1/1 Running 2 4d
etcd-master1.k8s 1/1 Running 0 13m
kube-apiserver-master.k8s 1/1 Running 2 4d
kube-controller-manager-master.k8s 1/1 Running 2 4d
kube-dns-3913472980-tsq3r 3/3 Running 13 4d
kube-flannel-ds-fm014 2/2 Running 12 4d
kube-flannel-ds-lcqrl 2/2 Running 6 4d
kube-flannel-ds-lxf1b 2/2 Running 0 20m
kube-proxy-8fppg 1/1 Running 2 4d
kube-proxy-bpn98 1/1 Running 6 4d
kube-proxy-gssrj 1/1 Running 0 20m
kube-scheduler-master.k8s 1/1 Running 2 4d
kubernetes-dashboard-2039414953-r0pc3 1/1 Running 0 1d
kubernetes-dashboard-2066150588-7z6vf 1/1 Running 0 1d
- 同步master上的etcd数据到master1上的etcd
在master1上无法访问master上的etcd时,需要修改master上的etcd.yaml
[root@master ~]# vim /etc/kubernetes/manifests/etcd.yaml
- --listen-client-urls=http://127.0.0.1:2379,http://master.k8s:2379
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl restart kubelet
稍等一会,等待kubelet重启
[root@master rpm]# kubectl exec -it etcd-master1.k8s -n kube-system sh
cd /usr/local/bin
/usr/local/bin # export ETCDCTL_API=3
/usr/local/bin # etcdctl endpoint status --endpoints=master.k8s:2379
192.168.0.250:2379, 8e9e05c52164694d, 3.0.17, 3.5 MB, true, 3, 14911
/usr/local/bin # etcdctl endpoint status --endpoints=127.0.0.1:2379
127.0.0.1:2379, 5e31d25f1f5fbb7f, 3.0.17, 25 kB, true, 2, 1434
/usr/local/bin # etcdctl make-mirror 127.0.0.1:2379 --endpoints=master.k8s:2379
Error: etcdserver: duplicate key given in txn request
忽略这个error…
/usr/local/bin # etcdctl get --from-key /api/v2/registry/clusterrolebindings/cluster-admin --endpoints=master.k8s:2379
……
compact_rev_key
6104
/usr/local/bin # etcdctl get --from-key /api/v2/registry/clusterrolebindings/cluster-admin --endpoints=127.0.0.1:2379
……
compact_rev_key
6104
两个数据一致都是6104,说明数据全部同步过来了。
- 将master上的api-server连接到master1上的etcd-server
[root@master ~]# vim /etc/kubernetes/manifests/kube-apiserver.yaml
- --etcd-servers=http://127.0.0.1:2379
修改为
- --etcd-servers=http://master1.k8s:2379
[root@master ~]# systemctl restart kubelet
出现下面错误的原因是因为kube-apiserver-master.k8s会重启,等一会就会好了
[root@master ~]# kubectl get pods --namespace=kube-system
The connection to the server 192.168.0.250:6443 was refused - did you specify the right host or port?
- 重建master上的etcd
[root@master ~]# mv /etc/kubernetes/manifests/etcd.yaml ./
[root@master ~]# rm -fr /var/lib/etcd
[root@master ~]# kubectl exec -it etcd-master1.k8s sh -n kube-system
cd /usr/local/bin/
/usr/local/bin # export ETCDCTL_API=3
/usr/local/bin # etcdctl member add etcd-master --peer-urls=http://master.k8s:2380
[root@master ~]# vim etcd.yaml
- etcd
- --name=etcd-master
- --initial-advertise-peer-urls=http://192.168.0.250:2380
- --listen-peer-urls=http://192.168.0.250:2380
- --listen-client-urls=http://192.168.0.250:2379,http://127.0.0.1:2379
- --advertise-client-urls=http://192.168.0.250:2379
- --initial-cluster=etcd-master=http://192.168.0.250:2380,etcd-master1=http://192.168.0.249:2380,etcd-master2=http://192.168.0.251:2380
- --initial-cluster-state=existing
- --data-dir=/var/lib/etcd
[root@master ~]# cp etcd.yaml /etc/kubernetes/manifests/etcd.yaml
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl restart kubelet
等一会儿就能看到etcd-master.k8s 这个pod起来
[root@master ~]# kubectl exec -it etcd-master.k8s sh -n kube-system
/ # cd /usr/local/bin/
/usr/local/bin # export ETCDCTL_API=3
/usr/local/bin # ./etcdctl endpoint status --endpoints=192.168.0.249:2379,192.168.0.250:2379
192.168.0.249:2379, 4cfbf6559386ae97, 3.0.17, 2.0 MB, true, 237, 30759
192.168.0.250:2379, 3d56d08a94c87332, 3.0.17, 2.0 MB, false, 237, 30759
true表示该节点是etcd集群的leader
/usr/local/bin # ./etcdctl endpoint health --endpoints=192.168.0.249:2379,192.168.0.250:2379
192.168.0.249:2379 is healthy: successfully committed proposal: took = 27.179426ms
192.168.0.250:2379 is healthy: successfully committed proposal: took = 94.162395ms
两个节点都很健康。如果出现一个健康、另一个不健康的情况,查看etcd的日志中是否出现了“the clock difference against peer ebd7965c7ef3629a is too high”,出现的话请用前面介绍的ntpdate 方法同步服务器时间。
- 启动master1上的api-server
将master上的kube-apiserver.yaml复制到master1上去,并修改advertise-address和etcd-servers
将各自节点上的kubelet连接到自己节点上的api-server
- 创建master1/master2上的apiserver
[root@master1 ~]# scp root@192.168.0.250:/etc/kubernetes/manifests/kube-apiserver.yaml ./
[root@master1 ~]# vim kube-apiserver.yaml
- --advertise-address=192.168.0.250
- --etcd-servers=http://192.168.0.249:2379
修改为
- --advertise-address=192.168.0.249(这里只能填写具体IP)
- --etcd-servers=http://192.168.0.249:2379,http://192.168.0.250:2379,http://192.168.0.251:2379
[root@master1 ~]# systemctl daemon-reload
[root@master1 ~]# systemctl restart kubelet
- master连接自己的node上的apiserver
[root@master ~]# vim /etc/kubernetes/manifests/kube-apiserver.yaml
- --etcd-servers=http://192.168.0.249:2379
改为
- --etcd-servers=http://192.168.0.249:2379,http://192.168.0.250:2379,http://192.168.0.251:2379
kube-apiserver会自动重启
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl restart kubelet
- 将master1/master2上的kubelet连接自己node上的apiserver
[root@master1 ~]# vim /etc/kubernetes/kubelet.conf
(只能用IP)
server: https://192.168.0.250:6443
修改为
server: https://192.168.0.249:6443
[root@master1 ~]# systemctl status kubelet -l
……
Jun 23 14:51:42 master1.k8s kubelet[25786]: E0623 14:51:42.080539 25786 reflector.go:190] k8s.io/kubernetes/pkg/kubelet/kubelet.go:390: Failed to list *v1.Node: Get https://192.168.0.249:6443/api/v1/nodes?fieldSelector=metadata.name%3Dmaster1.k8s&resourceVersion=0: x509: certificate is valid for 10.96.0.1, 192.168.0.250, not 192.168.0.249
……
[root@master1 ~]# openssl genrsa -out apiserver-master1.key 2048
Generating RSA private key, 2048 bit long modulus
.....................................................................................................+++
......................+++
e is 65537 (0x10001)
[root@master1 ~]# openssl req -new -key apiserver-master1.key -subj "/CN=kube-apiserver," -out apiserver-master1.csr
[root@master1 ~]# vim apiserver-master1.ext
内容如下
subjectAltName = DNS:master1.k8s,DNS:kubernetes,DNS:kubernetes.default,DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, IP:10.96.0.1, IP:192.168.0.249
[root@master1 ~]# openssl x509 -req -in apiserver-master1.csr -CA /etc/kubernetes/pki/ca.crt -CAkey /etc/kubernetes/pki/ca.key -CAcreateserial -out apiserver-master1.crt -days 365 -extfile apiserver-master1.ext
Signature ok
subject=/CN=kube-apiserver,
Getting CA Private Key
[root@master1 ~]# openssl x509 -noout -text -in apiserver-master1.crt
Certificate:
Data:
Version: 3 (0x2)
Serial Number: 14573869911020153756 (0xca40c977e91c2b9c)
Signature Algorithm: sha1WithRSAEncryption
Issuer: CN=kubernetes
Validity
Not Before: Jun 23 07:16:06 2017 GMT
Not After : Jun 23 07:16:06 2018 GMT
Subject: CN=kube-apiserver,
Subject Public Key Info:
Public Key Algorithm: rsaEncryption
Public-Key: (2048 bit)
Modulus:
00:e6:60:eb:30:08:5f:75:e6:92:7c:54:9d:78:83:
ae:9d:b4:7b:85:1a:78:ee:9c:cf:19:f3:3e:1c:60:
3f:a7:f0:9a:83:a9:a1:35:9e:3e:86:10:25:61:7b:
2b:81:bb:13:32:b4:67:36:e1:95:10:b5:06:a5:c4:
8a:a2:f5:04:30:e1:56:be:e0:db:2e:30:f3:ed:78:
74:0b:17:6b:c3:61:c2:25:4b:1a:bd:b3:03:48:d5:
af:b8:f1:0e:64:11:ab:7a:7f:d0:3c:01:a0:f0:d3:
d5:2f:e3:7c:71:be:9a:a6:4d:44:a2:2e:4a:3a:ab:
1a:89:ad:6b:96:66:9f:94:dd:53:2c:f7:14:3e:2f:
05:8b:ef:e8:98:43:89:89:30:89:56:8e:e7:b0:a8:
3c:4c:d4:fa:57:29:3f:43:1d:e9:81:30:35:19:94:
57:bb:46:7d:32:79:ff:45:d4:3b:77:a1:54:14:87:
35:48:a3:e8:aa:6c:db:20:87:f5:b4:6c:bd:b1:ed:
2b:36:29:16:80:d1:d6:a7:a9:12:9f:73:6d:ab:fc:
8d:64:11:67:b3:a0:fb:63:d8:d0:64:f1:36:8f:1d:
7e:29:5b:c1:1b:67:17:75:b6:1f:b1:a3:0b:5b:e2:
2e:5a:a3:e8:50:ef:26:c5:0c:c2:69:d1:1a:b8:19:
be:73
Exponent: 65537 (0x10001)
X509v3 extensions:
X509v3 Subject Alternative Name:
DNS:master1.k8s, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, IP Address:10.96.0.1, IP Address:192.168.0.249
Signature Algorithm: sha1WithRSAEncryption
71:ef:2e:06:01:77:c5:90:8c:89:90:4d:ce:89:bf:9e:5c:e7:
cc:2b:74:01:89:44:92:a0:4d:c9:b4:90:a2:67:af:b7:02:63:
f1:b5:c6:6b:b2:ad:f0:84:79:50:bf:a3:70:5d:32:ac:98:3b:
ca:c6:1f:fe:2e:9d:10:63:19:84:b9:b7:e6:43:00:90:a6:95:
e8:c4:7c:86:1a:08:db:d0:be:99:d7:13:6c:8b:74:ea:1e:4b:
7f:ba:65:50:c0:1b:0a:6b:8f:2a:34:5a:2c:d0:71:98:7b:67:
af:e4:63:33:8b:af:15:5b:f0:04:50:83:f2:d1:21:71:b1:b4:
35:f8:68:55:dd:f7:c8:fc:aa:90:05:b8:2c:14:c2:eb:1d:d7:
09:1a:bc:0e:d5:03:31:0f:98:c1:4f:97:bd:f4:c2:58:21:77:
d4:40:14:5c:28:21:e4:ee:cb:76:09:9d:15:bb:7e:63:84:11:
6e:db:5c:49:d2:82:0f:7b:d4:8b:fa:f4:51:d2:8a:84:7f:34:
04:d5:9f:f6:f5:39:fa:97:bc:b6:0c:9a:67:b0:1c:c1:17:3b:
1a:8e:cd:b0:91:e9:11:3a:fb:75:01:97:97:fe:d3:33:e0:a0:
4e:87:0e:66:59:d4:b2:02:5f:a8:b8:8d:b6:da:56:4e:c7:1e:
91:d6:07:de
[root@master1 ~]# cp apiserver-master1.key apiserver-master1.crt /etc/kubernetes/pki/
[root@master1 ~]# vim /etc/kubernetes/manifests/kube-apiserver.yaml
- --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
- --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
修改为
- --tls-cert-file=/etc/kubernetes/pki/apiserver-master1.crt
- --tls-private-key-file=/etc/kubernetes/pki/apiserver-master1.key
如果还出现x509的错误,重启master1机器
在master1上使用kubectl命令来验证apiserver的可用性
[root@master1 ~]# scp root@192.168.0.250:/etc/kubernetes/admin.conf ./
[root@master1 ~]# vim admin.conf
server: https://192.168.0.250:6443
修改为
server: https://192.168.0.249:6443
[root@master1 ~]# sudo cp /etc/kubernetes/admin.conf $HOME/
[root@master1 ~]# sudo chown $(id -u):$(id -g) $HOME/admin.conf
[root@master1 ~]# export KUBECONFIG=$HOME/admin.conf
[root@master1 ~]# kubectl get nodes
NAME STATUS AGE VERSION
master.k8s Ready 20h v1.6.4
master1.k8s Ready 20h v1.6.4
- 启动master1上的kube-controller-manager和kube-scheduler
[root@master1 ~]# scp root@192.168.0.250:/etc/kubernetes/manifests/kube-controller-manager.yaml /etc/kubernetes/manifests/
这里不要修改controller-manager.conf,虽然里面是server: https://192.168.0.250:6443,只有master和master1指向同一个APISERVER,才能进行选举
[root@master1 ~]# scp root@192.168.0.250:/etc/kubernetes/manifests/kube-scheduler.yaml /etc/kubernetes/manifests/
[root@master1 ~]# scp root@192.168.0.250:/etc/kubernetes/scheduler.conf /etc/kubernetes/
这里不要修改scheduler.conf,虽然里面是server: https://192.168.0.250:6443,只有master和master1指向同一个APISERVER,才能进行选举
将kube-controller-manager和kube-scheduler配置文件里面的server全部改为 IP:6443
然后重启电脑
[root@master1 ~]# systemctl daemon-reload
[root@master1 ~]# systemctl restart kubelet
[root@master ~]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
etcd-master.k8s 1/1 Running 2 21h
etcd-master1.k8s 1/1 Running 5 1h
kube-apiserver-master.k8s 1/1 Running 0 2h
kube-apiserver-master1.k8s 1/1 Running 12 1h
kube-controller-manager-master.k8s 1/1 Running 7 21h
kube-controller-manager-master1.k8s 1/1 Running 8 39m
kube-dns-3913472980-qhbjn 3/3 Running 0 21h
kube-flannel-ds-b3mvc 2/2 Running 0 21h
kube-flannel-ds-kdzpv 2/2 Running 2 21h
kube-proxy-6zj1c 1/1 Running 0 21h
kube-proxy-lrxbn 1/1 Running 1 21h
kube-scheduler-master.k8s 1/1 Running 7 21h
kube-scheduler-master1.k8s 1/1 Running 1 53s
kubernetes-dashboard-2066150588-rwcbv 1/1 Running 0 2h
查看 kube-controller-manager-master.k8s、kube-scheduler-master.k8s 是否被选举为leader
[root@master ~]# kubectl logs kube-controller-manager-master.k8s -n kube-system | grep leader
……
"kube-controller-manager": the object has been modified; please apply your changes to the latest version and try again
I0624 09:19:06.113689 1 leaderelection.go:189] successfully acquired lease kube-system/kube-controller-manager
I0624 09:19:06.113843 1 event.go:217] Event(v1.ObjectReference{Kind:"Endpoints", Namespace:"kube-system", Name:"kube-controller-manager", UID:"d33b5fa3-58ba-11e7-90ea-f48e387ca8b9", APIVersion:"v1", ResourceVersion:"219012", FieldPath:""}): type: 'Normal' reason: 'LeaderElection' master.k8s became leader
……
[root@master ~]# kubectl logs kube-scheduler-master.k8s -n kube-system | grep leader
……
I0624 09:19:03.975391 1 leaderelection.go:189] successfully acquired lease kube-system/kube-scheduler
I0624 09:19:03.975982 1 event.go:217] Event(v1.ObjectReference{Kind:"Endpoints", Namespace:"kube-system", Name:"kube-scheduler", UID:"d348bfa4-58ba-11e7-90ea-f48e387ca8b9", APIVersion:"v1", ResourceVersion:"218995", FieldPath:""}): type: 'Normal' reason: 'LeaderElection' master.k8s became leader
……
如果没有出现选举成功的日志,重启master1试试
- 为APISERVER搭建Load Balance
在自己机器上装一个nginx,不要搞一个nginx deployment。
- 在除了3个master的node节点上,修改apiserver地址
vim /etc/kubernetes/kubelet.conf
修改apiserver地址为kubernetes.default.svc。这里不能直接填写Load Balance的地址,因为连接apiserver时会认证不通过;之前为每台master生成证书的时候加入过kubernetes.default.svc这个地址,所以可以使用它
因为这些节点本身没有apiserver,所以要指向Load Balance,同时修改/etc/hosts,将kubernetes.default.svc指向Load Balance地址:
192.168.0.120 kubernetes.default.svc
- kube-proxy配置修改(不做这一步会导致对应NODE上的pod无法解析DNS)。只需要改一个地方,因为各节点的配置都是一样的
1:查看
root@shaolin:~# kubectl get configmap -n kube-system
NAME DATA AGE
kube-proxy 1 5d
2:保存kube-proxy的yaml文件到本地
kubectl get configmap/kube-proxy -n kube-system -o yaml > kube-proxy-configmap.yaml
3:修改并保存配置
vim kube-proxy-configmap.yaml
apiVersion: v1
data:
  kubeconfig.conf: |
    apiVersion: v1
    kind: Config
    clusters:
    - cluster:
        certificate-authority: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        server: https://kubernetes.default.svc:6443
      name: default
root@shaolin:~# kubectl apply -f kube-proxy-configmap.yaml
Warning: kubectl apply should be used on resource created by either kubectl create --save-config or kubectl apply
configmap "kube-proxy" configured
4:查看更新后的配置
root@shaolin:~# kubectl get configmap/kube-proxy -n kube-system -o yaml
apiVersion: v1
data:
  kubeconfig.conf: |
    apiVersion: v1
    kind: Config
    clusters:
    - cluster:
        certificate-authority: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        server: https://kubernetes.default.svc:6443
      name: default
5:加入kubernetes.default.svc到 hosts文件
6:删除pod以重建
kubectl delete pod kube-proxy..
- Kube-dns实现HA
我们先手动扩容DNS,验证试试
kubectl --namespace=kube-system scale deployment kube-dns --replicas=3
如果不行,考虑参考下面这篇文章
https://kubernetes.io/docs/tasks/administer-cluster/dns-horizontal-autoscaling/
- 非master节点 更改/etc/kubernetes/kubelet.conf 为LB的IP和端口
server: https://kubernetes.default.svc:8443
16:副本Master节点设置环境变量
export KUBECONFIG=/etc/kubernetes/admin.conf
并且将export KUBECONFIG=/etc/kubernetes/admin.conf 设置为开机启动(vim ~/.bashrc )
17:查看集群状态
查看集群信息
kubectl cluster-info
查看各组件信息
kubectl get componentstatuses
HA测试
- 在每个master上删除pod,在相应node上看容器是否被删除
- 在每个master上修改pod的副本个数,在相应node上看容器个数是否随之变化
- 依次关闭各个master,查看etcd、scheduler、controller-manager这三个组件的leader情况
- node关机,pod是否会在别的node上重新创建
  现象:get node 可以很快看到对应node状态变成NotReady,但是pod状态还一直是Running,大约持续5分钟后关机的node上的pod状态变成Unknown,同时在其他node上重建。重新开机后,新的pod不会恢复到原来的机器上
- 逐个关闭master,然后逐个启动,看集群是否能正常工作
  现象:关闭master之后,master上面的pod的status也变成Unknown,在master1上删除和创建pod都仍然有效。但是发现存活的etcd存在raft status不一致的情况,不知道正不正常。
  健康状态都是正常
  Node1的controller-manager和scheduler成为集群的leader
  重启master之后,master恢复功能
  但是关闭master和master1之后,集群出现问题
参考
https://kubernetes.io/docs/admin/high-availability/
https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/
http://tonybai.com/2017/05/15/setup-a-ha-kubernetes-cluster-based-on-kubeadm-part1/
http://tonybai.com/2017/05/15/setup-a-ha-kubernetes-cluster-based-on-kubeadm-part2/