Background
This is a record of a pitfall, one that tormented me for quite a while. I dug through system logs and searched around online, and in the end I only found the cause by comparing against an older, working environment; the errors in the logs were strange and downright bizarre.
By the way, whenever something goes wrong in k8s, start with the cluster's own logs. First run kubectl get pods --all-namespaces -owide
to locate the problem pod, then run kubectl describe pod pod_name -n kube-system
to view its details and events. In most cases this is enough to find the cause of the error (most cases, not always).
[root@master1-163 ~]# kubectl get pods --all-namespaces -owide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
kube-system calico-kube-controllers-76d885cfc8-qct94 0/1 ContainerCreating 0 3s <none> worker-165 <none> <none>
kube-system calico-node-7kz7f 0/1 Init:RunContainerError 0 3s 192.168.59.165 worker-165 <none> <none>
kube-system calico-node-gj8xr 0/1 Init:RunContainerError 0 3s 192.168.59.162 master2-162 <none> <none>
kube-system calico-node-kqftj 0/1 Init:RunContainerError 0 3s 192.168.59.163 master1-163 <none> <none>
kube-system coredns-545d6fc579-6l9xs 1/1 Running 0 5h46m 10.244.139.66 worker-165 <none> <none>
kube-system coredns-545d6fc579-mrm2w 1/1 Running 0 5h46m 10.244.139.65 worker-165 <none> <none>
kube-system kube-apiserver-master1-163 1/1 Running 1 5h47m 192.168.59.163 master1-163 <none> <none>
kube-system kube-apiserver-master2-162 1/1 Running 1 4h47m 192.168.59.162 master2-162 <none> <none>
kube-system kube-controller-manager-master1-163 1/1 Running 1 5h47m 192.168.59.163 master1-163 <none> <none>
kube-system kube-controller-manager-master2-162 1/1 Running 1 4h47m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kp8p6 1/1 Running 1 4h47m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kqg72 1/1 Running 1 5h46m 192.168.59.163 master1-163 <none> <none>
kube-system kube-proxy-nftgv 1/1 Running 0 56m 192.168.59.165 worker-165 <none> <none>
kube-system kube-scheduler-master1-163 1/1 Running 1 5h47m 192.168.59.163 master1-163 <none> <none>
kube-system kube-scheduler-master2-162 1/1 Running 1 4h47m 192.168.59.162 master2-162 <none> <none>
[root@master1-163 ~]#
[root@master1-163 ~]# kubectl describe pod calico-node-gj8xr -n kube-system
Name: calico-node-gj8xr
Namespace: kube-system
Priority: 2000001000
Priority Class Name: system-node-critical
Node: master2-162/192.168.59.162
Start Time: Mon, 29 Nov 2021 17:53:22 +0800
Labels: controller-revision-hash=c4646b5c6
k8s-app=calico-node
pod-template-generation=1
Annotations: scheduler.alpha.kubernetes.io/critical-pod:
Status: Pending
IP: 192.168.59.162
IPs:
IP: 192.168.59.162
Controlled By: DaemonSet/calico-node
Init Containers:
upgrade-ipam:
Container ID: docker://298fecda934316eb4fa7e0e0e93beb1bd818c357343afa3de198468ec84cfe33
Image: calico/cni:v3.19.1
Image ID: docker://sha256:5749e8b276f9b5aea2683d8a0d59afd05a2be478f5092fbaadc4715f4db76e5a
Port: <none>
Host Port: <none>
Command:
/opt/cni/bin/calico-ipam
-upgrade
State: Terminated
Reason: Completed
Exit Code: 0
Started: Mon, 29 Nov 2021 17:53:23 +0800
Finished: Mon, 29 Nov 2021 17:53:23 +0800
Ready: True
Restart Count: 0
Environment:
KUBERNETES_NODE_NAME: (v1:spec.nodeName)
CALICO_NETWORKING_BACKEND: <set to the key 'calico_backend' of config map 'calico-config'> Optional: false
Mounts:
/host/opt/cni/bin from cni-bin-dir (rw)
/var/lib/cni/networks from host-local-net-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-xz2nm (ro)
install-cni:
Container ID: docker://1389feae63f96f81eba7175cd71597e7456a7180e9b71f99265083822fad1303
Image: calico/cni:v3.19.1
Image ID: docker://sha256:5749e8b276f9b5aea2683d8a0d59afd05a2be478f5092fbaadc4715f4db76e5a
Port: <none>
Host Port: <none>
Command:
/install-cni.sh
State: Waiting
Reason: CrashLoopBackOff
Last State: Terminated
Reason: ContainerCannotRun
Message: OCI runtime create failed: container_linux.go:380: starting container process caused: exec: "/install-cni.sh": stat /install-cni.sh: no such file or directory: unknown
Exit Code: 127
Started: Mon, 29 Nov 2021 17:54:07 +0800
Finished: Mon, 29 Nov 2021 17:54:07 +0800
Ready: False
Restart Count: 3
Environment:
CNI_CONF_NAME: 10-calico.conflist
CNI_NETWORK_CONFIG: <set to the key 'cni_network_config' of config map 'calico-config'> Optional: false
KUBERNETES_NODE_NAME: (v1:spec.nodeName)
CNI_MTU: <set to the key 'veth_mtu' of config map 'calico-config'> Optional: false
SLEEP: false
Mounts:
/host/etc/cni/net.d from cni-net-dir (rw)
/host/opt/cni/bin from cni-bin-dir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-xz2nm (ro)
flexvol-driver:
Container ID:
Image: calico/pod2daemon-flexvol:v3.19.1
Image ID:
Port: <none>
Host Port: <none>
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Environment: <none>
Mounts:
/host/driver from flexvol-driver-host (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-xz2nm (ro)
Containers:
calico-node:
Container ID:
Image: calico/node:v3.19.1
Image ID:
Port: <none>
Host Port: <none>
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Requests:
cpu: 250m
Liveness: exec [/bin/calico-node -felix-live -bird-live] delay=10s timeout=1s period=10s #success=1 #failure=6
Readiness: exec [/bin/calico-node -felix-ready -bird-ready] delay=0s timeout=1s period=10s #success=1 #failure=3
Environment:
DATASTORE_TYPE: kubernetes
WAIT_FOR_DATASTORE: true
NODENAME: (v1:spec.nodeName)
CALICO_NETWORKING_BACKEND: <set to the key 'calico_backend' of config map 'calico-config'> Optional: false
CLUSTER_TYPE: k8s,bgp
IP: autodetect
CALICO_IPV4POOL_IPIP: Always
CALICO_IPV4POOL_VXLAN: Never
FELIX_IPINIPMTU: <set to the key 'veth_mtu' of config map 'calico-config'> Optional: false
FELIX_VXLANMTU: <set to the key 'veth_mtu' of config map 'calico-config'> Optional: false
CALICO_IPV4POOL_CIDR: 10.244.0.0/16
CALICO_DISABLE_FILE_LOGGING: true
FELIX_DEFAULTENDPOINTTOHOSTACTION: ACCEPT
FELIX_IPV6SUPPORT: false
FELIX_LOGSEVERITYSCREEN: info
FELIX_HEALTHENABLED: true
Mounts:
/lib/modules from lib-modules (ro)
/run/xtables.lock from xtables-lock (rw)
/var/lib/calico from var-lib-calico (rw)
/var/run/calico from var-run-calico (rw)
/var/run/nodeagent from policysync (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-xz2nm (ro)
Conditions:
Type Status
Initialized False
Ready False
ContainersReady False
PodScheduled True
Volumes:
lib-modules:
Type: HostPath (bare host directory volume)
Path: /lib/modules
HostPathType:
var-run-calico:
Type: HostPath (bare host directory volume)
Path: /var/run/calico
HostPathType:
var-lib-calico:
Type: HostPath (bare host directory volume)
Path: /var/lib/calico
HostPathType:
xtables-lock:
Type: HostPath (bare host directory volume)
Path: /run/xtables.lock
HostPathType: FileOrCreate
cni-bin-dir:
Type: HostPath (bare host directory volume)
Path: /opt/cni/bin
HostPathType:
cni-net-dir:
Type: HostPath (bare host directory volume)
Path: /etc/cni/net.d
HostPathType:
host-local-net-dir:
Type: HostPath (bare host directory volume)
Path: /var/lib/cni/networks
HostPathType:
policysync:
Type: HostPath (bare host directory volume)
Path: /var/run/nodeagent
HostPathType: DirectoryOrCreate
flexvol-driver-host:
Type: HostPath (bare host directory volume)
Path: /usr/libexec/kubernetes/kubelet-plugins/volume/exec/nodeagent~uds
HostPathType: DirectoryOrCreate
kube-api-access-xz2nm:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: kubernetes.io/os=linux
Tolerations: :NoSchedule op=Exists
:NoExecute op=Exists
CriticalAddonsOnly op=Exists
node.kubernetes.io/disk-pressure:NoSchedule op=Exists
node.kubernetes.io/memory-pressure:NoSchedule op=Exists
node.kubernetes.io/network-unavailable:NoSchedule op=Exists
node.kubernetes.io/not-ready:NoExecute op=Exists
node.kubernetes.io/pid-pressure:NoSchedule op=Exists
node.kubernetes.io/unreachable:NoExecute op=Exists
node.kubernetes.io/unschedulable:NoSchedule op=Exists
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 74s default-scheduler Successfully assigned kube-system/calico-node-gj8xr to master2-162
Normal Pulled 74s kubelet Container image "calico/cni:v3.19.1" already present on machine
Normal Created 74s kubelet Created container upgrade-ipam
Normal Started 74s kubelet Started container upgrade-ipam
Normal Pulled 30s (x4 over 74s) kubelet Container image "calico/cni:v3.19.1" already present on machine
Normal Created 30s (x4 over 74s) kubelet Created container install-cni
Warning Failed 30s (x4 over 73s) kubelet Error: failed to start container "install-cni": Error response from daemon: OCI runtime create failed: container_linux.go:380: starting container process caused: exec: "/install-cni.sh": stat /install-cni.sh: no such file or directory: unknown
Warning BackOff 5s (x6 over 57s) kubelet Back-off restarting failed container
[root@master1-163 ~]#
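From the describe output above, the failing init container is install-cni. If the events alone are not enough, you can also pull that container's own log output with kubectl logs (a sketch; the pod name here is from this run and will differ in your cluster):
kubectl logs calico-node-gj8xr -n kube-system -c install-cni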
coredns error states and causes
If coredns is stuck in Pending, as shown below, the cause is that no network plugin has been installed; once a network plugin such as calico is installed, the status changes to Running. This one is simple, so I won't go into detail.
[root@master1-163 ~]# kubectl get pods --all-namespaces -owide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
kube-system coredns-545d6fc579-6l9xs 0/1 pending 0 5h21m 10.244.139.66 worker-165 <none> <none>
kube-system coredns-545d6fc579-mrm2w 0/1 pending 0 5h21m 10.244.139.65 worker-165 <none> <none>
kube-system kube-apiserver-master1-163 1/1 Running 1 5h21m 192.168.59.163 master1-163 <none> <none>
kube-system kube-apiserver-master2-162 1/1 Running 1 4h21m 192.168.59.162 master2-162 <none> <none>
...
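The fix is just installing the network plugin. For calico, assuming the manifest has already been downloaded to the node, that is:
kubectl apply -f calico.yaml
kubectl get pods -n kube-system -w    # watch until coredns turns Running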
calico deployment errors: causes and fixes
What a problematic apply looks like
Look at the tail of the output: when the deployment has a problem, kubectl apply ends with the first listing below. Compare it with the second listing from a normal run, which also prints the PodDisruptionBudget warning and creates the poddisruptionbudget.
[root@master1-163 ~]# kubectl apply -f calico.yaml
# part of the output omitted
daemonset.apps/calico-node created
serviceaccount/calico-node created
deployment.apps/calico-kube-controllers created
serviceaccount/calico-kube-controllers created
[root@master1-163 ~]#
[root@master ~]#
daemonset.apps/calico-node created
serviceaccount/calico-node created
deployment.apps/calico-kube-controllers created
serviceaccount/calico-kube-controllers created
Warning: policy/v1beta1 PodDisruptionBudget is deprecated in v1.21+, unavailable in v1.25+; use policy/v1 PodDisruptionBudget
poddisruptionbudget.policy/calico-kube-controllers created
[root@master ~]#
Error Init:0/3: explanation and fix
[root@master1-163 ~]# kubectl apply -f calico.yaml
[root@master1-163 ~]# kubectl get pods --all-namespaces -owide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
kube-system calico-kube-controllers-7676785684-67bbt 0/1 ContainerCreating 0 5s <none> worker-165 <none> <none>
kube-system calico-node-jbhgs 0/1 Init:0/3 0 5s 192.168.59.162 master2-162 <none> <none>
kube-system calico-node-jz8fh 0/1 Init:0/3 0 5s 192.168.59.163 master1-163 <none> <none>
kube-system calico-node-qtnwz 0/1 Init:0/3 0 5s 192.168.59.165 worker-165 <none> <none>
kube-system coredns-545d6fc579-6l9xs 1/1 Running 0 5h24m 10.244.139.66 worker-165 <none> <none>
kube-system coredns-545d6fc579-mrm2w 1/1 Running 0 5h24m 10.244.139.65 worker-165 <none> <none>
kube-system kube-apiserver-master1-163 1/1 Running 1 5h24m 192.168.59.163 master1-163 <none> <none>
kube-system kube-apiserver-master2-162 1/1 Running 1 4h24m 192.168.59.162 master2-162 <none> <none>
kube-system kube-controller-manager-master1-163 1/1 Running 1 5h24m 192.168.59.163 master1-163 <none> <none>
kube-system kube-controller-manager-master2-162 1/1 Running 1 4h24m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kp8p6 1/1 Running 1 4h24m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kqg72 1/1 Running 1 5h24m 192.168.59.163 master1-163 <none> <none>
kube-system kube-proxy-nftgv 1/1 Running 0 33m 192.168.59.165 worker-165 <none> <none>
kube-system kube-scheduler-master1-163 1/1 Running 1 5h24m 192.168.59.163 master1-163 <none> <none>
kube-system kube-scheduler-master2-162 1/1 Running 1 4h24m 192.168.59.162 master2-162 <none> <none>
[root@master1-163 ~]#
[root@master1-163 ~]#
Cause: the image versions in the calico manifest do not match the local images. As shown below, calico.yaml references v3.14.2 while my local images are v3.19.1.
[root@master1-163 ~]# cat calico.yaml| grep image
image: calico/cni:v3.14.2
image: calico/cni:v3.14.2
image: calico/pod2daemon-flexvol:v3.14.2
image: calico/node:v3.14.2
image: calico/kube-controllers:v3.14.2
[root@master1-163 ~]#
[root@master1-163 ~]# docker images | grep ca
calico/node v3.19.1 c4d75af7e098 6 months ago 168MB
calico/pod2daemon-flexvol v3.19.1 5660150975fb 6 months ago 21.7MB
calico/cni v3.19.1 5749e8b276f9 6 months ago 146MB
calico/kube-controllers v3.19.1 5d3d5ddc8605 6 months ago 60.6MB
registry.aliyuncs.com/google_containers/kube-apiserver v1.21.1 771ffcf9ca63 6 months ago 126MB
registry.aliyuncs.com/google_containers/kube-scheduler v1.21.0 62ad3129eca8 7 months ago 50.6MB
[root@master1-163 ~]#
Solution: edit the calico.yaml file and change the version tags so that they match the local images. A one-liner like the sketch below can do the substitution.
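(A sketch, assuming the local images are tagged v3.19.1 as shown above; adjust the tags to whatever your environment actually has.)
sed -i 's/v3.14.2/v3.19.1/g' calico.yaml
cat calico.yaml | grep image    # the tags should now match the local images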
Error Init:RunContainerError: explanation and fix
[root@master1-163 ~]# kubectl apply -f calico.yaml
[root@master1-163 ~]# kubectl get pods --all-namespaces -owide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
kube-system calico-kube-controllers-76d885cfc8-l7nwn 0/1 ContainerCreating 0 2s <none> worker-165 <none> <none>
kube-system calico-node-6xndv 0/1 Init:RunContainerError 0 2s 192.168.59.165 worker-165 <none> <none>
kube-system calico-node-htjqz 0/1 Init:RunContainerError 0 2s 192.168.59.163 master1-163 <none> <none>
kube-system calico-node-r5f7z 0/1 Init:RunContainerError 0 2s 192.168.59.162 master2-162 <none> <none>
kube-system coredns-545d6fc579-6l9xs 1/1 Running 0 5h37m 10.244.139.66 worker-165 <none> <none>
kube-system coredns-545d6fc579-mrm2w 1/1 Running 0 5h37m 10.244.139.65 worker-165 <none> <none>
kube-system kube-apiserver-master1-163 1/1 Running 1 5h38m 192.168.59.163 master1-163 <none> <none>
kube-system kube-apiserver-master2-162 1/1 Running 1 4h38m 192.168.59.162 master2-162 <none> <none>
kube-system kube-controller-manager-master1-163 1/1 Running 1 5h38m 192.168.59.163 master1-163 <none> <none>
kube-system kube-controller-manager-master2-162 1/1 Running 1 4h38m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kp8p6 1/1 Running 1 4h38m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kqg72 1/1 Running 1 5h37m 192.168.59.163 master1-163 <none> <none>
kube-system kube-proxy-nftgv 1/1 Running 0 47m 192.168.59.165 worker-165 <none> <none>
kube-system kube-scheduler-master1-163 1/1 Running 1 5h38m 192.168.59.163 master1-163 <none> <none>
kube-system kube-scheduler-master2-162 1/1 Running 1 4h38m 192.168.59.162 master2-162 <none> <none>
[root@master1-163 ~]#
Cause: I had added imagePullPolicy: IfNotPresent to the calico.yaml file:
[root@master1-163 ~]# cat calico.yaml | grep image
image: calico/cni:v3.19.1
imagePullPolicy: IfNotPresent
image: calico/cni:v3.19.1
imagePullPolicy: IfNotPresent
image: calico/pod2daemon-flexvol:v3.19.1
imagePullPolicy: IfNotPresent
image: calico/node:v3.19.1
imagePullPolicy: IfNotPresent
image: calico/kube-controllers:v3.19.1
imagePullPolicy: IfNotPresent
[root@master1-163 ~]#
Solution: delete the imagePullPolicy: IfNotPresent lines. Once the images are present locally, the deployment uses them by default even in an offline environment, so there is no need to specify imagePullPolicy: IfNotPresent; specifying it is what caused the error here. The sketch below shows one way to strip those lines.
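(A sketch; it deletes every line containing the imagePullPolicy entry, so verify the result with grep afterwards.)
sed -i '/imagePullPolicy: IfNotPresent/d' calico.yaml
cat calico.yaml | grep -E 'image|imagePullPolicy'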
After that, the final, working calico.yaml looks like this:
[root@master1-163 ~]# cat calico.yaml| grep image
image: docker.io/calico/cni:v3.19.1
image: docker.io/calico/cni:v3.19.1
image: docker.io/calico/pod2daemon-flexvol:v3.19.1
image: docker.io/calico/node:v3.19.1
image: docker.io/calico/kube-controllers:v3.19.1
[root@master1-163 ~]#
[root@master1-163 ~]# docker images | grep ca
calico/node v3.19.1 c4d75af7e098 6 months ago 168MB
calico/pod2daemon-flexvol v3.19.1 5660150975fb 6 months ago 21.7MB
calico/cni v3.19.1 5749e8b276f9 6 months ago 146MB
calico/kube-controllers v3.19.1 5d3d5ddc8605 6 months ago 60.6MB
registry.aliyuncs.com/google_containers/kube-apiserver v1.21.1 771ffcf9ca63 6 months ago 126MB
registry.aliyuncs.com/google_containers/kube-scheduler v1.21.0 62ad3129eca8 7 months ago 50.6MB
[root@master1-163 ~]#
Output of a normal deployment; coredns status is normal
[root@master1-163 ~]# kubectl apply -f calico.yaml
configmap/calico-config configured
customresourcedefinition.apiextensions.k8s.io/bgpconfigurations.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/bgppeers.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/blockaffinities.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/clusterinformations.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/felixconfigurations.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/globalnetworkpolicies.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/globalnetworksets.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/hostendpoints.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/ipamblocks.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/ipamconfigs.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/ipamhandles.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/ippools.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/kubecontrollersconfigurations.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/networkpolicies.crd.projectcalico.org configured
customresourcedefinition.apiextensions.k8s.io/networksets.crd.projectcalico.org configured
clusterrole.rbac.authorization.k8s.io/calico-kube-controllers configured
clusterrolebinding.rbac.authorization.k8s.io/calico-kube-controllers unchanged
clusterrole.rbac.authorization.k8s.io/calico-node unchanged
clusterrolebinding.rbac.authorization.k8s.io/calico-node unchanged
daemonset.apps/calico-node configured
serviceaccount/calico-node unchanged
deployment.apps/calico-kube-controllers configured
serviceaccount/calico-kube-controllers unchanged
Warning: policy/v1beta1 PodDisruptionBudget is deprecated in v1.21+, unavailable in v1.25+; use policy/v1 PodDisruptionBudget
poddisruptionbudget.policy/calico-kube-controllers created
[root@master1-163 ~]#
[root@master1-163 ~]#
[root@master1-163 ~]# kubectl get pods --all-namespaces -owide
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
kube-system calico-kube-controllers-78d6f96c7b-tqxp4 1/1 Running 0 10s 10.244.139.67 worker-165 <none> <none>
kube-system calico-node-9l4b8 1/1 Running 0 14s 192.168.59.165 worker-165 <none> <none>
kube-system calico-node-hkwz5 1/1 Running 0 14s 192.168.59.163 master1-163 <none> <none>
kube-system calico-node-mgfb5 1/1 Running 0 14s 192.168.59.162 master2-162 <none> <none>
kube-system coredns-545d6fc579-6l9xs 1/1 Running 0 5h43m 10.244.139.66 worker-165 <none> <none>
kube-system coredns-545d6fc579-mrm2w 1/1 Running 0 5h43m 10.244.139.65 worker-165 <none> <none>
kube-system kube-apiserver-master1-163 1/1 Running 1 5h43m 192.168.59.163 master1-163 <none> <none>
kube-system kube-apiserver-master2-162 1/1 Running 1 4h44m 192.168.59.162 master2-162 <none> <none>
kube-system kube-controller-manager-master1-163 1/1 Running 1 5h43m 192.168.59.163 master1-163 <none> <none>
kube-system kube-controller-manager-master2-162 1/1 Running 1 4h44m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kp8p6 1/1 Running 1 4h44m 192.168.59.162 master2-162 <none> <none>
kube-system kube-proxy-kqg72 1/1 Running 1 5h43m 192.168.59.163 master1-163 <none> <none>
kube-system kube-proxy-nftgv 1/1 Running 0 52m 192.168.59.165 worker-165 <none> <none>
kube-system kube-scheduler-master1-163 1/1 Running 1 5h43m 192.168.59.163 master1-163 <none> <none>
kube-system kube-scheduler-master2-162 1/1 Running 1 4h44m 192.168.59.162 master2-162 <none> <none>
[root@master1-163 ~]#