[Notes] Kubernetes Container Management

Setting Up a Cluster

K8s hardware requirements: in a test environment, master (2 CPU cores, 4 GB RAM, 20 GB disk) and node (4 cores, 8 GB RAM, 40 GB disk); in production, both master and node need considerably more.
Cluster setup methods: the kubeadm way (kubeadm is a K8s deployment tool that provides kubeadm init and kubeadm join for quickly standing up a Kubernetes cluster; see the official docs via the link) and the binary-package way (download the release binaries from GitHub and deploy each component by hand to assemble the Kubernetes cluster).
K8s platform planning: environments are planned as either a single-master cluster or a multi-master cluster (the common choice, since it avoids a single master failure bringing down the whole system).

(Diagram: single-master topology, master -> node1/node2/node3; multi-master topology, master1/master2/master3 behind a load balancer -> node1/node2/node3)
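
For the multi-master layout, kubeadm can point every control-plane node at the load balancer instead of a single apiserver. A minimal sketch, assuming a load balancer reachable at LB_IP:6443 (the walkthrough below sticks to a single master):

    # On the first master: advertise the load balancer as the control-plane endpoint
    kubeadm init --control-plane-endpoint "LB_IP:6443" --upload-certs
    # Additional masters then join with the "kubeadm join ... --control-plane" command printed by init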

Setup 1: single-master cluster installed with kubeadm

  • Prepare the virtual machines (take snapshots so the environment can be restored quickly). Network requirement: the cluster machines can reach each other and the internet (the OS in this example is CentOS 7)
  • Initialize the three virtual machines (again, take snapshots for quick recovery)
    # 1. Disable the firewall on all three VMs:
    systemctl stop firewalld #temporary
    systemctl disable firewalld #permanent
    
    # 2. Disable SELinux on all three VMs
    setenforce 0 #temporary
    sed -i 's/enforcing/disabled/' /etc/selinux/config #permanent
    
    # 3. Disable the swap partition on all three VMs
    swapoff -a #temporary
    sed -ri 's/.*swap.*/#&/' /etc/fstab #permanent
    
    # 4. Set a hostname on each VM (k8s-master shown; use k8s-node1 / k8s-node2 on the nodes)
    hostnamectl set-hostname k8s-master
    
    # 5. Add hosts entries on the master
    [root@k8s-master ~]# cat >> /etc/hosts << EOF
    > 172.16.90.146 k8s-master
    > 172.16.90.145 k8s-node1
    > 172.16.90.144 k8s-node2
    > EOF
    
    # 6. On all three VMs, pass bridged IPv4 traffic to the iptables chains, then apply
    [root@k8s-master ~]# cat > /etc/sysctl.d/k8s.conf << EOF
    > net.bridge.bridge-nf-call-ip6tables = 1
    > net.bridge.bridge-nf-call-iptables = 1
    > EOF
    [root@k8s-master ~]# sysctl --system
    
    # 7. Set up time synchronization on all three VMs
    yum install ntpdate -y
    ntpdate time.windows.com
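    
    A quick sanity check that the settings above actually took effect (a sketch; run on each VM):
    free -h | grep -i swap                    # the Swap line should show 0B after swapoff
    getenforce                                # expect Permissive (or Disabled after a reboot)
    systemctl is-active firewalld             # expect inactive
    modprobe br_netfilter                     # load the module if the next check errors
    sysctl net.bridge.bridge-nf-call-iptables # expect = 1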
    
  • Install Docker/kubeadm/kubelet on all nodes
    # Install wget
    [root@k8s-master ~]# yum install wget
    
    # Download the Docker repo file
    [root@k8s-master ~]# wget https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -O /etc/yum.repos.d/docker-ce.repo
    
    # Install Docker
    [root@k8s-master ~]# yum -y install docker-ce-18.06.1.ce-3.el7
    
    # Enable and start Docker
    [root@k8s-master ~]# systemctl enable docker && systemctl start docker
    
    # Verify the installation
    [root@k8s-master ~]# docker --version
    
    # Configure an Alibaba Cloud registry mirror for Docker (note: this is Docker's daemon.json, not a yum repo)
    [root@k8s-master ~]# cat > /etc/docker/daemon.json << EOF
    > {
    > "registry-mirrors": ["https://b9pmyelo.mirror.aliyuncs.com"]
    > }
    > EOF
    
    # Add the Kubernetes yum repo
    [root@k8s-master ~]# cat > /etc/yum.repos.d/kubernetes.repo << EOF
    > [kubernetes]
    > name=Kubernetes
    > baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
    > enabled=1
    > gpgcheck=0
    > repo_gpgcheck=0
    > gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
    > https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
    > EOF
    
    # Install kubeadm, kubelet, and kubectl
    [root@k8s-master ~]# yum install -y kubelet kubeadm kubectl
    
    # Enable kubelet at boot
    [root@k8s-master ~]# systemctl enable kubelet
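    
    Note: the walkthrough below passes --kubernetes-version v1.21.0 to kubeadm init, while the unpinned install above actually pulled v1.21.3 packages (visible in the kubectl get nodes output later). Pinning the packages avoids that skew; the version numbers here are illustrative:
    yum install -y kubelet-1.21.0 kubeadm-1.21.0 kubectl-1.21.0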
    
  • Run kubeadm init on the master node to initialize the control plane
    # Deploy the Kubernetes control plane on the master node with the command below.
    # The default image registry k8s.gcr.io is unreachable from mainland China, so the Alibaba Cloud mirror registry is specified instead.
    [root@k8s-master ~]# kubeadm init --apiserver-advertise-address=172.16.90.146 --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.21.0 --service-cidr=10.96.0.0/12 --pod-network-cidr=10.244.0.0/16
    ...
    error execution phase preflight: [preflight] Some fatal errors occurred:
            [ERROR ImagePull]: failed to pull image registry.aliyuncs.com/google_containers/coredns:v1.8.0: output: Error response from daemon: manifest for registry.aliyuncs.com/google_containers/coredns:v1.8.0 not found
    , error: exit status 1
    ...
    
    # Checking the local images shows that coredns is the one missing
    [root@k8s-master ~]# docker images
    REPOSITORY                                                        TAG                 IMAGE ID            CREATED             SIZE
    registry.aliyuncs.com/google_containers/kube-apiserver            v1.21.0             4d217480042e        3 months ago        126MB
    registry.aliyuncs.com/google_containers/kube-proxy                v1.21.0             38ddd85fe90e        3 months ago        122MB
    registry.aliyuncs.com/google_containers/kube-controller-manager   v1.21.0             09708983cc37        3 months ago        120MB
    registry.aliyuncs.com/google_containers/kube-scheduler            v1.21.0             62ad3129eca8        3 months ago        50.6MB
    registry.aliyuncs.com/google_containers/pause                     3.4.1               0f8457a4c2ec        6 months ago        683kB
    registry.aliyuncs.com/google_containers/etcd                      3.4.13-0            0369cf4303ff        11 months ago       253MB
    
    # Fix: Kubernetes expects the image registry.aliyuncs.com/google_containers/coredns:v1.8.0, so pull the 1.8.0 tag and rename it with docker tag
    # Pull the image
    [root@k8s-master ~]# docker pull registry.aliyuncs.com/google_containers/coredns:1.8.0
    # Retag it
    [root@k8s-master ~]# docker tag registry.aliyuncs.com/google_containers/coredns:1.8.0 registry.aliyuncs.com/google_containers/coredns:v1.8.0
    # Remove the original tag
    [root@k8s-master ~]# docker rmi registry.aliyuncs.com/google_containers/coredns:1.8.0
    
    # Verify the images
    [root@k8s-master ~]# docker images
    REPOSITORY                                                        TAG                 IMAGE ID            CREATED             SIZE
    registry.aliyuncs.com/google_containers/kube-apiserver            v1.21.0             4d217480042e        3 months ago        126MB
    registry.aliyuncs.com/google_containers/kube-proxy                v1.21.0             38ddd85fe90e        3 months ago        122MB
    registry.aliyuncs.com/google_containers/kube-controller-manager   v1.21.0             09708983cc37        3 months ago        120MB
    registry.aliyuncs.com/google_containers/kube-scheduler            v1.21.0             62ad3129eca8        3 months ago        50.6MB
    registry.aliyuncs.com/google_containers/pause                     3.4.1               0f8457a4c2ec        6 months ago        683kB
    registry.aliyuncs.com/google_containers/coredns                   v1.8.0              296a6d5035e2        9 months ago        42.5MB
    registry.aliyuncs.com/google_containers/etcd                      3.4.13-0            0369cf4303ff        11 months ago       253MB
    
    # Run kubeadm init again; this time it succeeds
    ...
    Your Kubernetes control-plane has initialized successfully!
    
    To start using your cluster, you need to run the following as a regular user:
    
      mkdir -p $HOME/.kube
      sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
      sudo chown $(id -u):$(id -g) $HOME/.kube/config
    ...
    kubeadm join 172.16.90.146:6443 --token y761gh.vxrkrulwu0tt74sw \
        --discovery-token-ca-cert-hash sha256:b611e2e88052ec60ac4716b0a9a48a9fa45d99a4b457563593dc29805214bbc5 
    # Configure the kubectl tool:
    [root@k8s-master ~]# mkdir -p $HOME/.kube
    [root@k8s-master ~]# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
    [root@k8s-master ~]# chown $(id -u):$(id -g) $HOME/.kube/config
    [root@k8s-master ~]# kubectl get nodes
    NAME         STATUS     ROLES                  AGE     VERSION
    k8s-master   NotReady   control-plane,master   3m25s   v1.21.3
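    
    The join token printed by kubeadm init expires after 24 hours by default. If it has expired before the nodes join, a fresh join command can be printed on the master:
    [root@k8s-master ~]# kubeadm token create --print-join-command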
    
  • Run the kubeadm join command on each node to add it to the cluster
    [root@k8s-node1 ~]# kubeadm join 172.16.90.146:6443 --token y761gh.vxrkrulwu0tt74sw --discovery-token-ca-cert-hash sha256:b611e2e88052ec60ac4716b0a9a48a9fa45d99a4b457563593dc29805214bbc5
    [root@k8s-node2 ~]# kubeadm join 172.16.90.146:6443 --token y761gh.vxrkrulwu0tt74sw --discovery-token-ca-cert-hash sha256:b611e2e88052ec60ac4716b0a9a48a9fa45d99a4b457563593dc29805214bbc5
    
    # Check on the master that the nodes joined successfully
    [root@k8s-master ~]# kubectl get nodes
    NAME         STATUS     ROLES                  AGE    VERSION
    k8s-master   NotReady   control-plane,master   11m    v1.21.3
    k8s-node1    NotReady   <none>                 2m6s   v1.21.3
    k8s-node2    NotReady   <none>                 60s    v1.21.3
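    
    ROLES showing <none> for the workers is normal. If you want a role displayed, a label can be added; this is purely cosmetic:
    [root@k8s-master ~]# kubectl label node k8s-node1 node-role.kubernetes.io/worker=
    [root@k8s-master ~]# kubectl label node k8s-node2 node-role.kubernetes.io/worker=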
    
  • Configure the network plugin (Flannel)
    [root@k8s-master ~]# kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
    
    # Check that the pods are running
    [root@k8s-master ~]# kubectl get pods -n kube-system
    NAME                                 READY   STATUS    RESTARTS   AGE
    coredns-59d64cd4d4-dqx8m             1/1     Running   0          31m
    coredns-59d64cd4d4-z8pdq             1/1     Running   0          31m
    etcd-k8s-master                      1/1     Running   0          31m
    kube-apiserver-k8s-master            1/1     Running   0          31m
    kube-controller-manager-k8s-master   1/1     Running   0          31m
    kube-flannel-ds-h7v2g                1/1     Running   0          2m46s
    kube-flannel-ds-xmzfh                1/1     Running   0          2m46s
    kube-flannel-ds-z9nbj                1/1     Running   0          2m46s
    kube-proxy-6c9cd                     1/1     Running   0          20m
    kube-proxy-cnvfg                     1/1     Running   0          31m
    kube-proxy-p4nx4                     1/1     Running   0          22m
    kube-scheduler-k8s-master            1/1     Running   0          31m
    
    # Verify that the nodes are now Ready
    [root@k8s-master ~]# kubectl get nodes
    NAME         STATUS   ROLES                  AGE   VERSION
    k8s-master   Ready    control-plane,master   32m   v1.21.3
    k8s-node1    Ready    <none>                 23m   v1.21.3
    k8s-node2    Ready    <none>                 22m   v1.21.3
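    
    If a node stays NotReady after applying Flannel, the CNI pods on it usually have not come up yet; checking where each kube-system pod was scheduled helps narrow that down:
    [root@k8s-master ~]# kubectl get pods -n kube-system -o wide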
    
  • Test the Kubernetes cluster
    # Create a pod in the cluster and verify that it runs normally
    # Pull the nginx image from the internet
    [root@k8s-master ~]# kubectl create deployment nginx --image=nginx
    deployment.apps/nginx created
    # Check the pod status
    [root@k8s-master ~]# kubectl get pod
    NAME                     READY   STATUS    RESTARTS   AGE
    nginx-6799fc88d8-lj24f   1/1     Running   0          62s
    # Expose port 80 to the outside
    [root@k8s-master ~]# kubectl expose deployment nginx --port=80 --type=NodePort
    service/nginx exposed
    # Check the exposed port
    [root@k8s-master ~]# kubectl get pod,svc
    NAME                         READY   STATUS    RESTARTS   AGE
    pod/nginx-6799fc88d8-lj24f   1/1     Running   0          3m4s
    
    NAME                 TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)        AGE
    service/kubernetes   ClusterIP   10.96.0.1        <none>        443/TCP        39m
    service/nginx        NodePort    10.105.227.129   <none>        80:30640/TCP   20s
    
    # Access URL: http://NodeIP:Port (NodeIP is the IP of any node; Port is the NodePort exposed above)
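    
    For example, with the NodePort 30640 shown above, nginx should answer on any node's IP:
    [root@k8s-master ~]# curl http://172.16.90.146:30640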
    

Setup 2: single-master cluster installed from binaries

  • Prepare the virtual machines (take snapshots so the environment can be restored quickly). Network requirement: the cluster machines can reach each other and the internet (the OS in this example is CentOS 7)
    Role        IP             Components
    k8s-master  172.16.90.147  kube-apiserver, kube-controller-manager, kube-scheduler, etcd
    k8s-node1   172.16.90.148  kubelet, kube-proxy, docker, etcd
  • Initialize the two virtual machines (again, take snapshots for quick recovery)
    # Disable the firewall
    systemctl stop firewalld
    systemctl disable firewalld
    
    # Disable SELinux
    sed -i 's/enforcing/disabled/' /etc/selinux/config # permanent
    setenforce 0 # temporary
    
    # Disable swap
    swapoff -a # temporary
    sed -ri 's/.*swap.*/#&/' /etc/fstab # permanent
    
    # Set hostnames according to the plan
    hostnamectl set-hostname <hostname>
    
    # Add hosts entries on the master
    cat >> /etc/hosts << EOF
    172.16.90.147 m1
    172.16.90.148 n1
    EOF
    
    # Pass bridged IPv4 traffic to the iptables chains
    cat > /etc/sysctl.d/k8s.conf << EOF
    net.bridge.bridge-nf-call-ip6tables = 1
    net.bridge.bridge-nf-call-iptables = 1
    EOF
    
    # Apply
    sysctl --system 
    
    # Time synchronization
    yum install ntpdate -y
    ntpdate time.windows.com
    
  • Self-sign certificates for etcd and the apiserver
    # Prepare the cfssl certificate-generation tooling
    # cfssl is an open-source certificate management tool that generates certificates from JSON files and is easier to use than openssl. Run this on any one machine; here, the master node.
    # Download
    wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 
    wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 
    wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 
    chmod +x cfssl_linux-amd64 cfssljson_linux-amd64 cfssl-certinfo_linux-amd64 
    mv cfssl_linux-amd64 /usr/local/bin/cfssl 
    mv cfssljson_linux-amd64 /usr/local/bin/cfssljson 
    mv cfssl-certinfo_linux-amd64 /usr/bin/cfssl-certinfo
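    
    A quick check that the binaries landed on the PATH and run:
    cfssl version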
    
    # Generate the etcd certificates
    # Self-signed certificate authority (CA)
    # Create the working directories
    [root@m1 ~]# mkdir -p ~/TLS/{etcd,k8s}
    [root@m1 ~]# cd ~/TLS/etcd
    
    # Self-sign the CA
    [root@m1 etcd]# cat > ca-config.json << EOF
    {
    	"signing": { 
    		"default": { 
    			"expiry": "87600h" 
    		},
    		"profiles": { 
    			"www": { 
    				"expiry": "87600h", 
    				"usages": ["signing", "key encipherment", "server auth", "client auth"] 
    			} 
    		} 
    	}
    }
    EOF
    [root@m1 etcd]# cat > ca-csr.json << EOF
    { 
    	"CN": "etcd CA", 
    	"key": { 
    		"algo": "rsa", 
    		"size": 2048 
    	},
    	"names": [
    		{ 
    			"C": "CN", 
    			"L": "Beijing", 
    			"ST": "Beijing"
    		}
    	] 
    }
    EOF
    
    # Generate the CA certificate and key
    [root@m1 etcd]# cfssl gencert -initca ca-csr.json | cfssljson -bare ca -
    [root@m1 etcd]# ls *pem
    ca-key.pem  ca.pem
    
    # Issue the etcd HTTPS certificate with the self-signed CA
    # Create the certificate signing request file
    [root@m1 etcd]# cat > server-csr.json << EOF
    {
    	"CN": "etcd",
    	"hosts": [
    		"172.16.90.147",
    		"172.16.90.148"
    	],
    	"key": {
    		"algo": "rsa",
    		"size": 2048
    	},
    	"names": [
    		{
    			"C": "CN",
    			"L": "BeiJing",
    			"ST": "BeiJing"
    		}
    	]
    }
    EOF
    # Note: the hosts field above must list the cluster-internal IP of every etcd node, without exception. A few spare IPs can be added to make later expansion easier.
    
    # Generate the server certificate
    [root@m1 etcd]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=www server-csr.json | cfssljson -bare server
    
    # Check the result
    [root@m1 etcd]# ls server*pem
    server-key.pem  server.pem
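    
    The cfssl-certinfo tool installed earlier can confirm that the hosts IPs actually landed in the certificate's SANs:
    [root@m1 etcd]# cfssl-certinfo -cert server.pem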
    
  • Deploy the etcd cluster
    # The following is done on node 1 (the master); to keep things simple, all files generated on node 1 are then copied to node 2.
    # Create the working directory and unpack the binary release
    # (this assumes etcd-v3.4.9-linux-amd64.tar.gz was already downloaded from the etcd releases page)
    [root@m1 ~]# mkdir -p /opt/etcd/{bin,cfg,ssl}
    [root@m1 ~]# tar zxvf etcd-v3.4.9-linux-amd64.tar.gz
    [root@m1 ~]# mv etcd-v3.4.9-linux-amd64/{etcd,etcdctl} /opt/etcd/bin/
    [root@m1 opt]# tree etcd
    etcd
    ├── bin
    │   ├── etcd
    │   └── etcdctl
    ├── cfg
    └── ssl
    
    # Create the etcd config file
    [root@m1 ~]# cat > /opt/etcd/cfg/etcd.conf << EOF
    #[Member]
    #ETCD_NAME: node name, unique within the cluster
    ETCD_NAME="etcd-1"
    #ETCD_DATA_DIR: data directory
    ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
    #ETCD_LISTEN_PEER_URLS: listen address for cluster (peer) traffic
    ETCD_LISTEN_PEER_URLS="https://172.16.90.147:2380"
    #ETCD_LISTEN_CLIENT_URLS: listen address for client traffic
    ETCD_LISTEN_CLIENT_URLS="https://172.16.90.147:2379"
    #[Clustering]
    #ETCD_INITIAL_ADVERTISE_PEER_URLS: peer address advertised to the cluster
    ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.16.90.147:2380"
    #ETCD_ADVERTISE_CLIENT_URLS: client address advertised to clients
    ETCD_ADVERTISE_CLIENT_URLS="https://172.16.90.147:2379"
    #ETCD_INITIAL_CLUSTER: addresses of all cluster nodes
    ETCD_INITIAL_CLUSTER="etcd-1=https://172.16.90.147:2380,etcd-2=https://172.16.90.148:2380"
    #ETCD_INITIAL_CLUSTER_TOKEN: cluster token
    ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
    #ETCD_INITIAL_CLUSTER_STATE: state when joining; new for a new cluster, existing to join an existing one
    ETCD_INITIAL_CLUSTER_STATE="new"
    EOF
    
    # Manage etcd with systemd
    [root@m1 /]# cat > /usr/lib/systemd/system/etcd.service << EOF
    [Unit]
    Description=Etcd Server
    After=network.target
    After=network-online.target
    Wants=network-online.target
    [Service]
    Type=notify
    EnvironmentFile=/opt/etcd/cfg/etcd.conf
    ExecStart=/opt/etcd/bin/etcd \
    --cert-file=/opt/etcd/ssl/server.pem \
    --key-file=/opt/etcd/ssl/server-key.pem \
    --peer-cert-file=/opt/etcd/ssl/server.pem \
    --peer-key-file=/opt/etcd/ssl/server-key.pem \
    --trusted-ca-file=/opt/etcd/ssl/ca.pem \
    --peer-trusted-ca-file=/opt/etcd/ssl/ca.pem \
    --logger=zap
    Restart=on-failure
    LimitNOFILE=65536
    [Install]
    WantedBy=multi-user.target
    EOF
    
    # Starting the etcd service failed for me; after investigation the backslash line continuations were the cause (in an unquoted here-document the shell processes \ sequences, so the unit file does not come out as written). Flattening ExecStart onto a single line, as below, works:
    [Unit]
    Description=Etcd Server
    After=network.target
    After=network-online.target
    Wants=network-online.target
    [Service]
    Type=notify
    EnvironmentFile=/opt/etcd/cfg/etcd.conf
    ExecStart=/opt/etcd/bin/etcd --cert-file=/opt/etcd/ssl/server.pem --key-file=/opt/etcd/ssl/server-key.pem --peer-cert-file=/opt/etcd/ssl/server.pem --peer-key-file=/opt/etcd/ssl/server-key.pem --trusted-ca-file=/opt/etcd/ssl/ca.pem --peer-trusted-ca-file=/opt/etcd/ssl/ca.pem --logger=zap
    Restart=on-failure
    LimitNOFILE=65536
    [Install]
    WantedBy=multi-user.target
    
    # Copy the certificates generated earlier
    [root@m1 ssl]# cp ~/TLS/etcd/ca*pem ~/TLS/etcd/server*pem /opt/etcd/ssl/
    [root@m1 ssl]# ls
    ca-key.pem  ca.pem  server-key.pem  server.pem
    
    # Copy all the files generated on the master above to the follower node
    [root@m1 system]# scp -r /opt/etcd/ root@172.16.90.148:/opt/
    [root@m1 system]# scp /usr/lib/systemd/system/etcd.service root@172.16.90.148:/usr/lib/systemd/system/
    
    # On the follower node, edit the config file
    [root@n1 ~]# vim /opt/etcd/cfg/etcd.conf 
    #[Member]
    ETCD_NAME="etcd-2"
    ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
    ETCD_LISTEN_PEER_URLS="https://172.16.90.148:2380"
    ETCD_LISTEN_CLIENT_URLS="https://172.16.90.148:2379"
    #[Clustering]
    ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.16.90.148:2380"
    ETCD_ADVERTISE_CLIENT_URLS="https://172.16.90.148:2379"
    ETCD_INITIAL_CLUSTER="etcd-1=https://172.16.90.147:2380,etcd-2=https://172.16.90.148:2380"
    ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
    ETCD_INITIAL_CLUSTER_STATE="new"
    
    # Start the service and enable it at boot (do this on both nodes; the first etcd started will wait for the second before reporting healthy)
    # Reload the systemd unit files
    [root@m1 system]# systemctl daemon-reload
    # Start the etcd service
    [root@m1 system]# systemctl start etcd
    # If it errors, this command shows the logs; it also confirms whether the service is running
    [root@m1 system]# systemctl status etcd.service 
    # Enable etcd at boot
    [root@m1 system]# systemctl enable etcd
    
    # Run the following command on the master to check whether the cluster started successfully
    [root@m1 ~]# ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://172.16.90.147:2379,https://172.16.90.148:2379" endpoint health
    https://172.16.90.147:2379 is healthy: successfully committed proposal: took = 23.120604ms
    https://172.16.90.148:2379 is healthy: successfully committed proposal: took = 24.304144ms
    # If you see the output above, the cluster deployed successfully. If something is wrong, check the logs first: /var/log/messages or journalctl -u etcd
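    
    Beyond the health check, a quick write/read/delete round-trip confirms the cluster accepts data (same TLS flags as above; the key name is arbitrary):
    [root@m1 ~]# ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://172.16.90.147:2379" put /test/hello world
    [root@m1 ~]# ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://172.16.90.147:2379" get /test/hello
    [root@m1 ~]# ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://172.16.90.147:2379" del /test/hello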
    
  • Deploy the master components
  • Deploy the node components
  • Deploy the cluster network

No further updates!!!
