Environment preparation:

IP               hostname      role
192.168.200.150  nginx         LB
192.168.200.151  master01-151  docker-ce / rke / helm / kubectl
192.168.200.152  master02-152  docker-ce / rke
192.168.200.153  master03-153  docker-ce / rke

(1) Set the hostnames
(2) Configure name resolution  # in production, resolve domain names with an internal DNS server
(3) Disable the swap partition
(4) Open the firewall ports (for reference)

protocol  port   service
tcp       22     sshd
tcp       10250  kubelet
tcp       10251  kube-scheduler: accepts HTTP requests, insecure port, no authentication/authorization required
tcp       10259  kube-scheduler: accepts HTTPS requests, secure port, authentication/authorization required
tcp       10252  kube-controller-manager: accepts HTTP requests, insecure port, no authentication/authorization required
tcp       10257  kube-controller-manager: accepts HTTPS requests, secure port, authentication/authorization required
tcp       6443   kube-apiserver
tcp       2379   etcd
tcp       2380   etcd
udp       8472   VXLAN overlay network (flannel/canal)
udp       323    chronyd

(5) Configure the SELinux policy
(6) Set the time zone and configure time synchronization  # in production, use an internal NTP server and configure chrony
(7) Other system tuning

Software versions (all latest stable releases as of 2021-09-15):
docker-ce: 20.10.8
rke: v1.3.0
kubernetes: v1.21.4-rancher1-1
kubectl: v1.21.4
helm: 3.6.3
cert-manager: 1.5.3
nginx: 1.20.1

1. Install docker-ce

wget -O /etc/yum.repos.d/docker-ce.repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum list --showduplicates docker-ce
yum install -y docker-ce-20.10.8
mkdir -p /etc/docker /data/docker

# Define the docker configuration ("graph" sets the docker data directory; newer releases call this key "data-root")
vim /etc/docker/daemon.json
{
  "graph": "/data/docker",
  "storage-driver": "overlay2",
  "insecure-registries": ["registry.access.redhat.com","quay.io","harbor.example.com"],
  "registry-mirrors": ["https://q2gr04ke.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-opts": {"max-size":"32M", "max-file":"2"}
}

systemctl restart docker
systemctl enable docker

2. Load kernel modules

modprobe -a br_netfilter ip6_udp_tunnel ip_set ip_set_hash_ip ip_set_hash_net \
  iptable_filter iptable_nat iptable_mangle iptable_raw nf_conntrack_netlink nf_conntrack \
  nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat nf_nat_ipv4 nf_nat_masquerade_ipv4 nfnetlink \
  udp_tunnel veth vxlan x_tables xt_addrtype xt_conntrack xt_comment xt_mark xt_multiport \
  xt_nat xt_recent xt_set xt_statistic xt_tcpudp

3. Adjust kernel parameters

cat >> /etc/sysctl.conf <<EOF
net.ipv4.ip_forward=1
net.bridge.bridge-nf-call-iptables=1
EOF
sysctl -p /etc/sysctl.conf

4. Create the rke user and configure passwordless SSH

# The rke user needs to call the docker client to create containers
useradd -m -s /bin/bash -G docker rke
echo "123456" | passwd --stdin rke
# The rke user installs and runs the cluster components over SSH, similar to ceph-deploy
su - rke
ssh-keygen
ssh-copy-id rke@192.168.200.151
ssh-copy-id rke@192.168.200.152
ssh-copy-id rke@192.168.200.153
exit

5. Install rke

wget https://github.com/rancher/rke/releases/download/v1.3.0/rke_linux-amd64
mv rke_linux-amd64 /usr/local/bin/rke
chmod +x /usr/local/bin/rke

# Define the basic cluster configuration
vim cluster.yml
nodes:
  - address: 192.168.200.151
    port: "22"
    internal_address: 192.168.200.151
    role: [controlplane,worker,etcd]
    hostname_override: "master01-151"
    user: rke
    ssh_key_path: /home/rke/.ssh/id_rsa
    ssh_agent_auth: true
    labels: {}
    taints: []
  - address: 192.168.200.152
    port: "22"
    internal_address: 192.168.200.152
    role: [controlplane,worker,etcd]
    hostname_override: "master02-152"
    user: rke
    ssh_key_path: /home/rke/.ssh/id_rsa
    ssh_agent_auth: true
    labels: {}
    taints: []
  - address: 192.168.200.153
    port: "22"
    internal_address: 192.168.200.153
    role: [controlplane,worker,etcd]
    hostname_override: "master03-153"
    user: rke
    ssh_key_path: /home/rke/.ssh/id_rsa
    ssh_agent_auth: true
    labels: {}
    taints: []
kubernetes_version: "v1.21.4-rancher1-1"
cluster_name: "mycluster"
services:
  etcd:
    backup_config:
      enabled: true
      interval_hours: 6
      retention: 60

# Run rke
rke up --config ./cluster.yml

6. Save the important files

# Keep all of the files listed below; they are needed for cluster maintenance, troubleshooting and upgrades. Copy them to a safe location (a backup sketch follows this list):
cluster.yml: the RKE cluster configuration file.
kube_config_cluster.yml: the kubeconfig file for this cluster; it contains credentials with full access to the cluster.
cluster.rkestate: the Kubernetes cluster state file; it also contains credentials with full access to the cluster. It is only created when using RKE v0.2.0 or later.
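Since losing any of these three files makes the cluster hard to maintain, a small backup script can be run on the node where rke was executed. This is a minimal sketch, assuming the files sit in the current directory; the backup path and the one-off snapshot name are illustrative, and the `rke etcd snapshot-save` call simply complements the scheduled backup_config defined in cluster.yml above.

#!/bin/bash
# Sketch: back up the RKE state files and take a one-off etcd snapshot.
# BACKUP_DIR and the snapshot name are assumptions; adjust them to your environment.
set -euo pipefail

BACKUP_DIR=/data/backup/rke-$(date +%Y%m%d-%H%M%S)
mkdir -p "$BACKUP_DIR"

# Copy the three files RKE needs to maintain and upgrade the cluster
cp cluster.yml kube_config_cluster.yml cluster.rkestate "$BACKUP_DIR"/
chmod 600 "$BACKUP_DIR"/kube_config_cluster.yml "$BACKUP_DIR"/cluster.rkestate

# Optional one-off etcd snapshot in addition to the scheduled backups
rke etcd snapshot-save --config ./cluster.yml --name manual-$(date +%Y%m%d-%H%M%S)

echo "state files saved under $BACKUP_DIR; etcd snapshot is stored on the etcd nodes (/opt/rke/etcd-snapshots by default)"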
7. Install kubectl

wget http://rancher-mirror.cnrancher.com/kubectl/v1.21.4/linux-amd64-v1.21.4-kubectl
mv linux-amd64-v1.21.4-kubectl /usr/local/bin/kubectl
chmod +x /usr/local/bin/kubectl
kubectl version --client

# Copy the kubeconfig and check the cluster
mkdir ~/.kube && cp kube_config_cluster.yml ~/.kube/config
kubectl get cs
kubectl get nodes

8. Install helm

wget http://rancher-mirror.cnrancher.com/helm/v3.6.3/helm-v3.6.3-linux-amd64.tar.gz
tar zxvf helm-v3.6.3-linux-amd64.tar.gz
mv linux-amd64/helm /usr/local/bin/helm   # the tarball unpacks into linux-amd64/, move the binary onto the PATH
helm version

# Add the Rancher stable chart repository
helm repo add rancher-stable http://rancher-mirror.oss-cn-beijing.aliyuncs.com/server-charts/stable

9. Install cert-manager

# Create the CRD resources
wget https://github.com/jetstack/cert-manager/releases/download/v1.5.3/cert-manager.crds.yaml
kubectl apply -f cert-manager.crds.yaml

# Add the jetstack repository and install cert-manager
kubectl create namespace cert-manager
helm repo add jetstack https://charts.jetstack.io
helm repo update
helm install cert-manager jetstack/cert-manager --namespace cert-manager --version v1.5.3
kubectl get pods -n cert-manager

10. Install Rancher

# hostname is the domain used to access Rancher; DNS resolution must be configured for it
kubectl create namespace cattle-system
helm install rancher rancher-stable/rancher \
  --namespace cattle-system \
  --set hostname=rancher.example.com \
  --set ingress.tls.source=letsEncrypt \
  --set letsEncrypt.email=admin@example.com

# Check the rollout status of the Rancher deployment
kubectl -n cattle-system rollout status deploy/rancher
kubectl -n cattle-system get deploy rancher

11. Configure the load balancer

# nginx 1.20.1 installed from the EPEL repository on CentOS 7.6 does not include the stream module by default, so install the nginx-all-modules package
yum -y install nginx-all-modules.noarch

# server block inside the http block: redirect port 80 traffic to port 443
server {
    listen 80;
    return 301 https://$host$request_uri;
}

# stream block: configure the layer-4 reverse proxy
stream {
    log_format proxy '$time_local|$remote_addr|$upstream_addr|$protocol|$status|'
                     '$session_time|$upstream_connect_time|$bytes_sent|$bytes_received|'
                     '$upstream_bytes_sent|$upstream_bytes_received' ;

    upstream rancher_servers {
        least_conn;
        server 192.168.200.151:443 max_fails=3 fail_timeout=5s;
        server 192.168.200.152:443 max_fails=3 fail_timeout=5s;
        server 192.168.200.153:443 max_fails=3 fail_timeout=5s;
    }
    server {
        listen 443;
        proxy_pass rancher_servers;
        access_log /var/log/nginx/proxy.log proxy;
    }
}

# Restart nginx
systemctl restart nginx

12. Access the Rancher domain

https://rancher.example.com
Create new or import existing downstream clusters; the setup is now complete. (A post-install smoke-test sketch follows the uninstall script below.)

Appendix: script for completely uninstalling rke

#!/bin/bash
# Stop all running containers
docker stop $(docker ps -a -q)
# Remove all containers
docker rm -f $(docker ps -qa)
# Remove all container volumes
docker volume rm $(docker volume ls -q)
# Unmount kubelet/rancher directories
for mount in $(mount | grep tmpfs | grep '/var/lib/kubelet' | awk '{ print $3 }') /var/lib/kubelet /var/lib/rancher; do umount $mount; done
# Remove leftover paths
rm -rf /etc/ceph \
       /etc/cni \
       /etc/kubernetes \
       /opt/cni \
       /opt/rke \
       /run/secrets/kubernetes.io \
       /run/calico \
       /run/flannel \
       /var/lib/calico \
       /var/lib/etcd \
       /var/lib/cni \
       /var/lib/kubelet \
       /var/lib/rancher/rke/log \
       /var/log/containers \
       /var/log/pods
# Clean up network interfaces
network_interface=`ls /sys/class/net`
for net_inter in $network_interface;
do
  if ! echo $net_inter | grep -qiE 'lo|docker0|eth*|ens*';then
    ip link delete $net_inter
  fi
done
# Clean up leftover processes
port_list='80 443 6443 2376 2379 2380 8472 9099 10250 10254'
for port in $port_list
do
  pid=`netstat -atlnup|grep $port |awk '{print $7}'|awk -F '/' '{print $1}'|grep -v -|sort -rnk2|uniq`
  if [[ -n $pid ]];then
    kill -9 $pid
  fi
done
pro_pid=`ps -ef |grep -v grep |grep kube|awk '{print $2}'`
if [[ -n $pro_pid ]];then
  kill -9 $pro_pid
fi
# Flush iptables rules
sudo iptables --flush
sudo iptables --flush --table nat
sudo iptables --flush --table filter
sudo iptables --table nat --delete-chain
sudo iptables --table filter --delete-chain
# Restart docker
sudo systemctl restart docker
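As a quick check of steps 9 to 11 before handing the environment over, a smoke test along the following lines can be run from a host that can reach the load balancer. This is a sketch, assuming kubectl uses the kubeconfig copied in step 7 and that rancher.example.com (the hostname set in step 10) resolves to the nginx LB at 192.168.200.150.

#!/bin/bash
# Sketch: post-install smoke test for cert-manager, Rancher and the nginx layer-4 proxy.

# cert-manager pods should all be Running
kubectl get pods -n cert-manager

# the Rancher deployment should report all replicas available
kubectl -n cattle-system rollout status deploy/rancher

# the LB should answer on 443, and plain HTTP should be redirected to HTTPS
curl -kIs https://rancher.example.com | head -n 1
curl -Is  http://rancher.example.com  | head -n 1   # expect a 301 redirect to https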
References:
https://docs.rancher.cn/rancher2.5/
https://blog.csdn.net/AmorKlee/article/details/116940446
https://www.cnblogs.com/xiao987334176/p/12981735.html