前言
前面我们已经使用kubernetes_sd_config对K8s集群进行了一系列的监控,包括使用node发现模式监控node节点资源,以及监控kubelet,scheduler,controller-manager,pod等。但是这些都是k8s组件内置的指标数据,并不完整,而且其他的资源对象也需要监控,例如Deployment,DaemonSet,ConfigMap等资源对象。因此我们可以额外在K8s集群中部署一个指标采集系统kube-state-metrics,由它去与apiserver通信,然后prometheus再去获取它的指标数据。
一、部署kube-state-metrics
github地址:https://github.com/kubernetes/kube-state-metrics
部署kube-state-metrics需要创建以下Kubernetes资源对象
RBAC认证:访问Kubernetes api的凭据相关信息
Deployment资源对象:管理和部署Pod
Service资源对象
以下为资源对象的yaml文件:
[root@k8s-master ~]# vim rbac.yaml
# 源YAML文件可到Github下载:https://github.com/shaxiaozz/prometheus
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.2.3
name: kube-state-metrics
namespace: monitor
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.2.3
name: kube-state-metrics
rules:
- apiGroups:
- ""
resources:
- configmaps
- secrets
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
- daemonsets
- deployments
- replicasets
verbs:
- list
- watch
- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs:
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- policy
resources:
- poddisruptionbudgets
verbs:
- list
- watch
- apiGroups:
- certificates.k8s.io
resources:
- certificatesigningrequests
verbs:
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
- volumeattachments
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- mutatingwebhookconfigurations
- validatingwebhookconfigurations
verbs:
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
- ingresses
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.2.3
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: monitor
[root@k8s-master ~]# vim deploy.yaml
# 源YAML文件可到Github下载:https://github.com/shaxiaozz/prometheus
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.2.3
name: kube-state-metrics
namespace: monitor
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
template:
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.2.3
spec:
containers:
- image: registry.cn-guangzhou.aliyuncs.com/shaxiaozz/kube-state-metrics:v2.2.3
# 容器资源限制
resources:
limits:
memory: 300Mi
cpu: 200m
requests:
memory: 150Mi
cpu: 100m
# 健康检查
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 15
timeoutSeconds: 5
name: kube-state-metrics
ports:
- containerPort: 8080
name: http-metrics
- containerPort: 8081
name: telemetry
# 就绪检查
readinessProbe:
httpGet:
path: /
port: 8081
initialDelaySeconds: 5
timeoutSeconds: 5
# 容器安全上下文
securityContext:
runAsUser: 65534
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics
[root@k8s-master ~]# vim svc.yaml
# 源YAML文件可到Github下载:https://github.com/shaxiaozz/prometheus
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.2.3
name: kube-state-metrics
namespace: monitor
spec:
clusterIP: None
ports:
- name: http-metrics
port: 8080
targetPort: http-metrics
- name: telemetry
port: 8081
targetPort: telemetry
selector:
app.kubernetes.io/name: kube-state-metrics
[root@pre01 kube-state-metrics]# kubectl apply -f rbac.yaml
[root@pre01 kube-state-metrics]# kubectl apply -f deploy.yaml
[root@pre01 kube-state-metrics]# kubectl apply -f svc.yaml
[root@pre01 kube-state-metrics]# kubectl get svc,ep,pods -n monitor -l app.kubernetes.io/name=kube-state-metrics
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/kube-state-metrics ClusterIP None <none> 8080/TCP,8081/TCP 34m
NAME ENDPOINTS AGE
endpoints/kube-state-metrics 10.244.219.126:8081,10.244.219.126:8080 34m
NAME READY STATUS RESTARTS AGE
pod/kube-state-metrics-676d558fbb-tghjx 1/1 Running 0 35m
二、修改prometheus配置文件
[root@k8s-master ~]# vim prometh_configmap.yaml
由于我的kube-state-metrics是部署在monitor命名空间下的,因此在relabel的正则匹配上,只有命名空间为monitor且endpoints名称(与svc同名)为kube-state-metrics的目标才会被保留(keep),其余目标都会被丢弃,不进行监控。
#k8s kube-state-metrics监控任务
- job_name: "kube-state-metrics"
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_endpoints_name]
regex: monitor;kube-state-metrics
action: keep
[root@k8s-master ~]# kubectl apply -f prometh_configmap.yaml
[root@k8s-master ~]# curl -X POST http://10.244.219.114:9090/-/reload #热加载配置文件
三、查看Targets
可以看到kube-state-metrics监控任务下已经有两个Targets实例了(分别对应endpoints中的8080和8081两个端口)。我们可以到Graph界面通过PromQL语句查询监控数据了。
例如:查看当前K8s集群有多少configmap资源对象
count(kube_configmap_created)
例如:查看当前集群有多少个Pod正在运行
sum(kube_pod_status_phase{phase="Running"})
例如:查看当前集群有多少个容器正在运行(注意该指标是按容器统计的,一个Pod内有多个容器时会被统计多次)
count(kube_pod_container_state_started)
对文章中的yaml文件有兴趣的可查看我的github项目:https://github.com/shaxiaozz/prometheus