1.1 Prometheus Fundamentals
1.1.1 Prometheus Architecture
Data collection: the Prometheus targets on the far left of the architecture are the objects being monitored, and the Retrieval component collects their data; both push and pull collection modes are supported.
Pull mode: collection is triggered from the server side. As long as a target exposes an HTTP metrics endpoint it can be scraped. This is the most common mode, and everything described below uses it.
Push mode: each target actively pushes its metrics to a Pushgateway, from which the server then scrapes them.
Storage: monitoring data is persisted in the TSDB, a database designed specifically for time-series data that indexes samples by time.
Query and processing: besides storage, the TSDB provides basic query and processing capabilities through PromQL, which is the foundation of both alerting and the visualization layer.
Alerting: Alertmanager handles alerts; it can dynamically group, silence, and inhibit them to reduce alert volume, and it supports multiple notification channels.
Visualization: Grafana provides multi-dimensional dashboards.
1.1.2 How Does Monitoring Work?
Prometheus collects data in either push or pull mode. Based on the scrape jobs defined in its configuration file, it scrapes the various Kubernetes components and even open-source applications such as MySQL and Redis, and stores the samples in the TSDB time-series database inside the Prometheus server. When a rule's condition has held for the configured duration, the alert is pushed to Alertmanager for notification, while Grafana uses PromQL to pull the monitoring data and render dashboards.
P.S. node_exporter references:
Documentation: https://prometheus.io/docs/guides/node-exporter/
GitHub: https://github.com/prometheus/node_exporter
Exporter list: https://prometheus.io/docs/instrumenting/exporters/
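In pull mode a scrape is simply an HTTP GET of a target's /metrics endpoint. A minimal sketch, assuming node_exporter is already running on a node on its default port 9100 (IP stands for the node's address):
# Raw text exposition format, exactly what Prometheus scrapes
curl -s http://IP:9100/metrics | head -n 20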
1.1.3 Prometheus Metric Identifiers
Prometheus stores all data as time series; samples with the same metric name and the same set of labels belong to the same series.
Every time series is uniquely identified by its metric name plus a set of key-value pairs (also known as labels).
Time series format:
<metric name>{<label name>=<label value>, ...}
An example:
container_cpu_user_seconds_total{beta_kubernetes_io_arch="amd64",beta_kubernetes_io_os="linux",container_name="POD",id="/kubepods/besteffort/pod011eaa38-af5c-11e8-a65f-801844f11504/b659e04fa88e0f3080612d6479dec32d41afe632efd4f43f80714245be899d14",image="k8s.gcr.io/pause-amd64:3.1",instance="h-ldocker-02",job="kubernetes-cadvisor",kubernetes_io_hostname="h-ldocker-02",name="k8s_POD_consul-client-0_plms-pro_011eaa38-af5c-11e8-a65f-801844f11504_0",namespace="plms-pro",pod_name="consul-client-0",rolehost="k8smaster"}
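Any subset of labels can be used to select series; besides exact matches, =~ and !~ accept regular expressions. A small sketch of querying such selectors through the HTTP API (IP:30003 is the Prometheus NodePort configured in the next section):
# All container CPU series in namespace plms-pro whose pod name starts with "consul"
curl -sG 'http://IP:30003/api/v1/query' \
  --data-urlencode 'query=container_cpu_user_seconds_total{namespace="plms-pro",pod_name=~"consul.*"}'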
1.2 Deploying Prometheus in Containers
Extract k8s-prometheus-grafana.tar.gz and deploy prometheus.svc.yml, rbac-setup.yaml, prometheus-rules.yaml, configmap.yaml, and prometheus.deploy.yml.
① prometheus.svc: exposes Prometheus on nodePort 30003 for external access and for debugging PromQL queries.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat prometheus.svc.yml
---
kind: Service
apiVersion: v1
metadata:
labels:
app: prometheus
name: prometheus
namespace: kube-system
spec:
type: NodePort
ports:
- port: 9090
targetPort: 9090
nodePort: 30003
selector:
app: prometheus
② rbac-setup: grants the prometheus ServiceAccount access to the Kubernetes components it needs to scrape.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat rbac-setup.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups:
- extensions
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: kube-system
③ prometheus-rules: defines the Prometheus alerting rules, split into general.rules and node.rules, which cover Kubernetes Pods and the node hosts respectively.
Take the InstanceDown rule below as an example. Its data comes from "expr: up == 0"; the up metric reflects the health of each Prometheus target (such as the apiserver), and up == 0 means the target is down. Once the condition has held for two minutes ("for: 2m"), the alert is handed to Alertmanager for grouping and notification.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat prometheus-rules.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-rules
namespace: kube-system
data:
general.rules: |
groups:
- name: general.rules
rules:
- alert: InstanceDown
expr: up == 0
for: 2m
labels:
severity: error
annotations:
summary: "Instance {{ $labels.instance }} 停止工作"
description: "{{ $labels.instance }} job {{ $labels.job }} 已经停止5分钟以上."
- alert: PodCpuuse
expr: sum(rate(container_cpu_usage_seconds_total{image!=""}[1m])) by (pod_name, namespace) / (sum(container_spec_cpu_quota{image!=""}/100000) by (pod_name, namespace)) * 100 > 80
for: 5m
labels:
severity: warn
annotations:
summary: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} CPU使用率过高"
description: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} CPU使用率大于80%. (当前值:{{ $value }}%)"
- alert: PodMemoryuse
expr: sum(container_memory_rss{image!=""}) by(pod_name, namespace) / sum(container_spec_memory_limit_bytes{image!=""}) by(pod_name, namespace) * 100 != +inf > 80
for: 5m
labels:
severity: warn
annotations:
summary: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} 内存使用率过高"
description: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} 内存使用率大于80%. (当前值:{{ $value }}%)"
- alert: PodFailed
expr: sum (kube_pod_status_phase{phase="Failed"}) by (pod,namespace) > 0
for: 1m
labels:
severity: error
annotations:
summary: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod }} pod status is Failed"
description: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod }} pod status is Failed. (当前值:{{ $value }})"
- alert: PodPending
expr: sum (kube_pod_status_phase{phase="Pending"}) by (pod,namespace) > 0
for: 1m
labels:
severity: error
annotations:
summary: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod }} pod status is Pending"
description: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod }} pod status is Pending. (当前值:{{ $value }})"
- alert: PodNetworkReceive
expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*"}[5m]) /1000) by (pod_name,namespace) > 20000
for: 5m
labels:
severity: warn
annotations:
summary: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} 接受到的网络流量过大"
description: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} 接受到的网络流量大于20MB/s. (当前值:{{ $value }}B/s)"
- alert: PodNetworkTransmit
expr: sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*"}[5m]) /1000) by (pod_name,namespace) > 20000
for: 5m
labels:
severity: warn
annotations:
summary: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} 传输的网络流量过大"
description: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod_name }} 传输的网络流量大于20MB/s. (当前值:{{ $value }}B/s)"
- alert: PodRestart
expr: sum (changes (kube_pod_container_status_restarts_total[1m])) by (pod,namespace) > 0
for: 1m
labels:
severity: warn
annotations:
summary: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod }} pod is restart"
description: "Namespaces: {{ $labels.namespace }} | PodName: {{ $labels.pod }} 在一分钟内重启. (当前值:{{ $value }})"
node.rules: |
groups:
- name: node.rules
rules:
- alert: NodeFilesystemUsage
expr: 100 - (node_filesystem_free_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"} * 100) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "Instance {{ $labels.instance }} : {{ $labels.mountpoint }} 分区使用率过高"
description: "{{ $labels.instance }}: {{ $labels.mountpoint }} 分区使用大于80% (当前值: {{ $value }})"
- alert: NodeMemoryUsage
expr: 100 - (node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 > 80
for: 5m
labels:
severity: warning
annotations:
summary: "Instance {{ $labels.instance }} 内存使用率过高"
description: "{{ $labels.instance }}内存使用大于80% (当前值: {{ $value }})"
- alert: NodeCPUUsage
expr: 100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) * 100) > 60
for: 5m
labels:
severity: warning
annotations:
summary: "Instance {{ $labels.instance }} CPU使用率过高"
description: "{{ $labels.instance }}CPU使用大于60% (当前值: {{ $value }})"
④ configmap.yaml: the Prometheus configuration file, which defines which time-series data is scraped. It uses cAdvisor to obtain per-container CPU, memory, and network I/O metrics, the kubernetes-nodes jobs to obtain node-level metrics, and the kubernetes-service-endpoints job to pick up kube-state-metrics. Kubernetes service discovery in this ConfigMap is defined per role, for example node, service, pod, endpoints, and ingress.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: kube-system
data:
prometheus.yml: |
rule_files: ## explicitly point Prometheus at the rule files; this path must be mounted in the Deployment
- /etc/config/rules/*.rules
global:
scrape_interval: 15s ## how often Prometheus scrapes its targets
evaluation_interval: 15s
scrape_configs:
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: kubernetes-nodes ## node data scraped from node_exporter
scrape_interval: 30s
static_configs:
- targets:
- *
- job_name: 'kubernetes-nodes-kubelet'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- job_name: 'kubernetes-nodes-cadvisor'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- job_name: 'kubernetes-services'
kubernetes_sd_configs:
- role: service
metrics_path: /probe
params:
module: [http_2xx]
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter.example.com:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
- job_name: 'kubernetes-ingresses'
kubernetes_sd_configs:
- role: ingress
relabel_configs:
- source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
regex: (.+);(.+);(.+)
replacement: ${1}://${2}${3}
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter.example.com:9115
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_ingress_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_ingress_name]
target_label: kubernetes_name
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
alerting: ## integration between Prometheus and Alertmanager
alertmanagers:
- static_configs:
- targets: ["alertmanager:80"]
⑤ Deploy the Prometheus Deployment.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat prometheus.deploy.yml
---
apiVersion: apps/v1beta2
kind: Deployment
metadata:
labels:
name: prometheus-deployment
name: prometheus
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
template:
metadata:
labels:
app: prometheus
spec:
containers:
- image: prom/prometheus:v2.0.0
name: prometheus
command:
- "/bin/prometheus"
args:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus"
- "--storage.tsdb.retention=24h"
ports:
- containerPort: 9090
protocol: TCP
volumeMounts:
- mountPath: "/prometheus"
name: data
- mountPath: "/etc/prometheus"
name: config-volume
- name: prometheus-rules ### mount the prometheus rule files; the path must match rule_files in the ConfigMap
mountPath: /etc/config/rules
- name: localtime
mountPath: /etc/localtime
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
cpu: 500m
memory: 2500Mi
serviceAccountName: prometheus
volumes:
- name: data
hostPath:
path: /var/lib/docker/prometheus
- name: config-volume
configMap:
name: prometheus-config
- name: prometheus-rules
configMap:
name: prometheus-rules
- name: localtime
hostPath:
path: /etc/localtime
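With all five manifests in place they can be applied in the usual way; a sketch (file names as listed at the start of this section):
kubectl apply -f rbac-setup.yaml
kubectl apply -f configmap.yaml
kubectl apply -f prometheus-rules.yaml
kubectl apply -f prometheus.deploy.yml
kubectl apply -f prometheus.svc.yml
kubectl get pods -n kube-system -l app=prometheus   # wait until the pod is Running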
At this point the Prometheus deployment is complete; open http://IP:30003 in a browser.
Check that the rules have loaded correctly:
Check that the targets are up:
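Besides the web UI, targets and rule results can also be checked from the command line through the HTTP API (IP is any node's address, 30003 is the NodePort configured above):
# Every scrape target and its health
curl -s http://IP:30003/api/v1/targets | python -m json.tool | head -n 40
# up returns 1 for healthy targets and 0 for ones that are down
curl -sG http://IP:30003/api/v1/query --data-urlencode 'query=up'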
1.3 Deploying node-exporter
node-exporter is deployed here from the binary package rather than as a container, because containerized node-exporter was found to report less accurate host data. Run the deployment script below on every node; node_exporter listens on port 9100.
#!/bin/bash
wget https://github.com/prometheus/node_exporter/releases/download/v0.17.0/node_exporter-0.17.0.linux-amd64.tar.gz
tar zxf node_exporter-0.17.0.linux-amd64.tar.gz
mv node_exporter-0.17.0.linux-amd64 /usr/local/node_exporter
cat <<EOF >/usr/lib/systemd/system/node_exporter.service
[Unit]
Description=https://prometheus.io
[Service]
Restart=on-failure
ExecStart=/usr/local/node_exporter/node_exporter --collector.systemd --collector.systemd.unit-whitelist=(docker|kubelet|kube-proxy|flanneld).service
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
systemctl enable node_exporter
systemctl restart node_exporter
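After the script has run on a node, a quick check that node_exporter is up (9100 is its default port); the node's IP:9100 then needs to be listed under the static_configs targets of the kubernetes-nodes job in configmap.yaml:
systemctl status node_exporter --no-pager
curl -s http://localhost:9100/metrics | grep '^node_cpu_seconds_total' | head -n 5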
1.4 Deploying kube-state-metrics
The state of Kubernetes resource objects such as Pods, DaemonSets, Deployments, Jobs, and CronJobs needs to be monitored, since it reflects the health of the applications deployed with those resources. However, the metrics Prometheus has scraped from the cluster so far (mainly from the apiserver and the cAdvisor embedded in the kubelet) contain no per-object state metrics. Prometheus therefore needs an additional exporter to expose them, and kube-state-metrics, provided by the Kubernetes project, is exactly that.
Deploy kube-state-metrics-rbac.yaml to set up authorization:
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat kube-state-metrics-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: kube-state-metrics
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kube-state-metrics
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
rules:
- apiGroups: [""]
resources:
- configmaps
- secrets
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs: ["list", "watch"]
- apiGroups: ["extensions"]
resources:
- daemonsets
- deployments
- replicasets
verbs: ["list", "watch"]
- apiGroups: ["apps"]
resources:
- statefulsets
verbs: ["list", "watch"]
- apiGroups: ["batch"]
resources:
- cronjobs
- jobs
verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
resources:
- horizontalpodautoscalers
verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: kube-state-metrics-resizer
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
rules:
- apiGroups: [""]
resources:
- pods
verbs: ["get"]
- apiGroups: ["extensions"]
resources:
- deployments
resourceNames: ["kube-state-metrics"]
verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kube-state-metrics
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: kube-state-metrics
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kube-state-metrics-resizer
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: kube-system
Deploy kube-state-metrics-service.yaml:
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat kube-state-metrics-service.yaml
apiVersion: v1
kind: Service
metadata:
name: kube-state-metrics
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
kubernetes.io/name: "kube-state-metrics"
annotations:
prometheus.io/scrape: 'true'
spec:
ports:
- name: http-metrics
port: 8080
targetPort: http-metrics
protocol: TCP
- name: telemetry
port: 8081
targetPort: telemetry
protocol: TCP
selector:
k8s-app: kube-state-metrics
Deploy kube-state-metrics-deployment.yaml:
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat kube-state-metrics-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: kube-state-metrics
namespace: kube-system
labels:
k8s-app: kube-state-metrics
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: v1.3.0
spec:
selector:
matchLabels:
k8s-app: kube-state-metrics
version: v1.3.0
replicas: 1
template:
metadata:
labels:
k8s-app: kube-state-metrics
version: v1.3.0
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
priorityClassName: system-cluster-critical
serviceAccountName: kube-state-metrics
containers:
- name: kube-state-metrics
image: **kube-state-metrics:v1.3.0
ports:
- name: http-metrics
containerPort: 8080
- name: telemetry
containerPort: 8081
readinessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
- name: addon-resizer
image: **addon-resizer:1.8.3
resources:
limits:
cpu: 100m
memory: 30Mi
requests:
cpu: 100m
memory: 30Mi
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
volumeMounts:
- name: config-volume
mountPath: /etc/config
command:
- /pod_nanny
- --config-dir=/etc/config
- --container=kube-state-metrics
- --cpu=100m
- --extra-cpu=1m
- --memory=100Mi
- --extra-memory=2Mi
- --threshold=5
- --deployment=kube-state-metrics
volumes:
- name: config-volume
configMap:
name: kube-state-metrics-config
---
# Config map for resource configuration.
apiVersion: v1
kind: ConfigMap
metadata:
name: kube-state-metrics-config
namespace: kube-system
labels:
k8s-app: kube-state-metrics
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
data:
NannyConfiguration: |-
apiVersion: nannyconfig/v1alpha1
kind: NannyConfiguration
Verification: check in Prometheus that the kube-state-metrics target is being scraped and that kube_* metrics can be queried on the Graph page, for example as shown below.
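A command-line sketch of the same check (the file names are the three manifests above; IP is any node's address):
kubectl apply -f kube-state-metrics-rbac.yaml -f kube-state-metrics-service.yaml -f kube-state-metrics-deployment.yaml
kubectl get pods -n kube-system -l k8s-app=kube-state-metrics
# The Service is annotated prometheus.io/scrape: 'true', so the kubernetes-service-endpoints job should scrape it
curl -sG 'http://IP:30003/api/v1/query' --data-urlencode 'query=kube_pod_status_phase{phase="Running"}' | head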
1.5 PromQL Basics
How a metric appears in Grafana:
To look at a utilization metric, for example CPU usage,
use the rate function, which computes the rate of change over a time window:
rate(metric_name{filters}[interval])
These two functions are frequently used in Prometheus to compute increases or rates, and both require a time range such as [1m]:
irate(): the instantaneous per-second rate of increase, based on the last two samples in the window.
rate(): the average per-second rate of increase over the whole window.
For example, non-idle CPU usage over the last minute: rate(node_cpu_seconds_total{mode!="idle"}[1m])
A PromQL expression should be tuned on the Prometheus debug page before being placed in Grafana. The expr fields written in prometheus-rules are essentially the same expressions that drive the Grafana panels; with minor changes, or even unchanged, they can be reused as monitoring queries.
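The same expressions can also be evaluated outside the web UI through the HTTP API, which is handy for quick scripted checks; a sketch using the NodePort 30003 (the start/end timestamps are examples only):
# Instant query, equivalent to the Console tab
curl -sG 'http://IP:30003/api/v1/query' --data-urlencode 'query=rate(node_cpu_seconds_total{mode!="idle"}[1m])'
# Range query, which is what Grafana panels issue under the hood
curl -sG 'http://IP:30003/api/v1/query_range' \
  --data-urlencode 'query=rate(node_cpu_seconds_total{mode!="idle"}[1m])' \
  --data-urlencode 'start=2019-01-01T00:00:00Z' --data-urlencode 'end=2019-01-01T01:00:00Z' --data-urlencode 'step=60s'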
Node monitoring expressions
CPU usage: take the 5-minute average of per-second idle CPU time per instance and subtract it from 100 to get CPU utilization.
100-(avg(irate(node_cpu_seconds_total{mode="idle"}[5m]))by(instance)*100)
Memory usage: add free, cached, and buffer memory to get total free memory, divide by total memory, and subtract from 100 to get memory utilization.
100-(node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes)/node_memory_MemTotal_bytes*100
Disk usage: same idea.
100-(node_filesystem_free_bytes{mountpoint="/",fstype=~"ext4|xfs"}/node_filesystem_size_bytes{mountpoint="/",fstype=~"ext4|xfs"}*100)
Container monitoring expressions
Target health: apiserver, scheduler, controller and other components can be monitored this way; 1 means healthy, 0 means down.
up == 0
Container CPU usage: take the container CPU usage over 1 minute and divide by the container CPU limit to get the utilization.
sum(rate(container_cpu_usage_seconds_total{image!=""}[1m])) by (pod_name, namespace) / (sum(container_spec_cpu_quota{image!=""}/100000) by (pod_name, namespace)) * 100 > 80
Container memory usage: divide the memory actually used by the container (RSS) by its memory limit to get the utilization.
sum(container_memory_rss{image!=""}) by(pod_name, namespace) / sum(container_spec_memory_limit_bytes{image!=""}) by(pod_name, namespace) * 100 != +inf > 80
Pod in Failed state: 0 is normal, anything greater than 0 means Failed pods exist.
sum (kube_pod_status_phase{phase="Failed"}) by (pod,namespace) > 0
Pod in Pending state: 0 is normal, anything greater than 0 means Pending pods exist.
sum (kube_pod_status_phase{phase="Pending"}) by (pod,namespace) > 0
Container network receive I/O; alert if it exceeds 20 MB/s (adjust to your environment).
sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*"}[5m]) /1000) by (pod_name,namespace) > 20000
Container network transmit I/O; alert if it exceeds 20 MB/s (adjust to your environment).
sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*"}[5m]) /1000) by (pod_name,namespace) > 20000
Container restarts, e.g. caused by out-of-memory kills.
sum (changes (kube_pod_container_status_restarts_total[1m])) by (pod,namespace) > 0
2.1 Deploying Grafana in Containers
Deploy grafana-svc.yaml:
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat grafana-svc.yaml
apiVersion: v1
kind: Service
metadata:
name: grafana
namespace: kube-system
labels:
app: grafana
component: core
spec:
type: NodePort
ports:
- port: 3000
targetPort: 3000
nodePort: 30002
selector:
app: grafana
component: core
Deploy grafana-deploy.yaml, which uses the relatively recent Grafana 5.4.2:
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: grafana-core
namespace: kube-system
labels:
app: grafana
component: core
spec:
replicas: 1
template:
metadata:
labels:
app: grafana
component: core
spec:
containers:
- image: grafana/grafana:5.4.2
name: grafana-core
imagePullPolicy: IfNotPresent
# env:
resources:
# keep request = limit to keep this container in guaranteed class
limits:
cpu: 2000m
memory: 2000Mi
requests:
cpu: 1000m
memory: 2000Mi
env:
# The following env variables set up basic auth with the default admin user and admin password.
- name: GF_AUTH_BASIC_ENABLED
value: "true"
- name: GF_AUTH_ANONYMOUS_ENABLED
value: "false"
# - name: GF_AUTH_ANONYMOUS_ORG_ROLE
# value: Admin
# does not really work, because of template variables in exported dashboards:
# - name: GF_DASHBOARDS_JSON_ENABLED
# value: "true"
readinessProbe:
httpGet:
path: /login
port: 3000
# initialDelaySeconds: 30
# timeoutSeconds: 1
volumeMounts:
- name: grafana-storage
mountPath: /var/lib/grafana
volumes:
- name: grafana-storage
hostPath:
path: /var/lib/grafana
type: DirectoryOrCreate
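A sketch of applying the two Grafana manifests above and confirming the NodePort:
kubectl apply -f grafana-svc.yaml -f grafana-deploy.yaml
kubectl get pods -n kube-system -l app=grafana
kubectl get svc grafana -n kube-system   # should show NodePort 30002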
2.2 Configuring the Prometheus Data Source in Grafana
Access URL: http://IP:30002
On first login the default Grafana username and password are both admin.
Configure the data source:
2.3 Loading Custom Dashboards in Grafana
Import the predefined dashboards:
Kubernetes AllPod monitoring.json: monitors all Pods, including historical ones.
Kubernetes 集群资源监控.json: monitors currently running Pods.
Kubernetes Node监控.json: monitors the nodes.
Alternatively, enter a dashboard ID to fetch it online, which is the recommended approach:
Cluster resource monitoring: 3119
Resource state monitoring: 6417
Node monitoring: 9276
The configured Grafana pages look like this:
3.1 Alertmanager Overview
Alertmanager receives the alerts sent by Prometheus. It supports a wide range of notification channels and makes it easy to deduplicate, silence, and group alerts so that alerts of the same type are bundled together, preventing notification storms and keeping the number of messages to a minimum.
3.2 Deploying Alertmanager in Containers
Deploy alertmanager-service.yaml: exposes a NodePort for external access.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat alertmanager-service.yaml
apiVersion: v1
kind: Service
metadata:
name: alertmanager
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
kubernetes.io/name: "Alertmanager"
spec:
type: NodePort
ports:
- name: http
port: 80
protocol: TCP
targetPort: 9093
nodePort: 30004
selector:
k8s-app: alertmanager
#type: "ClusterIP"
Deploy alertmanager-templates.yaml: the message template for WeChat Work alerts. The default template is too verbose, so a custom one is used; it must be mounted into the alertmanager server to take effect. The variables in the template are filled in from the metric data that Prometheus has scraped.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat alertmanager-templates.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: alertmanager-templates
namespace: kube-system
data:
wechat.tmpl: |
{{ define "wechat.default.message" }}
{{ if gt (len .Alerts.Firing) 0 -}}
Alerts Firing:
{{ range .Alerts}}
告警级别: {{ .Labels.severity }}
告警类型: {{ .Labels.alertname }}
故障主机: {{ .Labels.instance }}
告警主题: {{ .Annotations.summary }}
告警详情: {{ .Annotations.description }}
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
{{- end }}
{{- end }}
{{ if gt (len .Alerts.Resolved) 0 -}}
Alerts Resolved:
{{ range .Alerts}}
告警级别: {{ .Labels.severity }}
告警类型: {{ .Labels.alertname }}
故障主机: {{ .Labels.instance }}
告警主题: {{ .Annotations.summary }}
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
恢复时间: {{ .EndsAt.Format "2006-01-02 15:04:05" }}
{{- end }}
{{- end }}
{{- end }}
Deploy alertmanager-configmap.yaml: configures the notification channels, in this case email and WeChat Work, as well as alert intervals, grouping, and routing.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat alertmanager-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: alertmanager-config
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: EnsureExists
data:
alertmanager.yml: |
global:
resolve_timeout: 5m ## if no alert is received within this time, the alert is treated as resolved
smtp_smarthost: '*' ## note: the mail server here must include the port, otherwise mail cannot be sent
smtp_from: '*'
smtp_auth_username: '*'
smtp_auth_password: '*'
templates:
- '/etc/alertmanager-templates/*.tmpl' ## path where the alert templates are mounted
receivers:
- name: default-receiver
email_configs:
- to: 'wujqc@*'
send_resolved: true ## whether to send an email when the alert is resolved
wechat_configs:
- send_resolved: true ## whether to send a WeChat message when the alert is resolved
agent_id: '*' ## from the WeChat Work application page
to_party: '1' ## WeChat Work admin console -> Contacts -> department ID
corp_id: '*' ## from the WeChat Work application page
api_secret: '*' ## from the WeChat Work application page
route:
group_interval: 10s ## how long to wait before sending notifications for alerts added to an existing group
group_by: ['alertname'] ## group alerts by the alertname defined in prometheus-rules
group_wait: 10s ## how long to wait before sending the first notification for a new group
receiver: default-receiver ## must match a receiver name defined under receivers
repeat_interval: 5m ## how long to wait before re-sending the same alert
agent_id, api_secret, to_party, and corp_id are taken from the WeChat Work admin console (the application page, and Contacts -> department ID).
Alertmanager can also route alerts to different teams based on label values, for example MySQL alerts to the DBA team and infrastructure alerts to the platform team. This is implemented with child routes whose labels are matched (optionally with regular expressions); alerts that match none of the child routes fall back to the default route, default-receiver. For example:
route:
receiver: 'default-receiver'
group_wait: 30s
group_interval: 5m
repeat_interval: 4h
group_by: [cluster, alertname]
# All alerts that do not match the following child routes
# will remain at the root node and be dispatched to 'default-receiver'.
routes:
# All alerts with service=mysql or service=cassandra
# are dispatched to the database pager.
- receiver: 'database-pager'
group_wait: 10s
match_re: ### regex match on labels to decide which route handles the alert
service: mysql|cassandra
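Routing rules are easy to get wrong, so it is worth validating the rendered alertmanager.yml before reloading it; a sketch using amtool, the command-line tool shipped with the Alertmanager release:
# Check configuration syntax, including any templates it references
amtool check-config alertmanager.yml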
Deploy alertmanager-deployment.yaml: deploys the alertmanager server and mounts the alertmanager ConfigMap for its settings. A config-reload sidecar container is added alongside the alertmanager container so that changes to the ConfigMap are picked up without restarting the alertmanager Pod.
Note that alertmanager-templates must also be mounted here, otherwise the custom alert template does not take effect.
[root@PH-K8S-M01 k8s-prometheus-grafana]# cat alertmanager-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: alertmanager
namespace: kube-system
labels:
k8s-app: alertmanager
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: v0.14.0
spec:
replicas: 1
selector:
matchLabels:
k8s-app: alertmanager
version: v0.14.0
template:
metadata:
labels:
k8s-app: alertmanager
version: v0.14.0
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
priorityClassName: system-cluster-critical
containers:
- name: prometheus-alertmanager
image: "prom/alertmanager:v0.14.0"
imagePullPolicy: "IfNotPresent"
args:
- --config.file=/etc/config/alertmanager.yml
- --storage.path=/data
- --web.external-url=/
ports:
- containerPort: 9093
readinessProbe:
httpGet:
path: /#/status
port: 9093
initialDelaySeconds: 30
timeoutSeconds: 30
volumeMounts:
- name: config-volume
mountPath: /etc/config
- name: storage-volume
mountPath: "/data"
subPath: ""
- name: templates-volume
mountPath: /etc/alertmanager-templates ## the template mount point must match the path configured in the ConfigMap
- name: localtime
mountPath: /etc/localtime
resources:
limits:
cpu: 10m
memory: 50Mi
requests:
cpu: 10m
memory: 50Mi
- name: prometheus-alertmanager-configmap-reload
image: "jimmidyson/configmap-reload:v0.1"
imagePullPolicy: "IfNotPresent"
args:
- --volume-dir=/etc/config
- --webhook-url=http://localhost:9093/-/reload
volumeMounts:
- name: config-volume
mountPath: /etc/config
readOnly: true
resources:
limits:
cpu: 10m
memory: 10Mi
requests:
cpu: 10m
memory: 10Mi
volumes:
- name: config-volume
configMap:
name: alertmanager-config
- name: storage-volume
emptyDir: {}
- name: templates-volume
configMap:
name: alertmanager-templates ### mount the alert templates
- name: localtime
hostPath:
path: /etc/localtime
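A sketch of rolling out the four Alertmanager manifests described above and checking the result:
kubectl apply -f alertmanager-templates.yaml
kubectl apply -f alertmanager-configmap.yaml
kubectl apply -f alertmanager-deployment.yaml
kubectl apply -f alertmanager-service.yaml
kubectl get pods -n kube-system -l k8s-app=alertmanager   # both containers (server and configmap-reload) should become Ready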
Verification: you can manually trigger one of the rules loaded from prometheus-rules to test alerting.
Email alert:
Firing means the alert has been triggered.
Resolved means the alert has recovered.
3.3 Alertmanager Alert States
Inactive: nothing is happening.
Pending: the threshold has been crossed, but not yet for the configured duration.
Firing: the threshold has been crossed and has held for the configured duration; the alert is sent to the receivers.
3.4 Configuring Silences in Alertmanager
Open the Alertmanager UI at:
http://IP:30004
"New Silence" in the top-right corner creates a silence manually, putting alerts that should be ignored into maintenance mode; matchers are defined against labels, with regular expressions if needed, for example as in the sketch below.
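Silences can also be created from the command line with amtool, shipped with the Alertmanager release; a sketch (the matcher values and host are examples only):
# Silence InstanceDown alerts from one instance for two hours of planned maintenance
amtool silence add alertname=InstanceDown instance=<host> \
  --comment="planned maintenance" --duration=2h \
  --alertmanager.url=http://IP:30004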