confd的docker镜像Dockerfile,内容如下:
# Image that runs confd; /start.sh is the entrypoint and reads the CONFD_*
# environment variables declared below.
FROM alpine:3.10

# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="Alvin4u <firstep@qq.com>"

# CONFD_VERSION selects the release downloaded at build time; the remaining
# variables are runtime defaults and can be overridden with `docker run -e`.
ENV CONFD_VERSION=0.16.0 \
    CONFD_INTERVAL=60 \
    CONFD_LOG_LEVEL=info \
    CONFD_BACKEND=consul \
    CONFD_NODE=localhost:8500 \
    CONFD_HOST_NODE=localhost:9090

# --no-cache keeps the apk index out of the layer.
# curl -f makes the build fail on an HTTP error instead of saving the error
# page as /usr/bin/confd.
RUN apk add --no-cache \
        ca-certificates \
        curl \
    && curl -fsSL -o /usr/bin/confd \
        "https://github.com/kelseyhightower/confd/releases/download/v${CONFD_VERSION}/confd-${CONFD_VERSION}-linux-amd64" \
    && chmod +x /usr/bin/confd

# COPY instead of ADD: these are plain local files, no extraction or URL fetch.
COPY confd /etc/confd
COPY start.sh /start.sh
RUN chmod +x /start.sh

# Declared after /etc/confd is populated so the copied content is kept.
VOLUME /etc/prometheus
VOLUME /etc/confd

ENTRYPOINT ["/start.sh"]
启动confd脚本start.sh,内容:
#!/bin/sh
# Container entrypoint for confd.
#
# Reads CONFD_BACKEND, CONFD_NODE, CONFD_INTERVAL and CONFD_LOG_LEVEL from the
# environment. First renders the templates once, retrying until that succeeds,
# then hands the process over to confd running in interval mode.
set -e

echo "Starting confd..."

# One-shot render; a non-zero exit is retried every 5 seconds until confd succeeds.
until /usr/bin/confd -onetime -backend "$CONFD_BACKEND" -node "$CONFD_NODE"; do
    echo "[confd] Waiting for the initial configuration to be created."
    sleep 5
done

# exec replaces the shell so confd becomes PID 1 and receives the signals
# sent by `docker stop` directly.
exec /usr/bin/confd -confdir="/etc/confd" -onetime=false -interval="$CONFD_INTERVAL" -backend "$CONFD_BACKEND" -node "$CONFD_NODE" -log-level="$CONFD_LOG_LEVEL"
confd动态修改prometheus配置文件
1.confd/templates下配置文件
alertmanager.tmpl,内容:
# Alertmanager configuration rendered by confd.
# The settings below can be overridden in consul at /monitor/alertmanager/common (JSON format);
# each one falls back to the default given here when the JSON object does not set it.
{{$config := getv "/monitor/alertmanager/common"|json}}
global:
  resolve_timeout: {{or $config.resolve_timeout "5m"}}
route:
  group_by: {{or $config.group_by "['alertname']"}} # grouping labels
  group_wait: {{or $config.group_wait "10s"}} # group alert wait time
  group_interval: {{or $config.group_interval "10s"}} # group alert interval
  repeat_interval: {{or $config.repeat_interval "1m"}} # repeat alert interval
  receiver: {{or $config.receiver "'web.hook'"}}
  routes: # one route per rule, so each rule can have its own repeat interval
  {{range gets "/monitor/alarms/*/*"}}
  {{$rule := json .Value}}
  - receiver: 'web.hook'
    repeat_interval: {{or $rule.suppressDuration "1m"}}
    match:
      ruleId: {{$rule.ruleId}}
  {{end}}
# Receivers are configured in consul at /monitor/alertmanager/receivers (YAML format).
# The value is inserted verbatim, so it must be a complete top-level YAML section, e.g.:
{{getv "/monitor/alertmanager/receivers"}}
# receivers:
# - name: 'web.hook'
#   webhook_configs:
#   - url: 'http://192.168.2.240:7002/webHook/send'
#     send_resolved: true
# Inhibit rules are configured in consul at /monitor/alertmanager/inhibit (YAML format).
# The value is inserted verbatim, so it must be a complete top-level YAML section, e.g.:
{{getv "/monitor/alertmanager/inhibit"}}
# inhibit_rules:
# - source_match:
#     severity: 'critical'
#   target_match:
#     severity: 'warning'
#   equal: ['alertname', 'dev', 'instance']
prometheus.alert.tmpl,内容:
# Prometheus alerting rules rendered by confd.
# One rule is emitted for every key matching /monitor/alarms/*/*; each value is
# parsed as a JSON object providing name, expression, level, strategyId, ruleId
# and dimensionName.
# The raw-string action in the annotation emits the Prometheus value placeholder
# verbatim, so it is expanded by Prometheus at alert time rather than by confd.
groups:
- name: alarms
  rules: {{range gets "/monitor/alarms/*/*"}}
    {{$rule := json .Value}}
    - alert: {{$rule.name}}
      expr: {{$rule.expression}}
      labels:
        level: {{$rule.level}}
        alarmStrategyId: {{$rule.strategyId}}
        ruleId: {{$rule.ruleId}}
        dimensionName: {{$rule.dimensionName}}
      annotations:
        value: {{`"{{ $value }}"`}}
    {{end}}
2.confd/conf.d下配置文件
alertmanager.toml,内容:
# confd template resource: renders alertmanager.tmpl to the Alertmanager
# configuration file whenever a watched key prefix changes.
[template]
src = "alertmanager.tmpl"
dest = "/etc/alertmanager/alertmanager.yml"
keys = [
  "/monitor/alarms",
  "/monitor/alertmanager"
]
# TOML literal string (straight single quotes) so the shell, not TOML, sees the
# variable reference; ${VAR} is the shell parameter-expansion form.
# Runs after dest has been rewritten and POSTs to the /-/reload endpoint
# of the host named by CONFD_HOST_NODE.
reload_cmd = 'echo "$CONFD_HOST_NODE" && curl -XPOST "http://${CONFD_HOST_NODE}/-/reload"'
prometheus.toml,内容:
# confd template resource: renders prometheus.alert.tmpl to the Prometheus
# alert rule file whenever a key under /monitor/alarms changes.
[template]
src = "prometheus.alert.tmpl"
dest = "/etc/prometheus/rules/alert.yml"
keys = [
  "/monitor/alarms"
]
# TOML literal string (straight single quotes) so the shell, not TOML, sees the
# variable reference; ${VAR} is the shell parameter-expansion form.
# Runs after dest has been rewritten and POSTs to the /-/reload endpoint
# of the host named by CONFD_HOST_NODE.
reload_cmd = 'echo "$CONFD_HOST_NODE" && curl -XPOST "http://${CONFD_HOST_NODE}/-/reload"'