笔记
1、报警的介绍
2、部署AlertManager
# 下载
[root@VM-0-9-centos ~]# wget https://github.com/prometheus/alertmanager/releases/download/v0.23.0/alertmanager-0.23.0.linux-amd64.tar.gz
# 安装
[root@VM-0-9-centos ~]# tar -xf alertmanager-0.23.0.linux-amd64.tar.gz -C /usr/local/
# 创建软连接
[root@VM-0-9-centos local]# ln -s /usr/local/alertmanager-0.23.0.linux-amd64 /usr/local/alertmanager
# 注册服务
[root@VM-0-9-centos alertmanager]# vim /usr/lib/systemd/system/altermanagerd.service
[Unit]
Description=Alertmanager
[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml
Restart=on-failure
[Install]
WantedBy=multi-user.target
# 启动
[root@VM-0-9-centos alertmanager]# systemctl daemon-reload
[root@VM-0-9-centos alertmanager]# systemctl start altermanagerd.service
3、AlertManager的配置
# alertmanager的配置使用的是yaml的格式。
vim alertmanager.yml
# 1、全局配置
global:
  resolve_timeout: '5s'
  smtp_smarthost: 'smtp.163.com:465'
  smtp_from: 'chenyangqit@163.com'
  smtp_auth_username: 'chen....qit@163.com'
  smtp_auth_password: 'XWO........ZECK'
  # 465端口使用隐式TLS(SMTPS),不支持STARTTLS,因此这里必须为false,否则邮件发送会失败
  smtp_require_tls: false
打开企业微信注册 https://work.weixin.qq.com
微信API官方文档 https://work.weixin.qq.com/api/doc#90002/90151/90854
2、路由
route:
  group_by: ['alertname']
  group_wait: '2s'
  group_interval: '10s'
  repeat_interval: 4h
  receiver: 'email'
3、报警方式
receivers:
  - name: 'email'
    email_configs:
      - to: 'chenyangqit@163.com'
        send_resolved: true
4、报警抑制
debug info warning critical error
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
5、报警模板
6、报警规则
配置在prometheus配置文件中
rule_files:
  - "/usr/local/prometheus/rules/mysql.yaml"
  - "/usr/local/prometheus/rules/node.yaml"
# /usr/local/prometheus/rules/mysql.yaml 的内容
groups:
  - name: MySQLMonitor
    rules:
      - alert: MySQLIsDown
        expr: mysql_up == 0
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "Moniter MySQL is Down"
# /usr/local/prometheus/rules/node.yaml 的内容
groups:
  - name: NodeMonitor
    rules:
      - alert: NodeMemoryMonitor
        expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes) / node_memory_MemTotal_bytes * 100 > 40
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "Moniter Memory is Down"
访问http://192.168.15.114:9093/出现如下界面即成功
prometheus如何连接AlertManager
[root@prometheus ~]# vim /usr/local/prometheus/prometheus.yml
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 192.168.15.114:9093
[root@prometheus ~]# systemctl restart prometheusd
4、使用AlertManager报警
global:
  resolve_timeout: '5s'
  smtp_smarthost: 'smtp.163.com:465'
  smtp_from: 'chen。。。。qit@163.com'
  smtp_auth_username: 'che。。。gqit@163.com'
  smtp_auth_password: 'XWO。。。。。ESZECK'
  smtp_require_tls: false
route:
  group_by: ['alertname']
  group_wait: '2s'
  group_interval: '10s'
  repeat_interval: 4h
  receiver: 'email'
templates:
  - '/usr/local/alertmanager/template/email.tmpl'
receivers:
  - name: 'email'
    email_configs:
      - to: 'c。。。。gqit@163.com'
        send_resolved: true
        html: '{{ template "email.to.html" . }}'
        headers: { Subject: "Prometheus [Warning] 报警邮件" }
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
[root@VM-0-9-centos alertmanager]# cat ../prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 192.168.0.9:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/usr/local/prometheus/rules/mysql.yaml"
  - "/usr/local/prometheus/rules/node.yaml"
  # - "first_rules.yml"
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: 'NodeExporter'
    static_configs:
      - targets:
          - 192.168.0.9:9100
          - 192.168.0.3:9100
  - job_name: 'MySQlExporter'
    static_configs:
      - targets:
          - 192.168.0.3:9104
[root@VM-0-9-centos alertmanager]# cat ../prometheus/rules/mysql.yaml
groups:
  - name: MySQLMonitor
    rules:
      - alert: MySQLIsDown
        expr: mysql_up == 0
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "Moniter MySQL is Down"
[root@VM-0-9-centos alertmanager]# cat ../prometheus/rules/node.yaml
groups:
  - name: NodeMonitor
    rules:
      - alert: NodeMemoryMonitor
        expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes) / node_memory_MemTotal_bytes * 100 > 40
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "Moniter Memory is Down"