3 报警的介绍

笔记

1、报警的介绍

 

2、部署AlertManager

# 下载
[root@VM-0-9-centos ~]# wget https://github.com/prometheus/alertmanager/releases/download/v0.23.0/alertmanager-0.23.0.linux-amd64.tar.gz

# 安装
[root@VM-0-9-centos ~]# tar -xf alertmanager-0.23.0.linux-amd64.tar.gz -C /usr/local/

# 创建软连接
[root@VM-0-9-centos local]# ln -s /usr/local/alertmanager-0.23.0.linux-amd64 /usr/local/alertmanager

# 注册服务
[root@VM-0-9-centos alertmanager]# vim /usr/lib/systemd/system/altermanagerd.service
[Unit]
Description=Alertmanager

[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml
Restart=on-failure

[Install]
WantedBy=multi-user.target

# 启动
[root@VM-0-9-centos alertmanager]# systemctl daemon-reload
[root@VM-0-9-centos alertmanager]# systemctl start altermanagerd.service

3、AlertManager的配置

# Alertmanager 的配置文件使用 YAML 格式。
vim alertmanager.yml

# 1、全局配置
global:
  resolve_timeout: '5s'
  smtp_smarthost: 'smtp.163.com:465'
  smtp_from: 'chenyangqit@163.com'
  smtp_auth_username: 'chen....qit@163.com'
  smtp_auth_password: 'XWO........ZECK'
  smtp_require_tls: true
 
打开企业微信注册 https://work.weixin.qq.com
微信API官方文档 https://work.weixin.qq.com/api/doc#90002/90151/90854


2、路由
route:
  group_by: ['alertname']
  group_wait: '2s'
  group_interval: '10s'
  repeat_interval: 4h
  receiver: 'email'

3、报警方式
receivers:
- name: 'email'
  email_configs:
    - to: 'chenyangqit@163.com'
      send_resolved: true

4、报警抑制
debug info warning critical error
inhibit_rules:
- source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  equal: ['alertname', 'dev', 'instance']

5、报警模板

6、报警规则

配置在prometheus配置文件中

rule_files:
- "/usr/local/prometheus/rules/mysql.yaml"
- "/usr/local/prometheus/rules/node.yaml"

groups:
- name: MySQLMonitor
  rules:
    - alert: MySQLIsDown
      expr: mysql_up == 0
      for: 1m
      labels:
        severity: error
      annotations:
        summary: "Moniter MySQL is Down"
groups:
- name: NodeMonitor
  rules:
    - alert: NodeMemoryMonitor
      expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes) / node_memory_MemTotal_bytes * 100 > 40
      for: 1m
      labels:
        severity: error
      annotations:
        summary: "Moniter Memory is Down"

访问http://192.168.15.114:9093/出现如下界面即成功

3 报警的介绍

prometheus如何连接AlertManager

 
[root@prometheus ~]# vim /usr/local/prometheus/prometheus.yml
# Alertmanager configuration

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 192.168.15.114:9093
[root@prometheus ~]# systemctl restart prometheusd
[root@prometheus ~]# systemctl restart prometheusd

 

 

4、使用AlertManager报警

global:
  resolve_timeout: '5s'
  smtp_smarthost: 'smtp.163.com:465'
  smtp_from: 'chen。。。。qit@163.com'
  smtp_auth_username: 'che。。。gqit@163.com'
  smtp_auth_password: 'XWO。。。。。ESZECK'
  smtp_require_tls: false

route:
  group_by: ['alertname']
  group_wait: '2s'
  group_interval: '10s'
  repeat_interval: 4h
  receiver: 'email'

templates:
 - '/usr/local/alertmanager/template/email.tmpl'

receivers:
  - name: 'email'
    email_configs:
      - to: 'c。。。。gqit@163.com'
        send_resolved: true
        html: '{{ template "email.to.html" . }}'
        headers: { Subject: "Prometheus [Warning] 报警邮件" }

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']





[root@VM-0-9-centos alertmanager]# cat ../prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 192.168.0.9:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
 - "/usr/local/prometheus/rules/mysql.yaml"
 - "/usr/local/prometheus/rules/node.yaml"
 # - "first_rules.yml"
 # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["localhost:9090"]
  - job_name: 'NodeExporter'
    static_configs:
      - targets:
          - 192.168.0.9:9100
          - 192.168.0.3:9100
  - job_name: 'MySQlExporter'
    static_configs:
      - targets:
          - 192.168.0.3:9104
         
         
[root@VM-0-9-centos alertmanager]# cat ../prometheus/rules/mysql.yaml
groups:
  - name: MySQLMonitor
    rules:
      - alert: MySQLIsDown
        expr: mysql_up == 0
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "Moniter MySQL is Down"
[root@VM-0-9-centos alertmanager]# cat ../prometheus/rules/node.yaml
groups:
  - name: NodeMonitor
    rules:
      - alert: NodeMemoryMonitor
        expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes) / node_memory_MemTotal_bytes * 100 > 40
        for: 1m
        labels:
          severity: error
        annotations:
          summary: "Moniter Memory is Down"

 

 

 

 

 

 

 

 

上一篇:Linux 进程


下一篇:监控prometheus+alertmanager+PrometheusAlert