1、安装mysql_exporter
[root@mysqld ~]# wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.12.1/mysqld_exporter-0.12.1.linux-amd64.tar.gz
[root@mysqld ~]# tar xf mysqld_exporter-0.12.1.linux-amd64.tar.gz -C /usr/local/
[root@mysqld ~]# ln -sv /usr/local/mysqld_exporter-0.12.1.linux-amd64/ /usr/local/prometheus_mysqld
2、在MySQL中创建用户并授权
mysql> CREATE USER 'mysqld_exporter'@'%' IDENTIFIED BY 'mysqld_exporter';
mysql> GRANT REPLICATION CLIENT,PROCESS ON *.* TO 'mysqld_exporter'@'%' identified by 'mysqld_exporter';
mysql> GRANT SELECT ON *.* TO 'mysqld_exporter'@'%';
3、写一个mysqld_exporter的配置文件
[root@mysqld ~]# cat /usr/local/prometheus_mysqld/.my.cnf
[client]
user=mysqld_exporter
password=mysqld_exporter
port=3306
4、配置systemd启动mysqld_exporter
[root@mysqld ~]# cat /lib/systemd/system/mysqld_exporter.service
[Unit]
Description=Mysqld_exporter
After=network.target
[Service]
ExecStart=/usr/local/prometheus_mysqld/mysqld_exporter --config.my-cnf=/usr/local/prometheus_mysqld/.my.cnf
[Install]
WantedBy=multi-user.target
systemctl daemon-reload
systemctl start mysqld_exporter
systemctl enable mysqld_exporter
ss -lnpt|grep 9104
LISTEN 0 128 [::]:9104 [::]:* users:(("mysqld_exporter",pid=1853,fd=3))
5、确认监控指标正常
[root@mysqld ~]# curl http://localhost:9104/metrics|grep mysql_up
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 130k 0 130k 0 0 6920k 0 --:--:-- --:--:-- --:--:-- 7269k
# HELP mysql_up Whether the MySQL server is up.
# TYPE mysql_up gauge
mysql_up 1
6、在Prometheus的server端添加job任务
[root@prometheus prometheus]# vim /usr/local/prometheus/prometheus.yml
- job_name: 'mysql_db'
static_configs:
- targets: ['10.0.0.52:9104']
labels:
group: mysql_db
[root@prometheus prometheus]# systemctl restart prometheus
在浏览器中访问 http://10.0.0.51:9090/targets
7、在grafana中导入MySQL监控图表
点击import,在弹出界面中输入7362,数据源选择Prometheus
在grafana中看到采集到的MySQL数据库的相关参数
8、配置mysql_exporter告警规则
[root@prometheus rules]# pwd
/usr/local/prometheus/rules
[root@prometheus rules]# cat mysql_rules.yml
groups:
- name: mysql.rules
rules:
- alert: MysqlDown
expr: up == 0
for: 0m
labels:
severity: critical
annotations:
title: 'MySQL down'
description: "Mysql实例: 【{{ $labels.instance }}】, MySQL instance is down"
- alert: MysqlRestarted
expr: mysql_global_status_uptime < 60
for: 0m
labels:
severity: info
annotations:
title: 'MySQL Restarted'
description: "Mysql实例: 【{{ $labels.instance }}】, MySQL has just been restarted, less than one minute ago"
- alert: MysqlTooManyConnections(>80%)
expr: avg by (instance) (rate(mysql_global_status_threads_connected[1m])) / avg by (instance) (mysql_global_variables_max_connections) * 100 > 80
for: 2m
labels:
severity: warning
annotations:
title: 'MySQL too many connections (> 80%)'
description: "Mysql实例: 【{{ $labels.instance }}】, More than 80% of MySQL connections are in use, Current Value: {{ $value }}%"
- alert: MysqlThreadsRunningHigh
expr: mysql_global_status_threads_running > 40
for: 2m
labels:
severity: warning
annotations:
title: 'MySQL Threads_Running High'
description: "Mysql实例: 【{{ $labels.instance }}】, Threads_Running above the threshold(40), Current Value: {{ $value }}"
- alert: MysqlQpsHigh
expr: sum by (instance) (rate(mysql_global_status_queries[2m])) > 500
for: 2m
labels:
severity: warning
annotations:
title: 'MySQL QPS High'
description: "Mysql实例: 【{{ $labels.instance }}】, MySQL QPS above 500"
- alert: MysqlSlowQueries
expr: increase(mysql_global_status_slow_queries[1m]) > 0
for: 2m
labels:
severity: warning
annotations:
title: 'MySQL slow queries'
description: "Mysql实例: 【{{ $labels.instance }}】, has some new slow query."
- alert: MysqlTooManyAbortedConnections
expr: round(increase(mysql_global_status_aborted_connects[5m])) > 20
for: 2m
labels:
severity: warning
annotations:
title: 'MySQL too many Aborted connections in 2 minutes'
description: "Mysql实例: 【{{ $labels.instance }}】, {{ $value }} Aborted connections within 2 minutes"
- alert: MysqlTooManyAbortedClients
expr: round(increase(mysql_global_status_aborted_clients[120m])) > 10
for: 2m
labels:
severity: warning
annotations:
title: 'MySQL too many Aborted connections in 2 hours'
description: "Mysql实例: 【{{ $labels.instance }}】, {{ $value }} Aborted Clients within 2 hours"
- alert: MysqlSlaveIoThreadNotRunning
expr: mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_io_running == 0
for: 0m
labels:
severity: critical
annotations:
title: 'MySQL Slave IO thread not running'
description: "Mysql实例: 【{{ $labels.instance }}】, MySQL Slave IO thread not running"
- alert: MysqlSlaveSqlThreadNotRunning
expr: mysql_slave_status_master_server_id > 0 and ON (instance) mysql_slave_status_slave_sql_running == 0
for: 0m
labels:
severity: critical
annotations:
title: 'MySQL Slave SQL thread not running'
description: "Mysql实例: 【{{ $labels.instance }}】, MySQL Slave SQL thread not running"
- alert: MysqlSlaveReplicationLag
expr: mysql_slave_status_master_server_id > 0 and ON (instance) (mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay) > 30
for: 1m
labels:
severity: critical
annotations:
title: 'MySQL Slave replication lag'
description: "Mysql实例: 【{{ $labels.instance }}】, MySQL replication lag"
- alert: MysqlInnodbLogWaits
expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10
for: 0m
labels:
severity: warning
annotations:
title: 'MySQL InnoDB log waits'
description: "Mysql实例: 【{{ $labels.instance }}】, innodb log writes stalling"
9、将告警规则集成到Prometheus
[root@prometheus prometheus]# vim /usr/local/prometheus/prometheus.yml
rule_files:
- "/usr/local/prometheus/rules/*.yml"
[root@prometheus prometheus]# systemctl restart prometheus