//检查防火墙是否已放行各服务端口(9090、9100、3000、9093、8080、8686)
//部署各个模块与应用
cd /usr/local/Prometheus_compose
vim docker-compose.yml
# Monitoring stack: Prometheus, node-exporter, Grafana, Alertmanager,
# cAdvisor and grafana-reporter, each published on its own host port.
version: "3"
services:
  # Prometheus server; its configuration comes from the bind-mounted
  # ./prometheus directory.
  # NOTE(review): nothing is mounted at /prometheus, so TSDB data is presumably
  # lost when the container is re-created — confirm whether that is intended.
  prom:
    image: quay.io/prometheus/prometheus:latest
    container_name: prometheus
    volumes:
      - ./prometheus:/etc/prometheus
    command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus"
    ports:
      - "9090:9090"
    depends_on:
      - exporter
    environment:
      - TZ=Asia/Shanghai

  # Host metrics exporter for the machine running this stack.
  exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    hostname: cicd
    ports:
      - "9100:9100"
    environment:
      - TZ=Asia/Shanghai

  # Grafana UI; started after Prometheus.
  grafana:
    image: grafana/grafana
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # Plain ASCII quotes are required here: typographic quotes are not YAML
      # quoting and would become part of the variable name.
      # NOTE(review): weak sample admin password — replace before real use.
      - "GF_SECURITY_ADMIN_PASSWORD=123123"
      - "GF_INSTALL_PLUGINS=alexanderzobnin-zabbix-app"
      - TZ=Asia/Shanghai
    restart: "always"
    volumes:
      - ./grafana:/etc/grafana/
      - ./grafana/conf/grafana.ini:/etc/grafana/grafana.ini
      - ./grafana/data:/var/lib/grafana:rw
      - ./grafana/plugins:/var/lib/grafana/plugins:rw
      - /etc/localtime:/etc/localtime
    depends_on:
      - prom

  # Alertmanager; config and data directories are bind-mounted from
  # ./alertmanager.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    ports:
      - '9093:9093'
    volumes:
      - './alertmanager/config:/etc/alertmanager'
      - './alertmanager/data:/alertmanager/data'
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
    environment:
      - TZ=Asia/Shanghai

  # Container metrics; runs privileged with read access to the host's root
  # filesystem, /sys and the Docker data directory.
  # NOTE(review): the google/cadvisor image appears to be unmaintained
  # (upstream now publishes gcr.io/cadvisor/cadvisor) — confirm before upgrading.
  cadvisor:
    image: google/cadvisor
    container_name: cadvisors
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8080:8080"
    privileged: true
    environment:
      - TZ=Asia/Shanghai

  # Report generator, started with "-ip grafana.mitaiot.com".
  grafana-reporter:
    image: izakmarais/grafana-reporter
    container_name: grafana_reporter
    ports:
      - "8686:8686"
    command: "-ip grafana.mitaiot.com"
    environment:
      - TZ=Asia/Shanghai
//编辑报警模块的配置文件
cd /usr/local/Prometheus_compose/alertmanager/config
cat alertmanager.yml
# Alertmanager configuration: every alert is routed to the single "email"
# receiver and sent through the Sina SMTP relay.
global:
  resolve_timeout: 5m
  smtp_from: '123456789@sina.com'
  smtp_smarthost: 'smtp.sina.com:587'
  smtp_auth_username: '123456789@sina.com'
  # NOTE(review): SMTP credential stored in plain text — keep this file out of
  # version control or inject the secret another way.
  smtp_auth_password: 'aabbccdd'
  smtp_require_tls: false
  smtp_hello: 'sina.com'

route:
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'email'

receivers:
  - name: 'email'
    email_configs:
      # Recipient and HTML body are rendered from the templates loaded under
      # "templates" below.
      - to: '{{ template "email.to" . }}'
        html: '{{ template "email.to.html" . }}'
        send_resolved: true

# Suppress "warning" alerts while a "critical" alert with the same alertname,
# dev and instance labels is firing.
# NOTE(review): the alert rules in these notes set `severity: 1` and carry the
# word "warning" in a separate `level` label, so this rule presumably never
# matches — confirm which label scheme is intended.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

templates:
  - "/etc/alertmanager/alertmanager-tmpl/email.tmpl"
//编辑发送的邮件模板
cd /usr/local/Prometheus_compose/alertmanager/config/alertmanager-tmpl
cat email.tmpl
{{/* Sender and recipient addresses exposed as named templates. */}}
{{ define "email.from" }}123456789@sina.com{{ end }}
{{ define "email.to" }}123456789@sina.com{{ end }}
{{/*
  HTML body: one start/end delimited section per alert in the notification.
  The trigger time uses Go's time layout, which must be written with the
  reference time 2006-01-02 15:04:05; any other digits are emitted as garbage.
  NOTE(review): StartsAt is presumably rendered in UTC — confirm whether local
  time is expected in the e-mail.
*/}}
{{ define "email.to.html" }}
{{ range .Alerts }}
=========start==========<br>
告警程序: prometheus_alert <br>
告警级别: {{ .Labels.severity }} 级 <br>
告警类型: {{ .Labels.alertname }} <br>
故障主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }} <br>
告警详情: {{ .Annotations.description }} <br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }} <br>
=========end==========<br>
{{ end }}
{{ end }}
cd /usr/local/Prometheus_compose/grafana/conf
vim grafana.ini # 配置文件太长,标出修改部分
# Anonymous visitors are allowed in with the Viewer role of "Main Org.".
[auth.anonymous]
enabled = true
org_name = Main Org.
org_role = Viewer

# Outgoing mail through the Sina SMTP relay.
[smtp]
enabled = true
host = smtp.sina.com:587
user = 123456789@sina.com
# NOTE(review): plain-text SMTP credential — keep this file out of version control.
password = dc28ac6ec64af9c1
# NOTE(review): skip_verify = true turns off TLS certificate verification —
# confirm this is intended.
skip_verify = true
from_address = 123456789@sina.com
from_name = Grafana
ehlo_identity =

# Default time zone for the UI. This is a [date_formats] option, not an [smtp]
# one; merge it into the existing [date_formats] section if the file has one.
[date_formats]
default_timezone = Asia/Shanghai
//修改 Prometheus 的配置文件
cd /usr/local/Prometheus_compose/prometheus
vim prometheus.yml
# Prometheus server configuration: scrape targets, Alertmanager endpoint and
# alert rule files.
global:
  scrape_interval: 5s  # Scrape targets every 5 seconds (Prometheus default: 1 minute).
  evaluation_interval: 5s  # Evaluate rules every 5 seconds (Prometheus default: 1 minute).

scrape_configs:
  # Prometheus scraping itself.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['172.16.225.154:9090']

  # Every exporter endpoint shares this one job, so an expression such as
  # up{job="node"} covers all targets listed here plus any discovered from
  # the JSON files.
  - job_name: 'node'
    file_sd_configs:
      - files: ['/etc/prometheus/groups/nodegroups/*.json']
    static_configs:
      - targets:
          - '172.16.225.154:9100'
          - '172.16.225.156:9100'
          - '172.16.225.155:9100'
          - '172.16.225.157:9100'
          - '172.16.225.156:8085'
          - '172.16.225.154:8080'
          - '172.16.225.155:8085'
          - '172.16.225.157:8085'
          - '172.16.225.157:9104'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '172.16.225.154:9093'

rule_files:
  - "/etc/prometheus/rules/*.yml"
//配置报警规则
cd /usr/local/Prometheus_compose/prometheus/rules
# Alerting rules for the "node" job: target down, CPU, memory and disk usage.
# Label values are quoted so they are always read as strings.
groups:
  - name: node-up
    rules:
      # Fire when a target of the "node" job has been down for 15 seconds.
      - alert: node-up
        expr: 'up{job="node"} == 0'
        for: 15s
        labels:
          severity: "1"
          team: node
        annotations:
          summary: "{{ $labels.instance }} 已停止运行!"
          description: "{{ $labels.instance }} 检测到异常停止!请重点关注!!!"

  - name: node-cpu
    rules:
      # Fire when non-idle CPU, averaged per instance/job/env, stays above
      # 90 % for 1 minute.
      - alert: node-cpu
        expr: '100 - ((avg by (instance,job,env)(irate(node_cpu_seconds_total{mode="idle"}[30s]))) *100) > 90'
        for: 1m
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} CPU使用率超过 百分之90!"
          description: "{{ $labels.instance }} 检测CPU连续1分钟占用率超出90%!请重点关注!!!"

  - name: node-mem
    rules:
      # Fire when memory usage exceeds 90 % for 5 seconds, where
      # used = MemTotal - (MemFree + Buffers + Cached).
      - alert: node-mem
        expr: '((node_memory_MemTotal_bytes -(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) )/node_memory_MemTotal_bytes ) * 100 > 90'
        for: 5s
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} MEM使用率超过 百分之90!"
          # The description now talks about memory instead of repeating the
          # CPU alert's text.
          description: "{{ $labels.instance }} 检测到内存使用率超出90%!请重点关注!!!"

  - name: node-disk_used
    rules:
      # Fire when an ext3/ext4/xfs filesystem stays more than 90 % full for
      # 1 minute.
      - alert: node-disk_used
        expr: '100 - (node_filesystem_free_bytes{fstype=~"ext3|ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext3|ext4|xfs"} * 100) > 90'
        for: 1m
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} 挂载分区使用率超过 百分之90!"
          description: "{{ $labels.instance }} 挂载分区使用率超出90%!请重点关注!!!"
# 如需监控 MySQL、容器和主机信息,需要在被监控主机上部署 prom/node-exporter、cadvisor 和 prom/mysqld-exporter,对应的 docker-compose.yml 如下:
# Exporters for a monitored database host: node-exporter (host metrics),
# cAdvisor (container metrics) and mysqld-exporter (MySQL metrics).
version: "3"
services:
  exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    hostname: db01
    ports:
      - "9100:9100"

  # cAdvisor listens on 8080 inside the container and is published on host
  # port 8085.
  cadvisor:
    image: google/cadvisor
    container_name: cadvisor
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8085:8080"
    privileged: true

  mysqld-exporter:
    image: prom/mysqld-exporter
    ports:
      - "9104:9104"
    restart: always
    container_name: mysql_exporter
    hostname: db01
    environment:
      # NOTE(review): MySQL root credentials in plain text — prefer a dedicated
      # low-privilege monitoring account and keep the secret out of this file.
      # NOTE(review): the image is untagged, and recent mysqld-exporter
      # releases reportedly no longer read DATA_SOURCE_NAME — confirm the
      # pulled version supports it.
      - DATA_SOURCE_NAME=root:0GXwwchW4rP@(172.16.225.157:3306)/
      - TZ=Asia/Shanghai
在 Grafana 中导入仪表盘模板,模板 ID 分别是:8919、7362。
更多仪表盘模板可在 Grafana 官方仪表盘库(https://grafana.com/grafana/dashboards)中查找,只需导入对应 ID 即可。