Prometheus入门

部署

官网地址: https://prometheus.io/

rsyslog配置

# The heredoc delimiter is quoted ('EOF') so the shell writes $programname
# literally; with a bare EOF the shell would expand it to an empty string
# before rsyslog ever reads the file.
[root@ops ~]# cat > /etc/rsyslog.d/syslog_server.conf << 'EOF'
# Route each service's messages to its own file under /opt/logs, keyed on the
# syslog program name. String operands must be quoted in rsyslog expressions.
if $programname == 'alertmanager'      then /opt/logs/alertmanager.log
if $programname == 'prometheus'        then /opt/logs/prometheus.log
if $programname == 'node_exporter'     then /opt/logs/node_exporter.log
if $programname == 'process_exporter'  then /opt/logs/process_exporter.log
if $programname == 'mysql_exporter'    then /opt/logs/mysql_exporter.log
if $programname == 'redis_exporter'    then /opt/logs/redis_exporter.log
if $programname == 'blackbox_exporter' then /opt/logs/blackbox_exporter.log
if $programname == 'mysqld_exporter'   then /opt/logs/mysqld_exporter.log
if $programname == 'process-exporter'  then /opt/logs/process-exporter.log
if $programname == 'pushgateway'       then /opt/logs/pushgateway.log
if $programname == 'm3coordinator'     then /opt/logs/m3coordinator.log
if $programname == 'm3dbnode'          then /opt/logs/m3dbnode.log
EOF

logrotate

# Install a logrotate rule covering every *.log file under /opt/logs:
#   daily         - rotate once per day
#   missingok     - do not report an error when no log file matches
#   notifempty    - skip rotation when the log is empty
#   dateext       - suffix rotated files with a date instead of a number
#   compress      - compress rotated files
#   delaycompress - leave the most recently rotated file uncompressed
#   copytruncate  - copy the log, then truncate the original in place
#   rotate 15     - keep 15 rotated files
# The heredoc body contains no $ or backticks, so the unquoted EOF delimiter
# still writes it verbatim.
[root@ops ~]# cat > /etc/logrotate.d/logrotate.conf << EOF
/opt/logs/*.log
{
    daily
    missingok
    notifempty
    dateext
    compress
    delaycompress
    copytruncate
    rotate 15
}
EOF

重启rsyslog

# Create the log directory before restarting rsyslog so it already exists when
# the new rules take effect; -p keeps the step re-runnable (plain mkdir fails
# when the directory is already there).
[root@ops ~]# mkdir -p /opt/logs
[root@ops ~]# systemctl restart rsyslog.service
[root@ops ~]# systemctl status rsyslog.service

Prometheus部署

下载软件

# Pin the release once so the URL, archive name and directory cannot drift apart.
PROM_VERSION=2.29.1
PROM_DIST="prometheus-${PROM_VERSION}.linux-amd64"
wget "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${PROM_DIST}.tar.gz"
tar xf "${PROM_DIST}.tar.gz" -C /opt/
cd /opt/
# -sfn replaces an existing /opt/prometheus link on re-run or upgrade instead
# of failing or creating a nested link inside the old target directory.
ln -sfn "${PROM_DIST}" prometheus

准备启动文件

# Write the systemd unit for Prometheus. The delimiter is quoted so the unit
# text is written verbatim, with no shell expansion.
cat > /usr/lib/systemd/system/prometheus.service << 'EOF'
[Unit]
Description="prometheus"
Documentation=https://prometheus.io/
After=network.target

[Service]
Type=simple
# --web.enable-lifecycle enables the HTTP POST /-/reload endpoint.
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/opt/prometheus/data --web.enable-lifecycle

Restart=on-failure
# The directive is RestartSec= ("RestartSecs" is an unknown key and is ignored).
RestartSec=5s
SuccessExitStatus=0
LimitNOFILE=65536
# stdout/stderr are sent to syslog tagged "prometheus"; the rsyslog rule on
# $programname routes that tag to /opt/logs/prometheus.log.
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=prometheus

[Install]
WantedBy=multi-user.target
EOF

启动服务

# Reload unit files so systemd picks up the newly written (or edited)
# prometheus.service before starting it.
systemctl daemon-reload
systemctl start prometheus.service
systemctl status prometheus.service

浏览器访问

http://192.168.168.106:9090/

热重启

# Hot reload of the configuration; Prometheus must have been started with
# --web.enable-lifecycle for this endpoint to be available.
curl --request POST 'http://localhost:9090/-/reload'

主配置文件

[root@ops prometheus]# cat prometheus.yml
# Global settings
global:
  scrape_interval: 15s     # how often targets are scraped
  evaluation_interval: 15s # how often alerting and recording rules are evaluated
  scrape_timeout: 10s      # per-scrape timeout
  # Query log, including per-stage timing statistics. The name ends in .log so
  # that the /opt/logs/*.log logrotate rule also rotates this file.
  query_log_file: /opt/logs/prometheus_query.log
  # Labels added to time series and alerts when this instance talks to
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    account: huawei-main

# Alertmanager section. The only target is commented out, so no Alertmanager
# is configured until an entry is enabled.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Alerting and recording (pre-aggregation) rule files section. Both entries
# are commented out, so no rule files are loaded.
rule_files:
  # - "first_rules.yml"
   #- "second_rules.yml"


# Scrape configuration section
scrape_configs:
  # Job "prometheus": targets are read from files matching
  # targets/prometheus-*.yml (file-based service discovery), re-read every 1m.
  - job_name: "prometheus"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
    - files:
      - targets/prometheus-*.yml
      refresh_interval: 1m

  # Job "nodes": targets are read from files matching targets/nodes-*.yml,
  # re-read every 1m.
  - job_name: "nodes"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
    - files:
      - targets/nodes-*.yml
      refresh_interval: 1m
          
  # Job "mysqld": targets are read from files matching targets/mysql-*.yml,
  # re-read every 1m.
  # NOTE(review): the glob is mysql-*, not mysqld-*; confirm the target files
  # are named accordingly.
  - job_name: "mysqld"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
    - files:
      - targets/mysql-*.yml
      refresh_interval: 1m
  
  # Job "process": targets are read from files matching targets/process-*.yml,
  # re-read every 1m.
  - job_name: "process"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
    - files:
      - targets/process-*.yml
      refresh_interval: 1m
# Remote read section. Every entry below is commented out, so remote read is
# not configured.
remote_read:
  # Another Prometheus instance
  #- url: http://prometheus/v1/read
  #  read_recent: true

  # M3DB (via m3coordinator)
  #- url: "http://m3coordinator-read:7201/api/v1/prom/remote/read"
  #  read_recent: true

# Remote write section. Every entry below is commented out, so remote write is
# not configured.
remote_write:
  #- url: "http://m3coordinator-write:7201/api/v1/prom/remote/write"
  #  queue_config:
  #    capacity: 10000
  #    max_samples_per_send: 60000
  #  write_relabel_configs:
  #    - source_labels: [__name__]
  #      separator: ;
  #      # Drop series whose metric name starts with one of these prefixes
  #      regex: (kubelet_|apiserver_|container_fs_).*
  #      replacement: $1
  #      action: drop

 

Prometheus入门

上一篇:torch.optim.lr_scheduler


下一篇:pytorch入门学习第七课Seq2Seq, Attention