深度解锁SpringCloud主流组件 一战解决微服务诸多难题

download:深度解锁SpringCloud主流组件 一战解决微服务诸多难题

本课程是一门基于SpringCloud整合"猫眼"商城后台项目进行的微服务课程,从零起步到深度讲解包括Gateway,Hystrix,Ribbon,Feign等几大主流组件及其高级特性。同时还会引入微服务安全,自动化测试和Docker部署等工作中会遇到的微服务难题,并讲解产生原因和解决方案,从根本上帮助大家解决工作和面试的难题!
适合人群
想要了解微服务的同学;
对SpringCloud诸多开发高级特性感兴趣的同学;
工作、面试遇到奇葩问题无从着手的同学们
技术储备要求
熟悉Spring、SpringMVC框架基本应用
熟悉MySQL常用命令
了解基础Linux命令

ansible部署prometheus+node-exporter
简单部署prometheus监控系统

yum装置ansible
yum install ansible
ansible的hosts文件

# Ansible inventory: each [group] below is the target of a play's `hosts:` key.
# Target of the play that declares `hosts: alertmanagers`.
[alertmanagers]
10.9.119.1
# Target of the play that declares `hosts: prometheus`.
[prometheus]
10.9.119.1
# Target of the play that declares `hosts: node-exporter`; the prometheus.yml.j2
# template also loops over this group to build its "node" scrape targets.
[node-exporter]
10.9.119.1
10.9.119.2
10.9.119.3
文件层次格式如下:
深度解锁SpringCloud主流组件 一战解决微服务诸多难题

prometheus
prometheus.yml

  # Play: install, configure and start the Prometheus server on every host in
  # the [prometheus] inventory group.
  - hosts: prometheus
    remote_user: root
    tasks:
      # Create the install root if it does not exist yet.
      - name: create dir
        file:
          path: /opt/prometheus
          state: directory

      # Unpack the release tarball shipped next to the playbook into the
      # install root.
      - name: copy file
        unarchive:
          src: prometheus-2.24.0.linux-amd64.tar.gz
          dest: /opt/prometheus

      # Version-independent symlink so the unit file and config paths stay
      # stable across upgrades.
      - name: create link
        file:
          src: /opt/prometheus/prometheus-2.24.0.linux-amd64
          dest: /opt/prometheus/prometheus
          state: link

      - name: copy service file
        template:
          src: prometheus.service.j2
          dest: /usr/lib/systemd/system/prometheus.service

      # Rendered with Jinja so scrape targets come from the inventory groups.
      # `notify` is a task-level keyword, not a module argument.
      - name: copy config yaml
        template:
          src: prometheus.yml.j2
          dest: /opt/prometheus/prometheus/prometheus.yml
        notify:
          - restart prometheus

      - name: create rules dir
        file:
          path: /opt/prometheus/prometheus/rules
          state: directory

      # node.yml contains Prometheus `{{ ... }}` expressions of its own, so it
      # is shipped with `copy` rather than `template` to keep Jinja away from
      # them.
      - name: copy rules yaml
        copy:
          src: node.yml
          dest: /opt/prometheus/prometheus/rules/node.yml
        notify:  # triggers the handler below
          - restart prometheus

      - name: start prometheus
        service:
          name: prometheus
          state: started
          enabled: true

    # Handlers belong to the play, not to a task; they run only when notified.
    handlers:
      - name: restart prometheus
        service:
          name: prometheus
          state: restarted
      prometheus.service.j2 能够运用copy模块,这里运用了template

# systemd unit for the Prometheus server; the playbook installs it as
# /usr/lib/systemd/system/prometheus.service.
[Unit]
Description=Prometheus
# NOTE(review): Documentation= is empty; presumably a URL was lost. Confirm.
Documentation=
After=network.target
[Service]
# Paths go through the /opt/prometheus/prometheus symlink created by the
# playbook's "create link" task.
WorkingDirectory=/opt/prometheus/prometheus
ExecStart=/opt/prometheus/prometheus/prometheus
# Reload sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
# NOTE(review): stop sends SIGKILL, which the process cannot handle, so it gets
# no chance to shut down cleanly. Confirm this is intended.
ExecStop=/bin/kill -KILL $MAINPID
Type=simple
KillMode=control-group
Restart=on-failure
RestartSec=3s
[Install]
WantedBy=multi-user.target
prometheus.yml.j2

# Global settings
global:
  scrape_interval: 30s  # how often targets are scraped
  evaluation_interval: 30s  # how often the rule engine evaluates rules
  query_log_file: ./promql.log

# Alerting settings
alerting:
  alertmanagers:  # Alertmanager endpoints that receive alerts
    - static_configs:
        # One entry per host in the "alertmanagers" inventory group.
        # The loop tags sit at column 0 so they add no stray indentation to
        # the rendered file (assumes the template module's default block
        # trimming; confirm if that default is overridden).
        - targets:
{% for alertmanager in groups['alertmanagers'] %}
            - "{{ alertmanager }}:9093"
{% endfor %}

# Rule files, resolved relative to this configuration file
rule_files:
  - "rules/*.yml"

# Scrape jobs
scrape_configs:
  - job_name: 'prometheus'  # job label attached to the scraped series
    static_configs:
      # One target per host in the "prometheus" inventory group.
      - targets:
{% for host in groups['prometheus'] %}
          - "{{ host }}:9090"
{% endfor %}
  - job_name: "node"
    static_configs:
      # One target per host in the "node-exporter" inventory group.
      - targets:
{% for node in groups['node-exporter'] %}
          - "{{ node }}:9100"
{% endfor %}
        node-rules规则文件node.yml

# Prometheus alerting rules, deployed by the playbook as rules/node.yml.
# The {{ ... }} expressions here are Prometheus alert templates, which is why
# the playbook ships this file with `copy` instead of `template`.
groups:
  - name: node.rules  # alerting rule group name
    rules:
      # Fires when `up` has been 0 for 30 seconds.
      - alert: node is Down
        expr: up == 0
        for: 30s  # condition must hold this long before the alert fires
        labels:
          severity: serious  # custom label
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          # Duration text matches the `for:` value above.
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds."

      # Fires when an ext4/xfs filesystem has been more than 80% full for 2m.
      - alert: node Filesystem
        expr: '100 - (node_filesystem_free_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"} * 100) > 80'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}: {{ $labels.mountpoint }} 分区运用过高"
          description: "{{ $labels.instance }}: {{ $labels.mountpoint }} 分区运用大于 80% (当前值: {{ $value }})"

      # Fires when memory not counted as free/cached/buffers exceeds 80% of
      # total for 2m.
      - alert: node Memory
        expr: '100 - (node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 > 80'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}: 内存运用过高"
          description: "{{ $labels.instance }}: 内存运用大于 80% (当前值: {{ $value }})"

      # Fires when the per-instance average non-idle CPU share exceeds 80%
      # for 2m.
      - alert: node CPU
        expr: '100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) * 100) > 80'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }}: CPU运用过高"
          description: "{{ $labels.instance }}: CPU运用大于 80% (当前值: {{ $value }})"
      node-exporter
      node-exporter.yml
  # Play: install and (re)start node_exporter on every host in the
  # [node-exporter] inventory group.
  - hosts: node-exporter
    remote_user: root
    tasks:
      # Create the install root if it does not exist yet.
      - name: create dir
        file:
          path: /opt/prometheus
          state: directory

      # Unpack the release tarball shipped next to the playbook into the
      # install root.
      - name: copy file
        unarchive:
          src: node_exporter-1.0.1.linux-amd64.tar.gz
          dest: /opt/prometheus

      # Version-independent symlink used by the unit file's paths.
      - name: create link
        file:
          src: /opt/prometheus/node_exporter-1.0.1.linux-amd64
          dest: /opt/prometheus/node_exporter
          state: link

      - name: copy service file
        template:
          src: node_exporter.service.j2
          dest: /usr/lib/systemd/system/node_exporter.service

      # `restarted` restarts the service on every run; this play defines no
      # handlers.
      - name: start node_exporter
        service:
          name: node_exporter
          state: restarted
          enabled: true
      node_exporter.service.j2

# systemd unit for node_exporter; the playbook installs it as
# /usr/lib/systemd/system/node_exporter.service.
[Unit]
Description=Node Exporter
# NOTE(review): Documentation= is empty; presumably a URL was lost. Confirm.
Documentation=
After=network.target
[Service]
# Paths go through the /opt/prometheus/node_exporter symlink created by the
# playbook's "create link" task.
WorkingDirectory=/opt/prometheus/node_exporter/
ExecStart=/opt/prometheus/node_exporter/node_exporter
# NOTE(review): stop sends SIGKILL, which the process cannot handle. Confirm
# this is intended.
ExecStop=/bin/kill -KILL $MAINPID
Type=simple
KillMode=control-group
Restart=on-failure
RestartSec=3s
[Install]
WantedBy=multi-user.target
alertmanager
alertmanager.yaml

  # Play: install, configure and (re)start Alertmanager on every host in the
  # [alertmanagers] inventory group.
  - hosts: alertmanagers
    remote_user: root
    tasks:
      # Create the install root if it does not exist yet.
      - name: create dir
        file:
          path: /opt/prometheus
          state: directory

      # Unpack the release tarball shipped next to the playbook into the
      # install root.
      - name: copy file
        unarchive:
          src: alertmanager-0.21.0.linux-amd64.tar.gz
          dest: /opt/prometheus

      # Version-independent symlink used by the unit file and config paths.
      - name: create link
        file:
          src: /opt/prometheus/alertmanager-0.21.0.linux-amd64
          dest: /opt/prometheus/alertmanager
          state: link

      - name: copy service file
        template:
          src: alertmanager.service.j2
          dest: /usr/lib/systemd/system/alertmanager.service

      # `notify` is a task-level keyword, not a module argument.
      - name: copy config yaml
        template:
          src: alertmanager.yml.j2
          dest: /opt/prometheus/alertmanager/alertmanager.yml
        notify:
          - restart alertmanager

      # NOTE(review): `restarted` restarts the service on every run, in
      # addition to the handler below. The prometheus play uses `started`
      # for this step; confirm which behavior is wanted.
      - name: start server
        service:
          name: alertmanager
          state: restarted
          enabled: true

    # Handlers belong to the play, not to a task; they run only when notified.
    handlers:
      - name: restart alertmanager
        service:
          name: alertmanager
          state: restarted
      alertmanager.service.j2

# systemd unit for Alertmanager; the playbook installs it as
# /usr/lib/systemd/system/alertmanager.service.
[Unit]
Description=AlertManager
# NOTE(review): Documentation= is empty; presumably a URL was lost. Confirm.
Documentation=
After=network.target
[Service]
# Paths go through the /opt/prometheus/alertmanager symlink created by the
# playbook's "create link" task.
WorkingDirectory=/opt/prometheus/alertmanager/
ExecStart=/opt/prometheus/alertmanager/alertmanager
# Reload sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
# NOTE(review): stop sends SIGKILL, which the process cannot handle. Confirm
# this is intended.
ExecStop=/bin/kill -KILL $MAINPID
Type=simple
KillMode=control-group
Restart=on-failure
RestartSec=3s
[Install]
WantedBy=multi-user.target
alertmanager.yml.j2 这里运用了邮箱告警

# Alertmanager configuration: e-mail notifications with label-based routing.
global:
  # Time after which an alert that is no longer being updated is declared
  # resolved.
  resolve_timeout: 5m
  smtp_from: "123456789@qq.com"
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_auth_username: "123456789@qq.com"  # mailbox account
  # Mailbox authorization code, not the QQ login password.
  # NOTE(security): this credential is stored in plain text; prefer supplying
  # it from an encrypted variable (for example Ansible Vault) instead of
  # committing it.
  smtp_auth_password: "bcvizcgqbgojjjeb"
  smtp_require_tls: false  # port 465 is used, so this is set to false

route:
  group_by: ['alertname']  # label used to group alerts
  group_wait: 10s  # wait before sending the first notification for a group
  group_interval: 10s  # wait between notification batches for a group
  repeat_interval: 24h  # wait before re-sending a notification already sent
  receiver: 'default-receiver'  # default receiver
  # Alerts that match none of the child routes below stay at the root route
  # and go to 'default-receiver'.
  routes:
    - receiver: 'db'
      group_wait: 10s
      # Regex match: alerts whose service label is mysql or redis go to 'db'.
      match_re:
        service: mysql|redis
    - receiver: 'web'
      group_by: [product, environment]  # group by these two labels
      # Alerts labelled team=frontend go to 'web'.
      match:
        team: frontend

receivers:
  - name: 'default-receiver'
    email_configs:
      - to: '123456789@qq.com'  # alert recipient
  - name: 'db'
    # Notify by e-mail.
    email_configs:
      - to: '111111111@qq.com'
  - name: 'web'
    email_configs:
      - to: '222222222@qq.com'

# Inhibition: while a 'critical' alert is firing, 'warning' alerts that share
# the labels listed in `equal` are suppressed.
# NOTE(review): the alerting rules in node.yml label their severe alert
# 'serious', not 'critical', so this source matcher may never match. Confirm.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
上一篇:[kubernetes]-k8s安装alertmanager和prometheus-webhook-dingtalk


下一篇:promethues邮件告警