1. 部署mysqld-exporter
- 前提:创建好用户
-- Create the dedicated monitoring account that mysqld-exporter logs in with.
CREATE USER '用户'@'%' IDENTIFIED BY '密码';
-- SELECT alone is not enough: the exporter also needs PROCESS (InnoDB /
-- processlist collectors) and REPLICATION CLIENT (replication status
-- collector). Without them those collectors fail and their metrics are missing.
GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO '用户'@'%';
部署
# Run mysqld-exporter as a container and publish its metrics port 9104.
# The original command passed --restart twice (always, then unless-stopped);
# only one restart policy can apply, so the later value is the one kept here.
docker run -d --name mysqld-exporter \
  --restart=unless-stopped \
  -p 9104:9104 \
  -e DATA_SOURCE_NAME="mysql用户:密码@(地址:3306)/" \
  prom/mysqld-exporter:v0.12.1
2. 配置Endpoints和Service将外部服务暴露到k8s集群内部
vim mysql-exporter.yaml
# Manually managed Endpoints that point at the mysqld-exporter running outside
# the cluster. Name and namespace are the same as the "mysqld-exporter" Service
# in this file, and the port name "metrics" is the same as the Service port name.
apiVersion: v1
kind: Endpoints
metadata:
  name: mysqld-exporter
  namespace: monitoring
  labels:
    app: mysqld-exporter
    app.kubernetes.io/name: mysqld-exporter
subsets:
  - addresses:
      # List of external backends
      - ip: 10.200.1.4  # machine where MySQL runs
    ports:
      - name: metrics
        port: 9104
---
# Headless Service (clusterIP: None) with no selector. It shares its name and
# namespace with the "mysqld-exporter" Endpoints object in this file, and
# carries the "app: mysqld-exporter" label that the ServiceMonitor selects on.
apiVersion: v1
kind: Service
metadata:
  name: mysqld-exporter
  namespace: monitoring
  labels:
    app: mysqld-exporter
    app.kubernetes.io/name: mysqld-exporter
spec:
  clusterIP: None
  ports:
    - name: metrics
      port: 9104
      protocol: TCP
      targetPort: 9104
3. 配置ServiceMonitor和prometheusrules
- 规则参考:https://github.com/samber/awesome-prometheus-alerts/blob/master/dist/rules/mysql/mysqld-exporter.yml
vim mysql-ServiceMonitor-prometheusrules.yaml
# ServiceMonitor that scrapes the "metrics" port of every Service labelled
# "app: mysqld-exporter" in the monitoring namespace, once per minute.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: mysqld-exporter
  namespace: monitoring
  labels:
    app: mysqld-exporter
    # NOTE(review): presumably the label the Prometheus serviceMonitorSelector
    # matches on — confirm against the Prometheus custom resource.
    release: prometheus
spec:
  selector:
    matchLabels:
      app: mysqld-exporter
  namespaceSelector:
    matchNames:
      - monitoring
  endpoints:
    - port: metrics
      interval: 1m
      path: /metrics
      scheme: http
---
# Alerting rules for MySQL, evaluated on metrics scraped from mysqld-exporter.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: mysqld-rules
  namespace: monitoring
  labels:
    # NOTE(review): added to mirror the "release: prometheus" label on the
    # ServiceMonitor in these notes; assumes the Prometheus ruleSelector matches
    # on the same label — confirm against the Prometheus custom resource.
    release: prometheus
spec:
  groups:
    - name: MysqldExporter
      rules:
        # Fires immediately when the exporter reports MySQL as unreachable.
        - alert: MysqlDown
          expr: 'mysql_up == 0'
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: MySQL down (instance {{ $labels.instance }})
            description: "MySQL 实例在 {{ $labels.instance }} 上宕机\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # Connected threads above 80% of max_connections for 2 minutes.
        - alert: MysqlTooManyConnections(>80%)
          expr: 'max_over_time(mysql_global_status_threads_connected[1m]) / mysql_global_variables_max_connections * 100 > 80'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: MySQL too many connections (> 80%) (instance {{ $labels.instance }})
            description: "MySQL 连接数超过最大连接数的 80% 在 {{ $labels.instance }} 上\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # Running threads above 60% of max_connections for 2 minutes.
        - alert: MysqlHighThreadsRunning
          expr: 'max_over_time(mysql_global_status_threads_running[1m]) / mysql_global_variables_max_connections * 100 > 60'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: MySQL high threads running (instance {{ $labels.instance }})
            description: "超过 60% 的 MySQL 连接正在运行状态 在 {{ $labels.instance }} 上\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # The slow-query counter kept increasing over the last 2 minutes.
        - alert: MysqlSlowQueries
          expr: 'increase(mysql_global_status_slow_queries[1m]) > 0'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: MySQL slow queries (instance {{ $labels.instance }})
            description: "MySQL 服务器 {{ $labels.instance }} 有新的慢查询\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # InnoDB log waits growing faster than 10/s averaged over 15 minutes.
        - alert: MysqlInnodbLogWaits
          expr: 'rate(mysql_global_status_innodb_log_waits[15m]) > 10'
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: MySQL InnoDB log waits (instance {{ $labels.instance }})
            description: "MySQL InnoDB 日志写入卡住\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # Server uptime below 60 seconds, i.e. it was just (re)started.
        - alert: MysqlRestarted
          expr: 'mysql_global_status_uptime < 60'
          for: 0m
          labels:
            severity: info
          annotations:
            summary: MySQL restarted (instance {{ $labels.instance }})
            description: "MySQL 刚刚重启,时间少于一分钟 在 {{ $labels.instance }} 上\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # InnoDB buffer pool data above 80% of the configured pool size.
        - alert: MysqlMemoryUsageHigh
          expr: 'mysql_global_status_innodb_buffer_pool_bytes_data / mysql_global_variables_innodb_buffer_pool_size * 100 > 80'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: MySQL memory usage high (> 80%) (instance {{ $labels.instance }})
            description: "MySQL 内存使用超过 80% 在 {{ $labels.instance }} 上\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # The job matcher was "mysql", which never matches: the ServiceMonitor
        # in these notes sets no jobLabel, so the job label defaults to the
        # Service name "mysqld-exporter".
        # NOTE(review): process_cpu_seconds_total is reported by the scraped
        # process itself, so this likely tracks the exporter's CPU rather than
        # mysqld's — confirm this is the intended signal.
        - alert: MysqlCPUUsageHigh
          expr: 'rate(process_cpu_seconds_total{job="mysqld-exporter"}[1m]) * 100 > 80'
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: MySQL CPU usage high (> 80%) (instance {{ $labels.instance }})
            description: "MySQL CPU 使用超过 80% 在 {{ $labels.instance }} 上\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # Query rate above 1000 queries per second for 2 minutes.
        - alert: MysqlQPSHigh
          expr: 'rate(mysql_global_status_queries[1m]) > 1000'
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: MySQL QPS high (> 1000) (instance {{ $labels.instance }})
            description: "MySQL 每秒查询次数 (QPS) 超过 1000 在 {{ $labels.instance }} 上\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"