I. Installing the Prometheus environment with the Operator and from binaries
- Installing Prometheus with the Operator
1. Download the project files and create the base environment
[root@deploy-1 ~]# wget https://github.com/prometheus-operator/kube-prometheus/archive/refs/tags/v0.12.0.tar.gz
[root@deploy-1 ~]# tar -xf v0.12.0.tar.gz
[root@deploy-1 ~]# cd kube-prometheus-0.12.0/manifests/
[root@deploy-1 manifests]# kubectl create -f setup/ # creating with apply fails because the CRD annotation is too long
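Before applying the remaining manifests it is worth confirming that the namespace and the Operator CRDs were actually created; a quick check (output varies by cluster):
[root@deploy-1 manifests]# kubectl get ns monitoring
[root@deploy-1 manifests]# kubectl get crd | grep monitoring.coreos.com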
2. Check which images the yaml files require
[root@deploy-1 manifests]# grep -R 'image: ' ./*
./alertmanager-alertmanager.yaml: image: quay.io/prometheus/alertmanager:v0.25.0
./blackboxExporter-deployment.yaml: image: quay.io/prometheus/blackbox-exporter:v0.23.0
./blackboxExporter-deployment.yaml: image: jimmidyson/configmap-reload:v0.5.0
./blackboxExporter-deployment.yaml: image: quay.io/brancz/kube-rbac-proxy:v0.14.0
./grafana-deployment.yaml: image: grafana/grafana:9.3.2
./kubeStateMetrics-deployment.yaml: image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0
./kubeStateMetrics-deployment.yaml: image: quay.io/brancz/kube-rbac-proxy:v0.14.0
./kubeStateMetrics-deployment.yaml: image: quay.io/brancz/kube-rbac-proxy:v0.14.0
./nodeExporter-daemonset.yaml: image: quay.io/prometheus/node-exporter:v1.5.0
./nodeExporter-daemonset.yaml: image: quay.io/brancz/kube-rbac-proxy:v0.14.0
./prometheusAdapter-deployment.yaml: image: registry.k8s.io/prometheus-adapter/prometheus-adapter:v0.10.0
./prometheusOperator-deployment.yaml: image: quay.io/prometheus-operator/prometheus-operator:v0.62.0
./prometheusOperator-deployment.yaml: image: quay.io/brancz/kube-rbac-proxy:v0.14.0
./prometheus-prometheus.yaml: image: quay.io/prometheus/prometheus:v2.41.0
3. Pull the images (the registry.k8s.io images are pulled from mirror repositories here)
[root@deploy-1 ~]# docker pull bitnami/kube-state-metrics:2.7.0
[root@deploy-1 ~]# docker pull v5cn/prometheus-adapter:v0.10.0
4. Tag and push the images to the local Harbor registry
[root@deploy-1 ~]# docker tag bitnami/kube-state-metrics:2.7.0 qj.harbor.com/baseimages/kube-state-metrics:v2.7.0
[root@deploy-1 ~]# docker tag v5cn/prometheus-adapter:v0.10.0 qj.harbor.com/baseimages/prometheus-adapter:v0.10.0
[root@deploy-1 ~]# docker push qj.harbor.com/baseimages/kube-state-metrics:v2.7.0
[root@deploy-1 ~]# docker push qj.harbor.com/baseimages/prometheus-adapter:v0.10.0
5. Update the image references in the yaml files
[root@deploy-1 manifests]# vim ./kubeStateMetrics-deployment.yaml
        image: qj.harbor.com/baseimages/kube-state-metrics:v2.7.0
[root@deploy-1 manifests]# vim ./prometheusAdapter-deployment.yaml
        image: qj.harbor.com/baseimages/prometheus-adapter:v0.10.0
6. Apply the manifests
[root@deploy-1 manifests]# kubectl apply -f .
[root@deploy-1 manifests]# kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-main-0 2/2 Running 1 (18m ago) 19m
alertmanager-main-1 2/2 Running 1 (18m ago) 19m
alertmanager-main-2 2/2 Running 1 (18m ago) 19m
blackbox-exporter-6fd586b445-wggkv 3/3 Running 0 20m
grafana-9f58f8675-hsrjm 1/1 Running 0 20m
kube-state-metrics-75586f4d76-dccd8 3/3 Running 0 20m
node-exporter-8jz2n 2/2 Running 0 20m
node-exporter-d8d4j 2/2 Running 0 20m
node-exporter-pkgdc 2/2 Running 0 20m
node-exporter-qcb66 2/2 Running 0 20m
node-exporter-r9hgm 2/2 Running 0 20m
node-exporter-v5m8w 2/2 Running 0 20m
prometheus-adapter-89c46bf6c-jqz8j 1/1 Running 0 20m
prometheus-adapter-89c46bf6c-t5wt8 1/1 Running 0 20m
prometheus-k8s-0 2/2 Running 0 19m
prometheus-k8s-1 2/2 Running 0 19m
prometheus-operator-776c6c6b87-f6k8t 2/2 Running 0 20m
[root@deploy-1 manifests]# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-main ClusterIP 10.100.251.166 <none> 9093/TCP,8080/TCP 21m
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 19m
blackbox-exporter ClusterIP 10.100.253.34 <none> 9115/TCP,19115/TCP 21m
grafana NodePort 10.100.253.167 <none> 3000:33000/TCP 21m
kube-state-metrics ClusterIP None <none> 8443/TCP,9443/TCP 21m
node-exporter ClusterIP None <none> 9100/TCP 21m
prometheus-adapter ClusterIP 10.100.96.127 <none> 443/TCP 21m
prometheus-k8s NodePort 10.100.26.134 <none> 9090/TCP,8080/TCP 21m
prometheus-operated ClusterIP None <none> 9090/TCP 19m
prometheus-operator ClusterIP None <none> 8443/TCP 21m
7. Verify the Prometheus and Grafana web UIs
# To access them from a client browser, change the Service type in the yaml files to NodePort
[root@deploy-1 manifests]# cat prometheus-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 2.41.0
  name: prometheus-k8s
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: web
    port: 9090
    targetPort: web
    nodePort: 39090
  - name: reloader-web
    port: 8080
    targetPort: reloader-web
    nodePort: 38080
  selector:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
  sessionAffinity: ClientIP
[root@deploy-1 manifests]# cat grafana-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 9.3.2
  name: grafana
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: http
    port: 3000
    targetPort: http
    nodePort: 33000
  selector:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
# NetworkPolicies are created by default; delete them for now and adjust them later according to actual needs
[root@k8s-deploy manifests]# for i in `ls |grep network`;do kubectl delete -f $i;done
networkpolicy.networking.k8s.io "alertmanager-main" deleted
networkpolicy.networking.k8s.io "blackbox-exporter" deleted
networkpolicy.networking.k8s.io "grafana" deleted
networkpolicy.networking.k8s.io "kube-state-metrics" deleted
networkpolicy.networking.k8s.io "node-exporter" deleted
networkpolicy.networking.k8s.io "prometheus-k8s" deleted
networkpolicy.networking.k8s.io "prometheus-adapter" deleted
networkpolicy.networking.k8s.io "prometheus-operator" deleted
- Installing Prometheus from the binary release
[root@prometheus ~]# wget https://github.com/prometheus/prometheus/releases/download/v2.37.0/prometheus-2.37.0.linux-amd64.tar.gz
[root@prometheus ~]# tar -xf prometheus-2.37.0.linux-amd64.tar.gz
[root@prometheus ~]# mv prometheus-2.37.0.linux-amd64 /usr/local/prometheus
[root@prometheus ~]# vim /etc/systemd/system/prometheus.service # systemd unit so the service starts automatically
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Restart=on-failure
WorkingDirectory=/usr/local/prometheus/
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml
[Install]
WantedBy=multi-user.target
[root@prometheus ~]# systemctl daemon-reload
[root@prometheus ~]# systemctl start prometheus
[root@prometheus ~]# systemctl enable prometheus
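A quick sanity check after starting the service (promtool ships in the same tarball, and the health endpoint is built into Prometheus):
[root@prometheus ~]# /usr/local/prometheus/promtool check config /usr/local/prometheus/prometheus.yml
[root@prometheus ~]# curl -s -o /dev/null -w '%{http_code}\n' http://localhost:9090/-/healthy   # expect 200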
II. Collecting metrics with node-exporter and cAdvisor
- node-exporter
1. Install node-exporter from the binary release on every node
cd /usr/local/src
wget https://github.com/prometheus/node_exporter/releases/download/v1.5.0/node_exporter-1.5.0.linux-amd64.tar.gz
tar -xf node_exporter-1.5.0.linux-amd64.tar.gz
mv node_exporter-1.5.0.linux-amd64 /usr/local/node_exporter
vim /etc/systemd/system/node-exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target
[Service]
ExecStart=/usr/local/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
systemctl daemon-reload
systemctl start node-exporter.service
systemctl enable node-exporter.service
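Note that node_exporter listens on port 9100 by default, while the scrape job below targets port 9110; if a non-default port is really intended, the listening address has to be set explicitly in the unit file, roughly like this:
ExecStart=/usr/local/node_exporter/node_exporter --web.listen-address=":9110"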
2. Add a scrape configuration for the node targets on the Prometheus server
[root@prometheus ~]# vim /usr/local/prometheus/prometheus.yml
- job_name: "ks-node"
static_configs:
- targets: ["10.10.20.12:9110","10.10.20.14:9110","10.10.20.15:9110"]
[root@prometheus ~]# systemctl restart prometheus
- cadvisor
1. Pull the cAdvisor image
[root@deploy-1 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/zhangshijie/cadvisor-amd64:v0.39.3
[root@deploy-1 ~]# docker tag registry.cn-hangzhou.aliyuncs.com/zhangshijie/cadvisor-amd64:v0.39.3 qj.harbor.com/baseimages/cadvisor-amd64:v0.39.3
[root@deploy-1 ~]# docker push qj.harbor.com/baseimages/cadvisor-amd64:v0.39.3
2. Write the yaml file
[root@deploy-1 case]# vim case1-daemonset-deploy-cadvisor.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cadvisor
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: cAdvisor
  template:
    metadata:
      labels:
        app: cAdvisor
    spec:
      tolerations:                 # tolerate the master NoSchedule taint
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
      hostNetwork: true
      restartPolicy: Always        # restart policy
      containers:
      - name: cadvisor
        image: qj.harbor.com/baseimages/cadvisor-amd64:v0.39.3
        imagePullPolicy: Always    # image pull policy
        ports:
        - containerPort: 8080
        volumeMounts:
        - name: root
          mountPath: /rootfs
        - name: run
          mountPath: /var/run
        - name: sys
          mountPath: /sys
        - name: docker
          mountPath: /var/lib/containerd
      volumes:
      - name: root
        hostPath:
          path: /
      - name: run
        hostPath:
          path: /var/run
      - name: sys
        hostPath:
          path: /sys
      - name: docker
        hostPath:
          path: /var/lib/containerd   # default containerd data directory; for docker it is /var/lib/docker
3. Create the resources
kubectl create ns monitoring
kubectl apply -f case1-daemonset-deploy-cadvisor.yaml
[root@deploy-1 case]# kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
cadvisor-42jvm 1/1 Running 0 22m
cadvisor-9dlw5 1/1 Running 0 22m
cadvisor-h78sv 1/1 Running 0 22m
cadvisor-pq7ct 1/1 Running 0 22m
cadvisor-r9l8k 1/1 Running 0 22m
cadvisor-xwvkf 1/1 Running 0 22m
4. Add a scrape configuration for the cAdvisor targets on the Prometheus server
[root@prometheus ~]# vim /usr/local/prometheus/prometheus.yml
- job_name: "ks-cadvisor"
static_configs:
- targets: ["10.10.20.12:8080","10.10.20.14:8080","10.10.20.15:8080","10.10.20.17:8080","10.10.20.19:8080","10.10.20.6:8080"]
[root@prometheus ~]# systemctl restart prometheus
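Once the cAdvisor targets are up, a container-level query against the Prometheus HTTP API confirms that data is flowing; a minimal sketch run on the Prometheus host (the metric and the 5m window are just examples):
[root@prometheus ~]# curl -s 'http://localhost:9090/api/v1/query' \
    --data-urlencode 'query=sum(rate(container_cpu_usage_seconds_total[5m])) by (instance)'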
III. Displaying Prometheus node and pod data in Grafana
- Install Grafana
[root@prometheus src]# rpm -ivh grafana-8.4.6-1.x86_64.rpm
[root@prometheus src]# vim /etc/grafana/grafana.ini
[server]
protocol = http
http_addr = 0.0.0.0
http_port = 3000
[root@prometheus src]# systemctl restart grafana-server.service
- Add the Prometheus data source (screenshot omitted; an API-based alternative is sketched below)
- Import dashboard templates (screenshot omitted)
- Pod data dashboard (screenshot omitted)
- Node data dashboard (screenshot omitted)
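Adding the data source can also be scripted against the Grafana HTTP API instead of clicking through the UI; a minimal sketch, assuming the default admin:admin credentials and that Grafana and Prometheus run on the same host:
[root@prometheus src]# curl -s -u admin:admin -H 'Content-Type: application/json' \
    -X POST http://localhost:3000/api/datasources \
    -d '{"name":"prometheus","type":"prometheus","url":"http://localhost:9090","access":"proxy","isDefault":true}'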
IV. An overview of Prometheus service discovery
Service discovery mechanism
By default Prometheus pulls monitoring data: it periodically scrapes metrics from target hosts, and every scraped target has to expose an HTTP endpoint from which Prometheus fetches the corresponding metric data. With this approach the list of scrape targets is decided up front through the jobs configured in scrape_configs, so new services cannot be detected dynamically; whenever nodes or components are added later, the Prometheus configuration has to be edited by hand and Prometheus restarted, which is inconvenient. Dynamic service discovery was introduced to solve this: it automatically detects new endpoints in the cluster and adds them to the configuration. Through service discovery Prometheus obtains the list of targets that need to be monitored and then polls those targets for metrics.
- Label rewriting (relabeling)
Prometheus relabeling can dynamically rewrite, add or override a target's metadata labels before the target is scraped.
After Prometheus dynamically discovers targets from the Kubernetes API, each discovered target instance carries some raw metadata labels. The default labels include:
__address__: the target address in <host>:<port> form
__scheme__: the scheme (HTTP or HTTPS) of the scraped target service address
__metrics_path__: the URI path used to scrape the target service
- Purpose of relabeling
To make monitoring metrics easier to identify and to support later graphing and alerting, Prometheus allows the labels of discovered targets to be modified. Relabeling can take place at two stages:
relabel_configs
Applied before a target is scraped (for example to redefine label information such as the destination IP or port before collection); it can add, modify or remove labels, and can also be used to scrape only specific targets or to filter targets out.
metric_relabel_configs
Applied after a target has been scraped, i.e., on metric data that has already been collected; it performs the final relabeling and label filtering.
source_labels: the source labels, i.e., the label names before relabel processing
target_label: the new label name produced by the action
regex: a value or regular expression matched against the value of the source labels
replacement: the value written to the target_label; regex capture groups can be referenced here, e.g. $1, $2
action
replace: replace the label value; regex is matched against the value of the source labels and replacement can reference the matched groups
keep: scrape only the instances whose source_labels match regex; targets that do not match are dropped, i.e., only matching instances are collected
drop: do not scrape the instances whose source_labels match regex; matching targets are dropped, i.e., only non-matching instances are collected
hashmod: compute a hash of the source_labels value modulo a user-defined modulus, which can be used to shard or re-classify targets, for example:
scrape_configs:
- job_name: ip_job
  relabel_configs:
  - source_labels: [__address__]
    modulus: 4
    target_label: __ip_hash
    action: hashmod
  - source_labels: [__ip_hash]
    regex: ^1$
    action: keep
labelmap: match regex against all label names and copy the values of the matching labels to new label names built from the regex capture groups
labelkeep: match regex against all label names; labels that do not match are removed from the label set
labeldrop: match regex against all label names; labels that do match are removed from the label set
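As an illustration of labelmap and labeldrop, the snippet below (an illustrative sketch, not taken from a specific environment) copies all Kubernetes node labels onto the target and then drops labels with a noisy prefix:
relabel_configs:
- action: labelmap
  regex: __meta_kubernetes_node_label_(.+)   # node labels become target labels, keeping the captured name
- action: labeldrop
  regex: beta_kubernetes_io_.*               # any label whose name matches this pattern is removed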
- Service discovery types
1. Static configuration:
With static service discovery the monitoring targets are listed in the Prometheus configuration file; every time a new target instance needs to be monitored, the file has to be edited by hand to configure the target.
scrape_configs:
- job_name: "static_test"        # job name
  # metrics_path: "/metrics"     # default URI
  # scheme: http                 # default scheme
  static_configs:                # static target configuration
  - targets: ["10.0.0.11:8080","10.0.0.12:8080","10.0.0.13:8080"]   # target endpoint addresses
2. File-based service discovery:
scrape_configs:
# file-based service discovery configuration
- job_name: 'file_sd_test'
  scrape_interval: 10s           # scrape interval
  file_sd_configs:
  - files:                       # yaml and json files are both supported
    - /data/prometheus/static_conf/*.yml
    refresh_interval: 10s        # how often the files are re-read
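The referenced files contain the target lists themselves; a minimal sketch of what /data/prometheus/static_conf/nodes.yml could look like (the file name and the extra labels are illustrative):
- targets:
  - "10.0.0.11:9100"
  - "10.0.0.12:9100"
  labels:
    env: test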
3. DNS service discovery
DNS-based service discovery allows a set of DNS names to be configured; these names are queried periodically to discover the target list, and they must be resolvable to IPs by the configured DNS servers.
This discovery mechanism only supports basic DNS A, AAAA and SRV record queries.
A record: resolves a name to an IPv4 address
AAAA record: resolves a name to an IPv6 address
SRV record: records which host provides which concrete service, in the form service-name.protocol-type.domain (e.g. _example-server._tcp.www.mydns.com)
scrape_configs:
- job_name: 'dns_sd_test'
  scrape_interval: 10s           # scrape interval
  dns_sd_configs:
  - names: ["node1.example.com","node2.example.com"]   # DNS names to query
    type: A
    port: 9100
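For SRV records the port does not need to be specified because it comes from the record itself; a short sketch reusing the SRV record format mentioned above:
scrape_configs:
- job_name: 'dns_srv_sd_test'
  dns_sd_configs:
  - names: ["_example-server._tcp.www.mydns.com"]   # SRV record to query
    type: SRV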
4. Consul service discovery
Consul is an open-source tool written in Go that targets distributed, service-oriented systems and provides service registration, service discovery and configuration management, together with health checking and consistency guarantees.
Consul is a distributed k/v store commonly used for service registration and discovery. With Consul-based dynamic target discovery, Prometheus keeps watching the Consul service; whenever the services registered in Consul change, Prometheus automatically picks up all target resources registered in Consul.
scrape_configs:
- job_name: 'consul_sd_test'
  honor_labels: true
  metrics_path: "/metrics"
  scheme: http
  consul_sd_configs:
  - server: 10.0.0.11:8500
    services: []                 # names of the services to discover; empty means all services
  - server: 10.0.0.12:8500
    services: []
Parameter notes:
honor_labels: controls how Prometheus handles conflicts between labels that already exist in the scraped data and the server-side labels Prometheus would attach (the "job" and "instance" labels, manually configured target labels, and labels generated by service discovery).
If honor_labels is set to "true", conflicts are resolved by keeping the label values from the scraped data and ignoring the conflicting server-side labels; in addition, if the scraped target carries the label but with an empty value, the local Prometheus label value is used, and if the scraped target does not carry the label at all but Prometheus has it configured, the configured value is used.
If honor_labels is set to "false", conflicts are resolved by renaming the conflicting labels in the scraped data to exported_<original-label> (e.g. exported_instance, exported_job) and then attaching the server-side labels.
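For the discovery above to return anything, services have to be registered in Consul first; a minimal sketch that registers a node_exporter instance through the Consul agent HTTP API (addresses and names are illustrative):
curl -s -X PUT http://10.0.0.11:8500/v1/agent/service/register -d '{
  "ID": "node-exporter-11",
  "Name": "node-exporter",
  "Address": "10.0.0.11",
  "Port": 9100
}'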
5. Service discovery based on the Kubernetes API
With Kubernetes API based service discovery, Prometheus talks to the Kubernetes API and dynamically discovers all monitorable target resources deployed in the cluster.
Through its Kubernetes API integration Prometheus supports five service discovery roles: Node, Service, Pod, Endpoints and Ingress. Different roles fit different scenarios: node suits host-related resources, such as the state of Kubernetes components running on a node or of the containers running there; service and ingress suit blackbox-style monitoring, such as service availability and service quality; endpoints and pod can both be used to obtain metrics from Pod instances, e.g. to monitor user- or administrator-deployed applications that expose Prometheus metrics.
scrape_configs:
- job_name: "kubernetes_sd_test"
  scheme: http
  kubernetes_sd_configs:
  - role: node
V. Implementing kubernetes-apiserver and CoreDNS service discovery in Prometheus
- Target discovery roles:
1. node: the node role discovers the address and port of every node in the cluster, defaulting to the Kubelet's HTTP port. The target address defaults to the first existing address of the Kubernetes Node object, in the address-type order NodeInternalIP, NodeExternalIP, NodeLegacyHostIP, NodeHostName.
Purpose: monitor host-level metrics of the Kubernetes nodes.
2. service
The service role discovers the IP and port of every Service and uses them as the target. This is useful for blackbox monitoring.
In other words, whichever pod the Service routes to is the one whose data comes back; this role is rarely used and is mainly suitable for checking whether the business behind a Service is healthy.
3. pod
The pod role discovers all pods and uses the pod IPs as targets. If there are multiple ports or multiple containers, multiple targets are generated (for example, with ports 80 and 443 and pod IP 10.0.244.22, both 10.0.244.22:80 and 10.0.244.22:443 become scrape targets).
If a container declares no port, a port-free target is created for each container so that a port can be added manually via relabeling.
4. endpoints
The endpoints role discovers all targets from the Endpoints (ep) lists.
5. ingress
The ingress role discovers a target for each path of every Ingress. This is usually useful for blackbox monitoring. The address is set to the host specified in the Ingress.
- apiserver service discovery
[root@deploy-1 case]# kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.100.0.1 <none> 443/TCP 21d
[root@deploy-1 case]# kubectl get ep
NAME ENDPOINTS AGE
kubernetes 10.10.20.17:6443,10.10.20.19:6443,10.10.20.6:6443 21d
[root@deploy-1 case]# cat case3-1-prometheus-cfg.yaml
---
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
    - job_name: 'kubernetes-node'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    - job_name: 'kubernetes-node-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
    - job_name: 'kubernetes-apiserver'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_service_name
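The ConfigMap above assumes Prometheus runs in-cluster under a service account that may list and watch the discovered objects and reach the apiserver proxy paths; a minimal RBAC sketch (the ServiceAccount name prometheus is illustrative and must match the actual deployment):
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources: ["nodes", "nodes/proxy", "nodes/metrics", "services", "endpoints", "pods"]
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring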
- CoreDNS service discovery
Note: the yaml used to deploy CoreDNS must include the annotations below, otherwise the metrics may not be collected.
Where to add the annotations depends on the discovery role: they go on the object type that matches the role being used (here the endpoints role, so the annotations are added to the Service).
annotations:
  prometheus.io/port: "9153"
  prometheus.io/scrape: "true"
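If redeploying CoreDNS is not convenient, the same annotations can also be added to the existing kube-dns Service directly (a sketch; the values match the describe output further below):
[root@deploy-1 coredns]# kubectl -n kube-system annotate svc kube-dns \
    prometheus.io/scrape="true" prometheus.io/port="9153" --overwrite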
# Prometheus scrape configuration
- job_name: 'kubernetes-service-endpoints'
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
    action: keep
    regex: true
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
    action: replace
    target_label: __scheme__
    regex: (https?)
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
    action: replace
    target_label: __address__
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
  - action: labelmap
    regex: __meta_kubernetes_service_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    action: replace
    target_label: kubernetes_service_name
# Check the CoreDNS (kube-dns) Service status
[root@deploy-1 coredns]# kubectl describe svc kube-dns -n kube-system
Name: kube-dns
Namespace: kube-system
Labels: addonmanager.kubernetes.io/mode=Reconcile
k8s-app=kube-dns
kubernetes.io/cluster-service=true
kubernetes.io/name=CoreDNS
Annotations: prometheus.io/port: 9153    # annotation used by Prometheus to discover the metrics port
             prometheus.io/scrape: true  # annotation used by Prometheus to decide whether to scrape the target
Selector: k8s-app=kube-dns
Type: ClusterIP
IP Family Policy: SingleStack
IP Families: IPv4
IP: 10.100.0.2
IPs: 10.100.0.2
Port: dns 53/UDP
TargetPort: 53/UDP
Endpoints: 10.200.31.193:53,10.200.35.129:53
Port: dns-tcp 53/TCP
TargetPort: 53/TCP
Endpoints: 10.200.31.193:53,10.200.35.129:53
Port: metrics 9153/TCP
TargetPort: 9153/TCP
Endpoints: 10.200.31.193:9153,10.200.35.129:9153
Session Affinity: None
Events: <none>
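To confirm that the metrics port actually serves data, the endpoint from the output above can be queried directly from any host that can reach the pod network (a sketch; output trimmed):
[root@deploy-1 coredns]# curl -s http://10.200.31.193:9153/metrics | grep -c '^coredns_'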