说明
promtail 是 Loki 的日志收集 agent,采用类似于 Prometheus 的服务发现机制,应该是最云原生的日志 agent 了。赞一个
安装
# helm pull loki/promtail --version=2.0.1
# helm show values promtail-2.0.1.tgz
生成 promtail 配置文件:
配置参考:https://grafana.com/docs/loki/latest/clients/promtail/configuration/
# cat > promtail-config.yaml <<EOF
# Update strategy for the promtail DaemonSet; maxUnavailable of 100% lets all
# pods be replaced in one step.
deploymentStrategy:
  rollingUpdate:
    maxUnavailable: 100%
  type: RollingUpdate
image:
  # repository: grafana/promtail
  repository: ops-harbor.hupu.io/k8s/promtail
  tag: 2.0.0
  pullPolicy: IfNotPresent
hostNetwork: true
podSecurityPolicy:
  hostNetwork: true
# livenessProbe: {}
readinessProbe:
  failureThreshold: 5
  httpGet:
    path: /ready
    port: http-metrics
    scheme: HTTP
  initialDelaySeconds: 10
  periodSeconds: 10
  successThreshold: 1
  timeoutSeconds: 1
# Loki endpoint that promtail pushes to.
loki:
  serviceName: "distributor-loki-headless"
  servicePort: 3100
  serviceScheme: http
  # user: user
  # password: pass
# Default pipeline: unwrap Docker JSON log lines.
pipelineStages:
- docker: {}
podAnnotations:
  prometheus.io/scrape: "true"
  prometheus.io/port: "http-metrics"
resources:
  limits:
    cpu: 4000m
    memory: 4Gi
  requests:
    cpu: 100m
    memory: 100Mi
# Custom scrape_configs to override the default ones in the configmap
scrapeConfigs: []
# Example (disabled):
# - job_name: kubernetes-pods-name
#   # Pipeline stages transform log entries and their labels. The pipeline is
#   # executed after the discovery process has finished.
#   # Supported stages: docker, cri, regex, json, template, match, timestamp,
#   # output, labels, metrics, tenant
#   pipeline_stages:
#   - docker: {}
#   kubernetes_sd_configs:
#   - role: pod
#   relabel_configs:
#   - source_labels:
#     - __meta_kubernetes_pod_label_name
#     target_label: __service__
#   - source_labels:
#     - __meta_kubernetes_pod_node_name
#     target_label: __host__
#   - action: drop
#     regex: ''
#     source_labels:
#     - __service__
#   - action: labelmap
#     regex: __meta_kubernetes_pod_label_(.+)
#   - action: replace
#     replacement: $1
#     separator: /
#     source_labels:
#     - __meta_kubernetes_namespace
#     - __service__
#     target_label: job
#   - action: replace
#     source_labels:
#     - __meta_kubernetes_namespace
#     target_label: namespace
#   - action: replace
#     source_labels:
#     - __meta_kubernetes_pod_name
#     target_label: pod
#   - action: replace
#     source_labels:
#     - __meta_kubernetes_pod_container_name
#     target_label: container
#   # Example log path:
#   # /var/log/pods/kube-system_kube-proxy-master-w5jxb_f395f3be-67a3-43e7-ae99-9d099fe23486/kube-proxy-master/0.log
#   - replacement: /var/log/pods/*$1/*.log
#     # Join the source labels with "/" instead of the default ";"
#     separator: /
#     source_labels:
#     # the pod UID
#     - __meta_kubernetes_pod_uid
#     # the name of the container inside the pod
#     - __meta_kubernetes_pod_container_name
#     target_label: __path__
#   # Other keys listed (commented out) in the original notes:
#   # journal:
#   # syslog:
#   # loki_push_api:
#   # static_configs:
#   # file_sd_configs:
# Custom scrape_configs together with the default ones in the configmap
extraScrapeConfigs:
# Collect msv-access.log from paths such as
# /data0/log-data/basketball-all-api/stg/basketball-all-api-20201026182255-8dd968e-75c9bc469f-wtfxh/e4179a7b-d505-4b7b-ae66-ed62906fa596/basketball-all-api/
#
# 1. Stdout log stream:
#    {app="ad-direct-msv",filename="/var/log/pods/stg_ad-direct-msv-20201116143649-2355d8c-777d74c8b-pqvzn_3e121e4d-16f9-4257-8e35-b05137d60443/ad-direct-msv/0.log"}
#
# 2. File-based log stream:
#    {app="ad-direct-msv",filename="/data0/log-data/ad-direct-msv/stg/ad-direct-msv-20201116143649-2355d8c-777d74c8b-pqvzn/3e121e4d-16f9-4257-8e35-b05137d60443/ad-direct-msv/msv-access.log"}
#
# Both the stdout and the file-based streams carry the same labels listed
# below, which solves the pod-metadata problem:
# 1. Labels taken from the pod itself:
#      ad-direct-msv                             -- app
#      java-msv                                  -- apptype
#      2020-11-16-14-42-38-362580                -- datetime
#      stg                                       -- env
#      777d74c8b                                 -- pod-template-hash
#      ad-direct-msv-20201116143649-2355d8c      -- version
#
# 2. Labels added by relabeling:
#      stg                                       -- namespace
#      ad-direct-msv                             -- container
#      stg/ad-direct-msv                         -- job
#      ad-direct-msv-20201116143649-2355d8c-777d -- pod
#
# Static job: every *.log file four directories below /data0/log-data.
- job_name: alllog
  # No pipeline stages; written as an explicit empty list rather than a bare key.
  pipeline_stages: []
  static_configs:
  - targets:
    - localhost
    labels:
      job: alllog
      __path__: /data0/log-data/*/*/*/*/*.log
# Static job: same as above, one directory level deeper.
- job_name: alllog-next
  pipeline_stages: []
  static_configs:
  - targets:
    - localhost
    labels:
      job: alllog-next
      __path__: /data0/log-data/*/*/*/*/*/*.log
# Pods that carry a `name` label: tail their msv-access.log files.
- job_name: kubernetes-pods-name-msv-access
  # loki_push_api configures Promtail to expose a Loki push API server.
  # Note: every job configured with loki_push_api exposes this API and needs
  # its own port.
  # loki_push_api:
  #   # Server options are the same as in the `server` block
  #   server:
  #     http_listen_port: 3500
  #     grpc_listen_port: 3600
  #   labels:
  #     pushserver: push1
  #   use_incoming_timestamp: false
  # NOTE(review): the docker stage parses Docker JSON log lines; confirm that
  # msv-access.log is written in that format.
  pipeline_stages:
  - docker: {}
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Service name comes from the pod's `name` label.
  - source_labels:
    - __meta_kubernetes_pod_label_name
    target_label: __service__
  - source_labels:
    - __meta_kubernetes_pod_node_name
    target_label: __host__
  # Drop targets whose __service__ is empty.
  - action: drop
    regex: ''
    source_labels:
    - __service__
  # Copy every pod label onto the target.
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  # job = <namespace>/<service>
  - action: replace
    replacement: $1
    separator: /
    source_labels:
    - __meta_kubernetes_namespace
    - __service__
    target_label: job
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: namespace
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_name
    target_label: pod
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_container_name
    target_label: container
  # - replacement: /var/log/pods/*$1/*.log
  # $1 is "<pod uid>/<container name>" (source labels joined with "/").
  - replacement: /data0/log-data/*/*/*/$1/msv-access.log
    separator: /
    source_labels:
    - __meta_kubernetes_pod_uid
    - __meta_kubernetes_pod_container_name
    target_label: __path__
# Pods without a `name` label but with an `app` label: tail msv-access.log.
- job_name: kubernetes-pods-app-msv-access
  # NOTE(review): the docker stage parses Docker JSON log lines; confirm that
  # msv-access.log is written in that format.
  pipeline_stages:
  - docker: {}
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Drop pods that have a non-empty `name` label.
  - action: drop
    regex: .+
    source_labels:
    - __meta_kubernetes_pod_label_name
  # Service name comes from the pod's `app` label.
  - source_labels:
    - __meta_kubernetes_pod_label_app
    target_label: __service__
  - source_labels:
    - __meta_kubernetes_pod_node_name
    target_label: __host__
  # Drop targets whose __service__ is empty.
  - action: drop
    regex: ''
    source_labels:
    - __service__
  # Copy every pod label onto the target.
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  # job = <namespace>/<service>
  - action: replace
    replacement: $1
    separator: /
    source_labels:
    - __meta_kubernetes_namespace
    - __service__
    target_label: job
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: namespace
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_name
    target_label: pod
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_container_name
    target_label: container
  # - replacement: /var/log/pods/*$1/*.log
  # $1 is "<pod uid>/<container name>" (source labels joined with "/").
  - replacement: /data0/log-data/*/*/*/$1/msv-access.log
    separator: /
    source_labels:
    - __meta_kubernetes_pod_uid
    - __meta_kubernetes_pod_container_name
    target_label: __path__
# Pods with neither a `name` nor an `app` label whose controller name has no
# hash-like suffix: the controller name is used as the service name.
- job_name: kubernetes-pods-direct-controllers-msv-access
  # NOTE(review): the docker stage parses Docker JSON log lines; confirm that
  # msv-access.log is written in that format.
  pipeline_stages:
  - docker: {}
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Drop pods that have a `name` or an `app` label (values concatenated with
  # an empty separator, so either one being set matches).
  - action: drop
    regex: .+
    separator: ''
    source_labels:
    - __meta_kubernetes_pod_label_name
    - __meta_kubernetes_pod_label_app
  # Drop pods whose controller name ends in a hash-like suffix.
  - action: drop
    regex: '[0-9a-z-.]+-[0-9a-f]{8,10}'
    source_labels:
    - __meta_kubernetes_pod_controller_name
  - source_labels:
    - __meta_kubernetes_pod_controller_name
    target_label: __service__
  - source_labels:
    - __meta_kubernetes_pod_node_name
    target_label: __host__
  # Drop targets whose __service__ is empty.
  - action: drop
    regex: ''
    source_labels:
    - __service__
  # Copy every pod label onto the target.
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  # job = <namespace>/<service>
  - action: replace
    replacement: $1
    separator: /
    source_labels:
    - __meta_kubernetes_namespace
    - __service__
    target_label: job
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: namespace
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_name
    target_label: pod
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_container_name
    target_label: container
  # - replacement: /var/log/pods/*$1/*.log
  # $1 is "<pod uid>/<container name>" (source labels joined with "/").
  - replacement: /data0/log-data/*/*/*/$1/msv-access.log
    separator: /
    source_labels:
    - __meta_kubernetes_pod_uid
    - __meta_kubernetes_pod_container_name
    target_label: __path__
# Pods with neither a `name` nor an `app` label whose controller name ends in
# a hash-like suffix: the controller name minus that suffix is the service.
- job_name: kubernetes-pods-indirect-controller-msv-access
  # NOTE(review): the docker stage parses Docker JSON log lines; confirm that
  # msv-access.log is written in that format.
  pipeline_stages:
  - docker: {}
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Drop pods that have a `name` or an `app` label.
  - action: drop
    regex: .+
    separator: ''
    source_labels:
    - __meta_kubernetes_pod_label_name
    - __meta_kubernetes_pod_label_app
  # Keep only pods whose controller name ends in a hash-like suffix.
  - action: keep
    regex: '[0-9a-z-.]+-[0-9a-f]{8,10}'
    source_labels:
    - __meta_kubernetes_pod_controller_name
  # Service name = controller name with the suffix removed (capture group 1).
  - action: replace
    regex: '([0-9a-z-.]+)-[0-9a-f]{8,10}'
    source_labels:
    - __meta_kubernetes_pod_controller_name
    target_label: __service__
  - source_labels:
    - __meta_kubernetes_pod_node_name
    target_label: __host__
  # Drop targets whose __service__ is empty.
  - action: drop
    regex: ''
    source_labels:
    - __service__
  # Copy every pod label onto the target.
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  # job = <namespace>/<service>
  - action: replace
    replacement: $1
    separator: /
    source_labels:
    - __meta_kubernetes_namespace
    - __service__
    target_label: job
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: namespace
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_name
    target_label: pod
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_container_name
    target_label: container
  # - replacement: /var/log/pods/*$1/*.log
  # $1 is "<pod uid>/<container name>" (source labels joined with "/").
  - replacement: /data0/log-data/*/*/*/$1/msv-access.log
    separator: /
    source_labels:
    - __meta_kubernetes_pod_uid
    - __meta_kubernetes_pod_container_name
    target_label: __path__
# Pods that carry the kubernetes.io/config.mirror annotation; the service name
# comes from the `component` label.
- job_name: kubernetes-pods-static-msv-access
  # NOTE(review): the docker stage parses Docker JSON log lines; confirm that
  # msv-access.log is written in that format.
  pipeline_stages:
  - docker: {}
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Drop pods whose config.mirror annotation is empty or absent.
  - action: drop
    regex: ''
    source_labels:
    - __meta_kubernetes_pod_annotation_kubernetes_io_config_mirror
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_label_component
    target_label: __service__
  - source_labels:
    - __meta_kubernetes_pod_node_name
    target_label: __host__
  # Drop targets whose __service__ is empty.
  - action: drop
    regex: ''
    source_labels:
    - __service__
  # Copy every pod label onto the target.
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  # job = <namespace>/<service>
  - action: replace
    replacement: $1
    separator: /
    source_labels:
    - __meta_kubernetes_namespace
    - __service__
    target_label: job
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: namespace
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_name
    target_label: pod
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_container_name
    target_label: container
  # - replacement: /var/log/pods/*$1/*.log
  # $1 is "<config.mirror annotation>/<container name>" (joined with "/").
  - replacement: /data0/log-data/*/*/*/$1/msv-access.log
    separator: /
    source_labels:
    - __meta_kubernetes_pod_annotation_kubernetes_io_config_mirror
    - __meta_kubernetes_pod_container_name
    target_label: __path__
# Tolerate every taint.
tolerations:
- operator: "Exists"
securityContext:
  readOnlyRootFilesystem: true
  runAsGroup: 0
  runAsUser: 0
# Extra volumes to scrape logs from
volumes:
# Docker container logs
- name: docker
  hostPath:
    path: /var/lib/docker/containers
# Pod logs
- name: pods
  hostPath:
    path: /var/log/pods
# Log files written by the applications running in pods
- name: data0
  hostPath:
    path: /data0/log-data
volumeMounts:
- name: docker
  mountPath: /var/lib/docker/containers
  readOnly: true
- name: pods
  mountPath: /var/log/pods
  readOnly: true
- name: data0
  mountPath: /data0/log-data
  readOnly: true
config:
  # Describes how Promtail connects to Loki instances and sends logs to them.
  # WARNING: if one of several remote Loki servers fails to respond or returns
  # a retryable error, sending to every other configured remote Loki server is
  # affected as well, because sending is done on a single thread.
  # To ship to multiple remote Loki instances it is generally recommended to
  # run multiple Promtail clients in parallel.
  # NOTE (original author): the `clients` field shown in the official docs is
  # wrong for this setup; `client` is used here.
  client:
    url: http://distributor-loki-headless:3100/loki/api/v1/push
    # Log entries are batched before being pushed to Loki, bounded by the
    # maximum wait `batchwait` and the maximum size `batchsize`, whichever is
    # reached first.
    # Maximum wait period before sending batch
    batchwait: 5s
    # Maximum batch size to accrue before sending, unit is byte
    # 409600 bytes = 400 KiB
    batchsize: 409600
    # Maximum time to wait for server to respond to a request
    timeout: 1m
    # NOTE: Loki rejects every log line it receives out of order.
    backoff_config:
      # Initial backoff time between retries
      min_period: 10s
      # Maximum backoff time between retries
      max_period: 5m
      # Maximum number of retries when sending batches, 0 means infinite retries
      # If every retry fails, promtail drops that batch of log entries (they
      # are lost) and moves on to the next one.
      max_retries: 10
    # The labels to add to any time series or alerts when communicating with loki
    external_labels:
      env: prod
  server:
    # http_listen_address:
    http_listen_port: 3101
    # grpc_listen_address:
    grpc_listen_port: 9095
    graceful_shutdown_timeout: 60s
    http_server_read_timeout: 60s
    http_server_write_timeout: 60s
    http_server_idle_timeout: 120s
    # 209715200 bytes = 200 MiB
    grpc_server_max_recv_msg_size: 209715200
    grpc_server_max_send_msg_size: 209715200
    grpc_server_max_concurrent_streams: 1024
    log_level: info
    health_check_target: true
  # Describes how the read file offsets are saved to disk
  positions:
    filename: /run/promtail/positions.yaml
    sync_period: 5s
    ignore_invalid_yaml: false
  target_config:
    # Period to resync directories being watched and files being tailed
    sync_period: 5s
# ServiceMonitor settings for scraping promtail's own metrics.
serviceMonitor:
  enabled: true
  interval: 30s
  additionalLabels: {}
  annotations: {}
  scrapeTimeout: 25s
EOF
====================================================================================================================================
备注:
docker: {}
docker 阶段将匹配并解析此格式的日志行:
`{"log":"level=info ts=2019-04-30T02:12:41.844179Z caller=filetargetmanager.go:180 msg=\"Adding target\"\n","stream":"stderr","time":"2019-04-30T02:12:41.8443515Z"}`
自动将时间提取到日志 timestamp 中,stream 传输到标签中,并将日志字段输出到 output 中,因为 docker以这种方式包装您的应用程序日志,这将对其进行解包,以便仅对日志内容进行进一步的管道处理 。
Docker 阶段只是如下定义的包装:
# Stages that the `docker` stage wraps, as listed in the notes above.
- json:
    output: log
    stream: stream
    timestamp: time
- labels:
    stream:
- timestamp:
    source: timestamp
    format: RFC3339Nano
- output:
    source: output
kubernetes_sd_config:
kubernetes_sd_config 发现规则跟 prometheus 一样的包括:
node:
__meta_kubernetes_node_name: The name of the node object.
__meta_kubernetes_node_label_<labelname>: Each label from the node object.
__meta_kubernetes_node_labelpresent_<labelname>: true for each label from the node object.
__meta_kubernetes_node_annotation_<annotationname>: Each annotation from the node object.
__meta_kubernetes_node_annotationpresent_<annotationname>: true for each annotation from the node object.
__meta_kubernetes_node_address_<address_type>: The first address for each node address type, if it exists.
service:
__meta_kubernetes_namespace: The namespace of the service object.
__meta_kubernetes_service_annotation_<annotationname>: Each annotation from the service object.
__meta_kubernetes_service_annotationpresent_<annotationname>: true for each annotation of the service object.
__meta_kubernetes_service_cluster_ip: The cluster IP address of the service. (Does not apply to services of type ExternalName)
__meta_kubernetes_service_external_name: The DNS name of the service. (Applies to services of type ExternalName)
__meta_kubernetes_service_label_<labelname>: Each label from the service object.
__meta_kubernetes_service_labelpresent_<labelname>: true for each label of the service object.
__meta_kubernetes_service_name: The name of the service object.
__meta_kubernetes_service_port_name: Name of the service port for the target.
__meta_kubernetes_service_port_protocol: Protocol of the service port for the target.
pod:
__meta_kubernetes_namespace: The namespace of the pod object.
__meta_kubernetes_pod_name: The name of the pod object.
__meta_kubernetes_pod_ip: The pod IP of the pod object.
__meta_kubernetes_pod_label_<labelname>: Each label from the pod object.
__meta_kubernetes_pod_labelpresent_<labelname>: true for each label from the pod object.
__meta_kubernetes_pod_annotation_<annotationname>: Each annotation from the pod object.
__meta_kubernetes_pod_annotationpresent_<annotationname>: true for each annotation from the pod object.
__meta_kubernetes_pod_container_init: true if the container is an InitContainer
__meta_kubernetes_pod_container_name: Name of the container the target address points to.
__meta_kubernetes_pod_container_port_name: Name of the container port.
__meta_kubernetes_pod_container_port_number: Number of the container port.
__meta_kubernetes_pod_container_port_protocol: Protocol of the container port.
__meta_kubernetes_pod_ready: Set to true or false for the pod’s ready state.
__meta_kubernetes_pod_phase: Set to Pending, Running, Succeeded, Failed or Unknown in the lifecycle.
__meta_kubernetes_pod_node_name: The name of the node the pod is scheduled onto.
__meta_kubernetes_pod_host_ip: The current host IP of the pod object.
__meta_kubernetes_pod_uid: The UID of the pod object.
__meta_kubernetes_pod_controller_kind: Object kind of the pod controller.
__meta_kubernetes_pod_controller_name: Name of the pod controller.
endpoints:
__meta_kubernetes_namespace: The namespace of the endpoints object.
__meta_kubernetes_endpoints_name: The names of the endpoints object.
对于直接从端点列表中发现的所有目标(未从基础 pod 中另外推断出的那些目标),将附加以下标签:
__meta_kubernetes_endpoint_hostname: Hostname of the endpoint.
__meta_kubernetes_endpoint_node_name: Name of the node hosting the endpoint.
__meta_kubernetes_endpoint_ready: Set to true or false for the endpoint’s ready state.
__meta_kubernetes_endpoint_port_name: Name of the endpoint port.
__meta_kubernetes_endpoint_port_protocol: Protocol of the endpoint port.
__meta_kubernetes_endpoint_address_target_kind: Kind of the endpoint address target.
__meta_kubernetes_endpoint_address_target_name: Name of the endpoint address target.
注意:如果端点属于服务,则会附加角色:服务发现的所有标签。
注意:对于由 Pod 支持的所有目标,将附加角色的所有标签:Pod 发现的所有标签。
- ingress:
__meta_kubernetes_namespace: The namespace of the ingress object.
__meta_kubernetes_ingress_name: The name of the ingress object.
__meta_kubernetes_ingress_label_<labelname>: Each label from the ingress object.
__meta_kubernetes_ingress_labelpresent_<labelname>: true for each label from the ingress object.
__meta_kubernetes_ingress_annotation_<annotationname>: Each annotation from the ingress object.
__meta_kubernetes_ingress_annotationpresent_<annotationname>: true for each annotation from the ingress object.
__meta_kubernetes_ingress_scheme: Protocol scheme of ingress, https if TLS config is set. Defaults to http.
__meta_kubernetes_ingress_path: Path from ingress spec. Defaults to /.
====================================================================================================================================
安装 promtail
# helm install promtail promtail-2.0.1.tgz -f promtail-config.yaml -n grafana
# kubectl get all -n grafana
NAME READY STATUS RESTARTS AGE
pod/loki-0 1/1 Running 0 136m
pod/promtail-2b5rt 1/1 Running 0 49s
pod/promtail-58llq 1/1 Running 0 49s
pod/promtail-bvp6q 1/1 Running 0 49s
pod/promtail-cr5hv 1/1 Running 0 49s
pod/promtail-dfq44 1/1 Running 0 49s
pod/promtail-jm9wc 1/1 Running 0 49s
pod/promtail-kl5wr 1/1 Running 0 49s
pod/promtail-mrxjc 1/1 Running 0 49s
pod/promtail-qklwp 1/1 Running 0 49s
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/loki ClusterIP 10.96.116.194 <none> 3100/TCP 136m
service/loki-headless ClusterIP None <none> 3100/TCP 136m
service/promtail-headless ClusterIP None <none> 3101/TCP 51s
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE
daemonset.apps/promtail 9 9 9 9 9 <none> 51s
NAME READY AGE
statefulset.apps/loki 1/1 136m
修改为 hostNetwork:
# kubectl edit ds -n grafana promtail
...
spec:
dnsPolicy: ClusterFirstWithHostNet
hostNetwork: true
Promtail 公开了几个 URL,可用于了解其服务发现的工作方式:
- /service-discovery 服务发现页面,显示在 relabeling 之前和之后所有发现的目标及其标签,以及删除目标的原因。
- /targets 目标页面,仅显示正在主动抓取的目标及其相应的标签,文件和位置。
- /ready 健康检查接口
- /metrics metrics 接口
- /static/ 一些静态页,没啥用
---
# ClusterIP Service in front of the promtail pods' http-metrics port (3101).
apiVersion: v1
kind: Service
metadata:
  labels:
    app: promtail
  name: promtail
  namespace: grafana
spec:
  ports:
  - name: http-metrics
    port: 3101
    protocol: TCP
    targetPort: http-metrics
  selector:
    app: promtail
    release: promtail
  sessionAffinity: None
  type: ClusterIP
---
# Routes promtail.hupu.io to the promtail Service on port 3101 via traefik.
# NOTE(review): extensions/v1beta1 Ingress was removed in Kubernetes 1.22; on
# newer clusters this manifest must be migrated to networking.k8s.io/v1.
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  annotations:
    kubernetes.io/ingress.class: traefik
  name: promtail
  namespace: grafana
spec:
  rules:
  - host: promtail.hupu.io
    http:
      paths:
      - backend:
          serviceName: promtail
          servicePort: 3101
        path: /
查看默认的任务:
# kubectl get cm -n grafana promtail -o yaml |grep job_name
- job_name: kubernetes-pods-name
- job_name: kubernetes-pods-app
- job_name: kubernetes-pods-direct-controllers
- job_name: kubernetes-pods-indirect-controller
- job_name: kubernetes-pods-static
查看配置详情:
# kubectl get cm -n grafana promtail -o yaml
试运行 Promtail:
注意:还可以添加标签
# cat my.log | promtail --stdin --client.url http://127.0.0.1:3100/loki/api/v1/push --dry-run
# cat my.log | promtail --stdin --client.url http://127.0.0.1:3100/loki/api/v1/push -server.grpc-listen-port 9999 -positions.file /tmp/2.yaml --stdin -log.level debug
# cat my.log | promtail --stdin --client.url http://127.0.0.1:3100/loki/api/v1/push --client.external-labels=k1=v1,k2=v2
发现 promtail 性能比 filebeat 好,promtail 还收集了所有的 .log 日志, filebeat 只收集了一部分:
top - 21:54:40 up 104 days, 1:06, 1 user, load average: 30.98, 31.73, 32.47
Tasks: 472 total, 2 running, 470 sleeping, 0 stopped, 0 zombie
%Cpu(s): 67.4/10.8 78[|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
KiB Mem : 65806668 total, 3318964 free, 34830892 used, 27656812 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 30111884 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
22705 root 20 0 18.5g 5.8g 25760 S 389.1 9.3 1317:56 java
46827 root 20 0 2557116 371756 17212 S 238.3 0.6 17288:44 filebeat
44872 root 20 0 19.0g 9.2g 18904 S 169.6 14.6 25432:15 java
55659 root 20 0 8576336 102280 22444 S 160.4 0.2 243:31.13 promtail
监控逻辑
- 您的应用向 Promtail 跟踪的文件发出一条日志行
- Promtail 读取新行并增加其计数器
- Promtail 将日志行转发给 distributor,在该 distributor 中应增加收到的计数器
- distributor 将日志行转发到 ingester,请求持续时间计数器应在此增加