- 使用了 ELK 技术栈的产品,必然面临到从
elasticsearch
查询数据的需求,无论是通过程度访问,还是 kibana,或许简单的 kibana discover 搜索框,并不能满足你的查询需求。这时你会想到写dsl
查询elasticsearch
。- 然而,写惯了 sql 语句的你,可能在面临海量
elasticsearch
英文文档时,感觉即便是写一个等同于select name, max(score) from score where class_id=1 and time>'2020-01-01 00:00:00' and time<'2020-01-02 00:00:00 and type<>'practise' group by name;
都无从下手。- 本文将给出并讲解一个典型的 dsl 语句,即包含了常见的
where 条件
,又包含了常见的聚合分析,如:[dsl]terms([sql]group by)
,同时也包含更高级的painless
脚本,让你快速入门elasticsearch dsl 查询语句
。- 此外,
修行在个人
,入门到放弃
,还需要个人努力,如有不当,烦请支出,谢谢。
1. 数据结构(有改动)
假设下面的结构是我们在 elasticsearch 上存储的 nginx access
日志中一条日志,其 _source
值为:
{
"method":"POST",
"request":"/xxx/xxx/xxx?result_id=1&category=1&ver=1&access-token=111",
"prospector":{
"type":"log"
},
"upstream_cache_status":"-",
"message":"39.191.138.20 - - [17/Jan/2020:14:34:37 +0800] "POST /xxx/xxx/xxx?result_id=1&category=1&ver=1&access-token=111 HTTP/1.1" api2.xxx 200 82 "-" "Mozilla/5.0 (Linux; Android 8.1.0; vivo X20 Build/OPM1.171019.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36 singsound(com.haiyuntian.tz)/2.0.4" "-" unix:/var/run/php/php7.0-fpm.sock 200 - "application/json; charset=UTF-8" 0.064 > 0.062",
"http_referer":"-",
"body_bytes_sent":82,
"upstream_response_time":0.064,
"status":200,
"@timestamp":"2020-01-17T06:34:37.000Z",
"remote_addr":"39.191.138.20",
"tags":[
"beats_input_codec_plain_applied"
],
"request_time":0.062,
"@version":"1",
"fields":{
"type":"nginx_access_caidou"
},
"upstream_addr":"unix:/var/run/php/php7.0-fpm.sock",
"input":{
"type":"log"
},
"http_user_agent":"Mozilla/5.0 (Linux; Android 8.1.0; vivo X20 Build/OPM1.171019.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36 xxxx(com.xxxx.tz)/2.0.4",
"upstream_status":200,
"host":{
"name":"oem04.aliyun.xxxx"
},
"beat":{
"version":"6.3.2",
"hostname":"xxxx",
"name":"xxxx"
},
"http_x_forwarded_for":"-",
"upstream_http_content_type":"application/json; charset=UTF-8",
"read_timestamp":"2020-01-17T06:34:37.829Z",
"offset":4814687,
"http_version":"1.1",
"http_host":"api2.xxxx",
"remote_user":"-",
"source":"/var/log/nginx/api2.xxx_access.log"
}
2. 查询需求
现在需要查询,满足条件:
- 域名以
api2.
为前缀 - 返回的
content_type
为json
- 状态码不是
200
401
404
- 时间范围是
2020-01-01 00:00:00
到2020-01-17 00:00:00
的异常请求,得到:
- 异常请求发生在哪些域名下?(异常请求最多的10个域名)
- 异常请求的状态码都是什么?(异常请求最多的3个状态码)
- 异常请求分布在哪些接口上?(异常请求最多的3个接口)
- 异常请求的耗时都是多久?([0, 0.1] [0.1, 1] [1, 9999])
3. dsl
语句
{
"size":0,
"query":{
"bool":{
"must":[
{
"match_phrase":{
"upstream_http_content_type":"application/json; charset=UTF-8"
}
},
{
"match_phrase_prefix":{
"http_host":"api2."
}
}
],
"must_not":[
{
"match_phrase":{
"status":"200"
}
},
{
"match_phrase":{
"status":"401"
}
},
{
"match_phrase":{
"status":"404"
}
}
],
"filter":{
"range":{
"@timestamp":{
"gte":"2020-01-01 00:00:00",
"lt":"2020-01-17 00:00:00",
"time_zone":"+08:00",
"format":"yyyy-MM-dd HH:mm:ss"
}
}
}
}
},
"aggs":{
"terms_host":{
"terms":{
"field":"http_host.keyword",
"size":10
},
"aggs":{
"terms_status":{
"terms":{
"field":"status",
"size":3
},
"aggs":{
"terms_request":{
"terms":{
"size":3,
"script":{
"lang":"painless",
"source":"String request=doc['request.keyword'].toString();int pindex=request.indexOf('?');if(pindex>0){return request.substring(1,pindex);}else{if(request.length()>0){return request.substring(1,request.length()-1);}else{return request;}}"
}
},
"aggs":{
"range_request_time":{
"range":{
"field":"request_time",
"ranges":[
{
"from":0,
"to":0.1
},
{
"from":0.1,
"to":1
},
{
"from":1,
"to":9999
}
]
}
}
}
}
}
}
}
}
}
}
4. 查询结果(有改动)
{
"took": 398,
"timed_out": false,
"_shards": {
"total": 1000,
"successful": 1000,
"skipped": 915,
"failed": 0
},
"hits": {
"total": 113,
"max_score": 0,
"hits": []
},
"aggregations": {
"terms_host": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 17,
"buckets": [
{
"key": "api2.xxxxxx",
"doc_count": 36,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 500,
"doc_count": 36,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/user/account/login",
"doc_count": 36,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 18
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 18
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxxx",
"doc_count": 11,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 500,
"doc_count": 11,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/",
"doc_count": 11,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 11
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 0
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxxx",
"doc_count": 11,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 500,
"doc_count": 11,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/",
"doc_count": 11,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 8
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 3
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxx",
"doc_count": 9,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 400,
"doc_count": 8,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/business/error/showJump",
"doc_count": 8,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 8
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 0
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
},
{
"key": 499,
"doc_count": 1,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/teacher/task/list",
"doc_count": 1,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 0
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 1
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxxx",
"doc_count": 7,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 415,
"doc_count": 5,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/business/frontDynamic/Config",
"doc_count": 5,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 5
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 0
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
},
{
"key": 499,
"doc_count": 1,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/teacher/task/list",
"doc_count": 1,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 0
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 1
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
},
{
"key": 500,
"doc_count": 1,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/user/account/pclogin",
"doc_count": 1,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 1
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 0
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxxx",
"doc_count": 6,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 500,
"doc_count": 6,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
}
},
{
"key": "api2.xxxxxx",
"doc_count": 5,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 499,
"doc_count": 3,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/teacher/task/list",
"doc_count": 3,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 0
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 3
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
},
{
"key": 500,
"doc_count": 2,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/user/account/login",
"doc_count": 2,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 2
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 0
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxx",
"doc_count": 4,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 500,
"doc_count": 4,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/user/account/login",
"doc_count": 4,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 1
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 3
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxxx",
"doc_count": 4,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 415,
"doc_count": 4,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/business/frontDynamic/Config",
"doc_count": 4,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 4
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 0
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
},
{
"key": "api2.xxxxxx",
"doc_count": 3,
"terms_status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 415,
"doc_count": 2,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/business/frontDynamic/Config",
"doc_count": 2,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 2
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 0
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
},
{
"key": 500,
"doc_count": 1,
"terms_request": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "/familywx/message/receive",
"doc_count": 1,
"range_request_time": {
"buckets": [
{
"key": "0.0-0.1",
"from": 0,
"to": 0.1,
"doc_count": 0
},
{
"key": "0.1-1.0",
"from": 0.1,
"to": 1,
"doc_count": 1
},
{
"key": "1.0-9999.0",
"from": 1,
"to": 9999,
"doc_count": 0
}
]
}
}
]
}
}
]
}
}
]
}
}
}
5. 关键点说明
- 上述
典型的 dsl
包含的套路是:通过query
查询符合条件的数据,然后通过nested-aggs
进行聚合分析,必要的时候,通过painless
对数据进行格式化。 - 在
query
查询符合条件的数据时,通过bool
的must
、must not
和filter
指定什么样的数据才是我们想要的结果集 (bool: true/false,想要/不想要)
,可以将第一行的"size": 0
修改一下,便于确认结果集中的数据是否正确。 -
nested aggs
部分,则是对query
部分的结果集进行聚合分析,elasticearch
支持的aggs type
很多,具体用法自行文档。上诉 dsl 中的 aggs 等同于:group by http_host, status, request
然后将request_time
放到 [0, 0.1]、[0.1, 1] 和 [1, 9999] 的区间中。 - 注意
terms_host
中的field
字段,使用了http_host.keyword
。其中的keyword
并非数据的层级关系(http_host 下的 keyword 字段),而是由于mapping
没有指定http_host
为可查询类型
,可能这样的说法并不严谨,自行文档吧。 - 上述 dsl 语句中的
painless
是处理叫复杂数据,进行格式化的利器。其语法与java
一致,而具体在painless
中支持的Java API
在官方文档上有详细说明,此处不再赘述。值得一提的是,弄清楚doc
是什么,往往是painless
的开始。 - 此外,在 kibana 中支持了很多
aggs
图形化的分析表格,如:Visualize
Dashboard
甚至Timelion
,个人理解:这些高级工具,在熟练DSL
语句后才能熟练使用。此处画个饼,如下图(有改动)。
接口请求量分析、状态码数量及占比分析
接口响应时间分析、响应时间占比分析
各域名请求量趋势图