注:以下请求体均适用于 Elasticsearch 6.x 版本。
假设我们有一个索引,其 mapping 如下:
{
  "properties": {
    "id": {
      "type": "keyword"
    },
    "submitAt": {
      "type": "date"
    },
    "user": {
      "properties": {
        "id": {
          "type": "integer"
        }
      }
    },
    "label": {
      "type": "keyword"
    },
    "comments": {
      "type": "nested",
      "properties": {
        "title": {
          "type": "keyword"
        },
        "submitAt": {
          "type": "date"
        }
      }
    }
  }
}
1、简单词频统计(每个label出现的次数)
"aggs": {
  "agg_result": {
    "terms": {
      "field": "label",
      "size": 1000
    }
  }
}
2、分组词频统计(按user分组统计每个label出现的次数)
"aggs": {
  "agg_result": {
    "terms": {
      "field": "user.id",
      "size": 1000
    },
    "aggs": {
      "sub_agg_result": {
        "terms": {
          "field": "label",
          "size": 1000
        }
      }
    }
  }
}
3、去重聚合(统计去重后的user人数)
"aggs": {
  "agg_distinct": {
    // cardinality returns an approximate distinct count, not an exact one
    "cardinality": {
      "field": "user.id"
    }
  }
}
4、按时间分组聚合(统计每天的数据量)
"aggs": {
  "agg_result": {
    "date_histogram": {
      "field": "submitAt",        // must be a date-typed field
      "interval": "day",          // one bucket per day
      "time_zone": "+08:00",      // compute day boundaries in UTC+8 rather than UTC
      "format": "yyyy-MM-dd",     // format used for each bucket's key_as_string
      "min_doc_count": 0,         // minimum doc count for a bucket to be returned; 0 keeps empty buckets
      "extended_bounds": {        // range over which buckets are generated even without data, in epoch milliseconds
        "min": 1564588800000,
        "max": 1565679600000
      }
    }
  }
}
5、对聚合结果过滤(类似sql中的Having,例如统计出现频率超过3次的label)
"aggs": {
  "agg_result": {
    "terms": {
      "field": "label",
      "size": 1000
    },
    "aggs": {
      "agg_having": {
        "bucket_selector": {
          "buckets_path": {
            "agg_bucket_count": "_count"
          },
          "script": "params.agg_bucket_count > 3"
        }
      }
    }
  }
}
6、对Nested 嵌套对象聚合
"aggs": {
  "agg_result": {
    "nested": {
      "path": "comments"
    },
    "aggs": {
      "sub_agg_result": {
        "terms": {
          "field": "comments.title"
        }
      }
    }
  }
}