参考:http://events.jianshu.io/p/cd27fe9cb642
https://www.cnblogs.com/heyouxin/p/13865293.html
聚合操作分为指标聚合(metric)和分组聚合(bucket,即桶聚合)。使用RestHighLevelClient时,既可以通过API方法构建聚合,也可以通过script脚本进行聚合。
1.API聚合方法
/**
 * Demonstrates a bucket aggregation combined with metric sub-aggregations,
 * built with the RestHighLevelClient aggregation API.
 */
@Slf4j
public class EsAggsTest {

    /**
     * Groups the documents of the {@code test_demo} index by the {@code tag} field and,
     * for every bucket, computes the sum and the average of the {@code id} field.
     *
     * <p>The generated DSL is logged, and one line per bucket is printed to standard
     * output in the form {@code key:docCount-sum-avg}.
     *
     * @throws IOException if the search call throws it
     */
    public static void testAggs() throws IOException {
        // Bucket the documents by "tag"; inside each bucket, sum and average "id".
        TermsAggregationBuilder tagTerms = AggregationBuilders.terms("tag_tr")
                .field("tag")
                .subAggregation(AggregationBuilders.sum("sum_id").field("id"))
                .subAggregation(AggregationBuilders.avg("avg_id").field("id"));

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.aggregation(tagTerms);
        // size(0): return only the aggregation results, no source documents.
        searchSourceBuilder.size(0);

        // Print the generated DSL; the placeholder lets SLF4J call toString() lazily.
        log.info("dsl:{}", searchSourceBuilder);

        // Target index plus the query body built above.
        SearchRequest searchRequest = new SearchRequest("test_demo");
        searchRequest.source(searchSourceBuilder);
        SearchResponse response =
                EsUtil.getRestHighLevelClient().search(searchRequest, RequestOptions.DEFAULT);

        // Read the "tag_tr" buckets through the Terms interface so the code does not
        // depend on the concrete parsed type chosen for the key's data type.
        Terms tagBuckets = response.getAggregations().get("tag_tr");
        for (Terms.Bucket bucket : tagBuckets.getBuckets()) {
            // Metric sub-aggregations computed for this bucket.
            Aggregations metrics = bucket.getAggregations();
            ParsedSum sumId = metrics.get("sum_id");
            ParsedAvg avgId = metrics.get("avg_id");
            System.out.println(bucket.getKeyAsString() + ":" + bucket.getDocCount()
                    + "-" + sumId.getValue() + "-" + avgId.getValue());
        }
    }

    /**
     * Runs the aggregation demo.
     *
     * @param args unused
     * @throws IOException propagated from {@link #testAggs()}
     */
    public static void main(String[] args) throws IOException {
        testAggs();
    }
}
输出结果(先打印dsl日志,随后每行格式为 tag:文档数-id求和-id平均值):
09:49:54.950 [main] INFO com.tellme.Test.es.EsAggsTest - dsl:{"size":0,"aggregations":{"tag_tr":{"terms":{"field":"tag","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"sum_id":{"sum":{"field":"id"}},"avg_id":{"avg":{"field":"id"}}}}}}
疫情:5-24067.0-4813.4
军事:2-2010.0-1005.0
教育:2-2008.0-1004.0
婚姻:1-9001.0-9001.0
dsl语句:
{
"size":0,
"aggregations":{
"tag_tr":{
"terms":{
"field":"tag",
"size":10,
"min_doc_count":1,
"shard_min_doc_count":0,
"show_term_doc_count_error":false,
"order":[
{
"_count":"desc"
},
{
"_key":"asc"
}
]
},
"aggregations":{
"sum_id":{
"sum":{
"field":"id"
}
},
"avg_id":{
"avg":{
"field":"id"
}
}
}
}
}
}
即先对tag字段进行分组聚合,而后对各组的数据进行指标聚合。
2. script聚合方式
/**
 * Script-based terms aggregation: buckets the documents of the {@code test_demo}
 * index by a key computed from the {@code tag} and {@code publishTime} fields.
 *
 * <p>The generated DSL is logged, and one line per bucket is printed to standard
 * output in the form {@code key:docCount}.
 *
 * @throws IOException if the search call throws it
 */
public static void testScript() throws IOException {
    // The bucket key is built by the script from the two fields. The doc values are
    // concatenated as-is (no .value), so each part renders in bracketed form,
    // e.g. [tag]-[timestamp].
    Script script = new Script("doc['tag']+'-'+doc['publishTime']");
    // Alternative: a cardinality aggregation over the same script, i.e.
    // AggregationBuilders.cardinality("user").script(script)
    TermsAggregationBuilder scriptedTerms = AggregationBuilders.terms("tag_tr").script(script);

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.aggregation(scriptedTerms);
    // size(0): return only the aggregation results, no source documents.
    searchSourceBuilder.size(0);

    SearchRequest searchRequest = new SearchRequest("test_demo");
    searchRequest.source(searchSourceBuilder);

    // Print the generated DSL; the placeholder lets SLF4J call toString() lazily.
    log.info("dsl:{}", searchSourceBuilder);
    SearchResponse response =
            EsUtil.getRestHighLevelClient().search(searchRequest, RequestOptions.DEFAULT);

    // Read the "tag_tr" buckets through the Terms interface so the code does not
    // depend on the concrete parsed type chosen for the key's data type.
    Terms scriptedBuckets = response.getAggregations().get("tag_tr");
    for (Terms.Bucket bucket : scriptedBuckets.getBuckets()) {
        System.out.println(bucket.getKeyAsString() + ":" + bucket.getDocCount());
    }
}
转换为dsl:
{
"size":0,
"aggregations":{
"tag_tr":{
"terms":{
"script":{
"source":"doc['tag']+'-'+doc['publishTime']",
"lang":"painless"
},
"size":10,
"min_doc_count":1,
"shard_min_doc_count":0,
"show_term_doc_count_error":false,
"order":[
{
"_count":"desc"
},
{
"_key":"asc"
}
]
}
}
}
}
输出结果:
[军事]-[2021-05-13T17:33:14.000Z]:2
[教育]-[2021-05-13T17:33:14.000Z]:2
[疫情]-[2021-05-13T17:33:14.000Z]:2
[婚姻]-[2021-05-17T15:15:07.000Z]:1
[疫情]-[2021-05-14T11:40:06.000Z]:1
[疫情]-[2021-05-14T17:44:37.000Z]:1
[疫情]-[2021-05-14T17:50:42.000Z]:1