Es7.x使用RestHighLevelClient进行聚合操作

参考:http://events.jianshu.io/p/cd27fe9cb642

           https://www.cnblogs.com/heyouxin/p/13865293.html

聚合操作分为指标聚合和分组聚合。RestHighLevelClient可以使用API方法也可以使用script脚本进行聚合。

1.API聚合方法

/**
 * Demo of an Elasticsearch 7.x aggregation built through the RestHighLevelClient
 * builder API: a terms (bucket) aggregation with sum and avg metric sub-aggregations.
 */
@Slf4j
public class EsAggsTest {

    /**
     * Groups the documents of the {@code test_demo} index by the {@code tag} field and,
     * for every bucket, prints the key, the document count, and the sum and average
     * of the {@code id} field to standard output.
     *
     * @throws IOException if the search request cannot be executed
     */
    public static void testAggs() throws IOException {
        // Bucket by "tag"; inside each bucket compute the sum and the average of "id".
        TermsAggregationBuilder aggregationBuilder = AggregationBuilders
                .terms("tag_tr").field("tag")
                .subAggregation(AggregationBuilders.sum("sum_id").field("id"))
                .subAggregation(AggregationBuilders.avg("avg_id").field("id"));

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.aggregation(aggregationBuilder);
        // size(0): do not return the raw documents, only the aggregation results.
        searchSourceBuilder.size(0);

        // Print the generated DSL; the placeholder form defers toString() to the logger.
        log.info("dsl:{}", searchSourceBuilder);

        // Target index plus the search body built above.
        SearchRequest searchRequest = new SearchRequest();
        searchRequest.indices("test_demo");
        searchRequest.source(searchSourceBuilder);

        SearchResponse response = EsUtil.getRestHighLevelClient().search(searchRequest, RequestOptions.DEFAULT);

        // Read the result through the Terms interface rather than ParsedStringTerms,
        // so the lookup does not fail with a ClassCastException when "tag" is not a string field.
        Terms tagTerms = response.getAggregations().get("tag_tr");
        for (Terms.Bucket bucket : tagTerms.getBuckets()) {
            // Metric sub-aggregations are looked up by the names given when building the request.
            Aggregations bucketAggregations = bucket.getAggregations();
            ParsedSum sumId = bucketAggregations.get("sum_id");
            ParsedAvg avgId = bucketAggregations.get("avg_id");
            System.out.println(bucket.getKeyAsString() + ":" + bucket.getDocCount()
                    + "-" + sumId.getValue() + "-" + avgId.getValue());
        }
    }

    /**
     * Entry point: runs the aggregation demo once.
     *
     * @param args unused
     * @throws IOException if the search request cannot be executed
     */
    public static void main(String[] args) throws IOException {
        testAggs();
    }
}

输出结果:

09:49:54.950 [main] INFO com.tellme.Test.es.EsAggsTest - dsl:{"size":0,"aggregations":{"tag_tr":{"terms":{"field":"tag","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"sum_id":{"sum":{"field":"id"}},"avg_id":{"avg":{"field":"id"}}}}}}
疫情:5-24067.0-4813.4
军事:2-2010.0-1005.0
教育:2-2008.0-1004.0
婚姻:1-9001.0-9001.0

dsl语句:

{
    "size":0,
    "aggregations":{
        "tag_tr":{
            "terms":{
                "field":"tag",
                "size":10,
                "min_doc_count":1,
                "shard_min_doc_count":0,
                "show_term_doc_count_error":false,
                "order":[
                    {
                        "_count":"desc"
                    },
                    {
                        "_key":"asc"
                    }
                ]
            },
            "aggregations":{
                "sum_id":{
                    "sum":{
                        "field":"id"
                    }
                },
                "avg_id":{
                    "avg":{
                        "field":"id"
                    }
                }
            }
        }
    }
}

即先对tag字段进行分组聚合,而后对各组的数据进行指标聚合。

2. script聚合方式

    /**
     * Script-based aggregation: buckets the documents of the {@code test_demo} index by a
     * key computed by a script from the {@code tag} and {@code publishTime} fields, and
     * prints every bucket key with its document count to standard output.
     *
     * @throws IOException if the search request cannot be executed
     */
    public static void testScript() throws IOException {
        // The script joins the doc values of "tag" and "publishTime" with '-'.
        // NOTE(review): doc['field'] is used without .value, so each part is presumably
        // rendered in list form (e.g. "[x]-[y]") — confirm this is the intended key format.
        Script script = new Script("doc['tag']+'-'+doc['publishTime']");
        // The original author also left a disabled alternative here: the same script fed to
        // AggregationBuilders.cardinality("user") instead of a terms aggregation.
        TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("tag_tr").script(script);

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.aggregation(aggregationBuilder);
        // size(0): do not return the raw documents, only the aggregation results.
        searchSourceBuilder.size(0);

        SearchRequest searchRequest = new SearchRequest();
        searchRequest.indices("test_demo");
        searchRequest.source(searchSourceBuilder);
        // Print the generated DSL; the placeholder form defers toString() to the logger.
        log.info("dsl:{}", searchSourceBuilder);

        SearchResponse response = EsUtil.getRestHighLevelClient().search(searchRequest, RequestOptions.DEFAULT);

        // Read the result through the Terms interface instead of a concrete parsed class,
        // so the lookup does not depend on the key type produced by the script.
        Terms scriptedTerms = response.getAggregations().get("tag_tr");
        for (Terms.Bucket bucket : scriptedTerms.getBuckets()) {
            System.out.println(bucket.getKeyAsString() + ":" + bucket.getDocCount());
        }
    }

转换为dsl:

{
    "size":0,
    "aggregations":{
        "tag_tr":{
            "terms":{
                "script":{
                    "source":"doc['tag']+'-'+doc['publishTime']",
                    "lang":"painless"
                },
                "size":10,
                "min_doc_count":1,
                "shard_min_doc_count":0,
                "show_term_doc_count_error":false,
                "order":[
                    {
                        "_count":"desc"
                    },
                    {
                        "_key":"asc"
                    }
                ]
            }
        }
    }
}

输出结果:

[军事]-[2021-05-13T17:33:14.000Z]:2
[教育]-[2021-05-13T17:33:14.000Z]:2
[疫情]-[2021-05-13T17:33:14.000Z]:2
[婚姻]-[2021-05-17T15:15:07.000Z]:1
[疫情]-[2021-05-14T11:40:06.000Z]:1
[疫情]-[2021-05-14T17:44:37.000Z]:1
[疫情]-[2021-05-14T17:50:42.000Z]:1

上一篇:Laravel框架门面Facade源码分析


下一篇:2018-2019 ACM-ICPC, Asia Shenyang Regional Contest C. Insertion Sort(组合计数)