Elasticsearch教程(32) ES 聚合查询后过滤 Distinct Group By Having功能

一、之前写的关于ES聚合的博客

Elasticsearch教程(3) ES聚合查询DSL

Elasticsearch教程(4) High Level REST Client API 查询 聚合 分组

Elasticsearch教程(5) 指标聚合 SQL DSL JavaAPI

Elasticsearch教程(6) 桶聚合Query DSL-Terms Aggregation

Elasticsearch教程(10) ES term terms prefix 搜索 聚合查询 详细总结

Elasticsearch教程(11) elasticsearch 桶聚合 Query DSL

二、测试数据

PUT /pigg/_doc/1
{
  "name": "老亚瑟",
  "age": 30,
  "sex": "男",
  "group": "日落圣殿",
  "tag":["战士", "坦克"],
  "date": "2019-12-26",
  "friend": "安琪拉"
}

PUT /pigg/_doc/2
{
  "name": "安琪拉",
  "age": 16,
  "sex": "女",
  "group": "日落圣殿",
  "tag":["法师"],
  "date": "2019-01-01",
  "friend": ""
}

PUT /pigg/_doc/3
{
  "name": "凯",
  "age": 28,
  "sex": "男",
  "group": "长城守*",
  "tag":["战士"],
  "date": "2020-01-01"
}

PUT /pigg/_doc/4
{
  "name": "盾山",
  "age": 38,
  "sex": "男",
  "group": "长城守*",
  "tag":["辅助", "坦克"],
  "date": "2020-02-02"
}

PUT /pigg/_doc/5
{
  "name": "百里守约",
  "age": 18,
  "sex": "男",
  "group": "长城守*",
  "tag":["射手"],
  "date": "2020-03-03"
}

PUT /pigg/_doc/6
{
  "name": "李元芳",
  "age": 15,
  "sex": "男",
  "group": "长安",
  "tag":["刺客"],
  "date": "2020-03-23"
}

PUT /pigg/_doc/7
{
  "name": "陈咬金",
  "age": 40,
  "sex": "男",
  "group": "长安",
  "tag":["战士", "坦克"]
}

三、分组聚合

  • select count(1) from table group by…
GET /pigg/_search
{
   "size": 0, 
   "aggs": {
     "count_of_group": {
       "terms": {
         "field": "group.keyword"
       }
     }
   }
}

返回如下:

  "aggregations" : {
    "count_of_group" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "长城守*",
          "doc_count" : 3
        },
        {
          "key" : "日落圣殿",
          "doc_count" : 2
        },
        {
          "key" : "长安",
          "doc_count" : 2
        }
      ]
    }
  }

四、先过滤,后聚合

  • select count(1) from table where … group by…

先过滤查询数据的范围,然后再聚合,hits是过滤后的数据

GET /pigg/_search
{
   "size": 10, 
   "query": {
     "bool": {
       "filter": [
         {
           "term": {
             "sex.keyword": "男"
           }
         }
       ]
     }
   }, 
   "aggs": {
     "count_of_group": {
       "terms": {
         "field": "group.keyword"
       }
     }
   }
}

五、先filter聚合,再内嵌terms聚合

这个聚合结果和上面一样,不同的是这个hits里面是filter之前的全部数据

GET /pigg/_search
{
  "size": 10, 
  "aggs": {
    "group_of_man": {
      "filter": {
        "term": {
          "sex.keyword": "男"
        }
      },
      "aggs": {
        "count_of_group": {
          "terms": {
            "field": "group.keyword",
            "size": 10
          }
        }
      }
    }
  }
}

六、先filter聚合,再内嵌terms聚合,再内嵌avg聚合

select count(1) avg(age) from table where … group by…

GET /pigg/_search
{
  "size": 10, 
  "aggs": {
    "group_of_man": {
      "filter": {
        "term": {
          "sex.keyword": "男"
        }
      },
      "aggs": {
        "count_of_group": {
          "terms": {
            "field": "group.keyword",
            "size": 10
          },
          "aggs": {
            "avg_age": {
              "avg": {
                "field": "age"
              }
            }
          }
        }
      }
    }
  }
}
 "buckets" : [
          {
            "key" : "长城守*",
            "doc_count" : 3,
            "avg_age" : {
              "value" : 28.0
            }
          },
          {
            "key" : "长安",
            "doc_count" : 2,
            "avg_age" : {
              "value" : 27.5
            }
          },
          {
            "key" : "日落圣殿",
            "doc_count" : 1,
            "avg_age" : {
              "value" : 30.0
            }
          }
]

七、先filter聚合,再内嵌terms聚合,再内嵌avg聚合,再根据平均年龄排序

select count(1) avg(age) from table where … group by… order by avg_age asc

GET /pigg/_search
{
  "size": 10, 
  "aggs": {
    "group_of_man": {
      "filter": {
        "term": {
          "sex.keyword": "男"
        }
      },
      "aggs": {
        "count_of_group": {
          "terms": {
            "field": "group.keyword",
            "size": 10,
            "order": {
              "avg_age": "asc"
            }
          },
          "aggs": {
            "avg_age": {
              "avg": {
                "field": "age"
              }
            }
          }
        }
      }
    }
  }
}

八、实现Having功能 having count

select count(1) avg(age) from table where … group by… having count > 1

GET /pigg/_search
{
  "size": 10, 
  "aggs": {
    "group_of_man": {
      "filter": {
        "term": {
          "sex.keyword": "男"
        }
      },
      "aggs": {
        "count_of_group": {
          "terms": {
            "field": "group.keyword",
            "size": 10
          },
          "aggs": {
            "having": {
              "bucket_selector": {
                "buckets_path": {
                  "count_of_group": "_count"
                },
                "script": {
                  "source": "params.count_of_group > 1"
                }
              }
            }
          }
        }
      }
    }
  }
}

九、实现Having功能 having avg

select count(1) avg(age) from table where … group by… having avg_age > 28

GET /pigg/_search
{
    "size":10,
    "aggs":{
        "group_of_man":{
            "filter":{
                "term":{
                    "sex.keyword":"男"
                }
            },
            "aggs":{
                "count_of_group":{
                    "terms":{
                        "field":"group.keyword",
                        "size":10
                    },
                    "aggs":{
                        "avg_age":{
                            "avg":{
                                "field":"age"
                            }
                        },
                        "avg_age_filter":{
                            "bucket_selector":{
                                "buckets_path":{
                                    "avg_age":"avg_age"
                                },
                                "script":{
                                    "source":"params.avg_age > 28"
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

十、实现distinct去重

select count(distinct(group)) as count_of_distinct_group from pigg group by sex

GET /pigg/_search
{
    "size":10,
    "aggs":{
        "count_of_group":{
            "terms":{
                "field":"sex.keyword"
            },
            "aggs":{
                "count_of_distinct_group":{
                    "cardinality":{
                        "field":"group.keyword"
                    }
                }
            }
        }
    }
}

上一篇:Leetcode 1484题: Group Sold Products By The Date


下一篇:spark | 手把手教你用spark进行数据预处理