ElasticSearch索引字段检索时使其不区分大小写

官网指南:https://www.elastic.co/guide/en/elasticsearch/reference/current/normalizer.html

 

在 Elasticsearch 中处理字符串类型的数据时,如果我们想把整个字符串作为一个完整的 term 存储,通常会将其类型 type 设定为 keyword。但有时这种设定又会给我们带来麻烦:比如同一个数据在写入时由于没有做好清洗,导致大小写不一致,例如 apple 和 Apple 两个值实际都是 apple,但当我们去搜索 apple 时却无法返回 Apple 的文档。要解决这个问题,就需要 Normalizer 出场了。废话不多说,直接上手看!

 

静态映射创建索引

PUT /test
{
  "settings": {
    "analysis": {
      "normalizer": {
        "my_normalizer": {
          "type": "custom",
          "char_filter": [],
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "foo": {
        "normalizer": "my_normalizer",
        "type": "keyword"
      }
    }
  }
}

准备数据

PUT /test/_doc/1
{ "foo": "BÀR" }

PUT /test/_doc/2
{ "foo": "bar" }

PUT /test/_doc/3
{ "foo": "baz" }

测试效果(normalizer 在索引和查询时都会生效,BÀR 与 BAR 都会被转换为 bar,因此下面两个查询都会命中文档 1 和文档 2)

GET /test/_search
{
  "query": {
    "term": {
      "foo": {
        "value": "BAR"
      }
    }
  }
}

GET /test/_search
{
  "query": {
    "match": {
      "foo": {
        "query": "BAR"
      }
    }
  }
}

实战创建索引 demo(以下为创建索引的请求体,需配合 PUT <索引名> 发送):

{
  "settings": {
    "number_of_replicas": 1,
    "number_of_shards": 3,
    "refresh_interval": "1s",
    "translog": {
      "flush_threshold_size": "1638mb"
    },
    "merge": {
      "scheduler": {
        "max_thread_count": 1
      }
    },
    "index": {
      "routing": {
        "allocation": {
          "total_shards_per_node": 2
        }
      }
    },
    "analysis": {
      "normalizer": {
        "my_normalizer": {
          "type": "custom",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "huid": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "standard_name": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "hcode": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "name": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "name_segments": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "name_segments_loc": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "pcode": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "label": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "hcreatetime": {
        "index": true,
        "store": true,
        "format": "yyyy-MM-dd HH:mm:ss",
        "type": "date"
      },
      "hupdatetime": {
        "index": true,
        "store": true,
        "format": "yyyy-MM-dd HH:mm:ss",
        "type": "date"
      },
      "create_by": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "update_by": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "hisvalid": {
        "index": true,
        "store": true,
        "type": "integer"
      },
      "src": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "SEC_HCODE": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "SEC_TYPE": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "EXCH_HCODE": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "COMB_SYMBOL": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "CNAME": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "CSNAME_PINYIN_FSIM": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "CSNAME": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "ENAME": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "ESNAME": {
        "index": true,
        "store": true,
        "type": "keyword"
      },
      "is_mstr_name": {
        "index": true,
        "store": true,
        "type": "integer"
      },
      "tag": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      },
      "name_rinse": {
        "index": true,
        "store": true,
        "type": "keyword",
        "normalizer": "my_normalizer"
      }
    }
  }
}

 

上一篇:Elasticsearch(4)


下一篇:elasticSearch Alternatively use a keyword field instead.