ES支持中文&&全拼&&拼音首字母搜索

环境

  • Ubuntu18.04
  • ES 6.6.1

搜索模板

新建模板,便于后续创建索引,直接使用模板,省事方便

PUT _template/goods
{
  "index_patterns": ["goods*"],
  "settings": {
    "index.number_of_replicas": "1",
    "index.number_of_shards": "5",
    "index.translog.flush_threshold_size": "512mb",
    "index.translog.sync_interval": "60s",
    "index.codec": "best_compression",
    "analysis": {
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 50
        },
        "simple_pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": false,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        },
        "full_pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": false,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "none_chinese_pinyin_tokenize": true,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        }
      },
      "char_filter": {
        "charconvert": {
          "type": "mapping",
          "mappings_path": "char_filter_text.txt"
        }
      },
      "tokenizer": {
        "ik_max_word": {
          "type": "ik_max_word",
          "use_smart": true
        }
      },
      "analyzer": {
        "ngramIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "edge_ngram_filter",
            "lowercase"
          ],
          "char_filter": [
            "charconvert"
          ]
        },
        "ngramSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase"
          ],
          "char_filter": [
            "charconvert"
          ]
        },
        "ikIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": [
            "charconvert"
          ]
        },
        "ikSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": [
            "charconvert"
          ]
        },
        "simplePinyinIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "simple_pinyin_filter",
            "edge_ngram_filter",
            "lowercase"
          ]
        },
        "simplePinyinSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "simple_pinyin_filter",
            "lowercase"
          ]
        },
        "fullPinyinIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "full_pinyin_filter",
            "edge_ngram_filter",
            "lowercase"
          ]
        },
        "fullPinyinSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "full_pinyin_filter",
            "lowercase"
          ]
        }
      }
    }
  }
}

新建 char_filter_text.txt 存于 ES_HOME/config 目录下(集群中每个节点都需要放置该文件);文件需为 UTF-8 编码,每行一条 `key => value` 形式的字符映射规则

新建索引

goods_v1 将使用上述模板(goods*),可直接使用模板中定义的 analyzer

DELETE goods_v1

PUT /goods_v1
{
  "mappings": {
    "doc": {
      "properties": {
        "id": {
          "type": "long"
        },
        "name": {
          "type": "text",
          "analyzer": "ikIndexAnalyzer",
          "search_analyzer": "ikSearchAnalyzer",
          "fields": {
            "ngram": {
              "type": "text",
              "analyzer": "ngramIndexAnalyzer",
              "search_analyzer": "ngramSearchAnalyzer"
            },
            "SPY": {
              "type": "text",
              "analyzer": "simplePinyinIndexAnalyzer",
              "search_analyzer": "simplePinyinSearchAnalyzer"
            },
            "FPY": {
              "type": "text",
              "analyzer": "fullPinyinIndexAnalyzer",
              "search_analyzer": "fullPinyinSearchAnalyzer"
            }
          }
        },
        "update_time": {
          "type": "date"
        },
        "deleted": {
          "type": "boolean"
        }
      }
    }
  }
}

PUT /goods_v1/_alias/goods
{
  "is_write_index": true
}

filter&&结果id倒序

本搜索只做简单过滤,并且按id倒序返回,不需要计算相似度

GET goods/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "filter": [
            {
              "term": {
                "deleted": {
                  "value": false
                }
              }
            },
            {
              "bool": {
                "should": [
                  {
                    "match": {
                      "name.ngram": {
                        "query": "水果",
                        "analyzer": "ngramSearchAnalyzer",
                        "boost": 5
                      }
                    }
                  },
                  {
                    "term": {
                      "name.SPY": {
                        "value": "水果"
                      }
                    }
                  },
                  {
                    "wildcard": {
                      "name.SPY": {
                        "wildcard": "*水果*",
                        "boost": 0.8
                      }
                    }
                  },
                  {
                    "match_phrase": {
                      "name.FPY": {
                        "query": "水果",
                        "analyzer": "fullPinyinSearchAnalyzer"
                      }
                    }
                  },
                  {
                    "match": {
                      "name": {
                        "query": "水果",
                        "analyzer": "ikSearchAnalyzer",
                        "minimum_should_match": "100%"
                      }
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "sort": [
    {
      "id": {
        "order": "desc"
      }
    }
  ]
}

根据score返回结果

使用 dis_max 组合多个 query:每个文档取各子查询中最高的相关度得分(score)作为最终得分,结果按得分从高到低返回

GET goods/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "dis_max": {
            "queries": [
              {
                "match": {
                  "name.ngram": {
                    "query": "水果",
                    "analyzer": "ngramSearchAnalyzer",
                    "boost": 5
                  }
                }
              },
              {
                "term": {
                  "name.SPY": {
                    "value": "水果"
                  }
                }
              },
              {
                "wildcard": {
                  "name.SPY": {
                    "wildcard": "*水果*",
                    "boost": 0.8
                  }
                }
              },
              {
                "match_phrase": {
                  "name.FPY": {
                    "query": "水果",
                    "analyzer": "fullPinyinSearchAnalyzer"
                  }
                }
              },
              {
                "match": {
                  "name": {
                    "query": "水果",
                    "analyzer": "ikSearchAnalyzer",
                    "minimum_should_match": "100%"
                  }
                }
              }
            ]
          }
        }
      ],
      "filter": [
        {
          "term": {
            "deleted": {
              "value": false
            }
          }
        }
      ]
    }
  }
}

部分相关java代码

/**
 * Base request object that the goods search request extends.
 *
 * NOTE(review): page and size are presumably pagination inputs, but none of the
 * query-building methods shown next to this class read them - confirm where
 * they are applied.
 */
public class SearchRequest {
    // Plain public fields; the class declares no accessors.
    public int page;
    public int size;
}

/**
 * Search request for goods: SearchRequest plus a search keyword.
 */
public class GoodsRequest extends SearchRequest {
    // Read by getSearchBuilder, which hands it to getKeywordQueryBuilder.
    public String keyword;
}

/**
 * Combines the base filter with an optional scoring query.
 *
 * @param baseQueryBuilder query added as a {@code must} clause; skipped when null
 * @param request          current search request; not read by this method, kept so
 *                         existing callers keep compiling
 * @return bool query containing the base filter and, if given, the must clause
 */
private BoolQueryBuilder addFilter(QueryBuilder baseQueryBuilder, GoodsRequest request) {
    // getBaseFilterQueryBuilder is declared without parameters; passing the
    // request (as the previous version did) does not compile.
    BoolQueryBuilder boolQueryBuilder = getBaseFilterQueryBuilder();
    if (baseQueryBuilder != null) {
        boolQueryBuilder.must(baseQueryBuilder);
    }
    return boolQueryBuilder;
}

/**
 * Builds the bool query every goods search starts from.
 *
 * @return a new bool query whose only clause is the filter {@code deleted == false}
 */
private BoolQueryBuilder getBaseFilterQueryBuilder() {
    return QueryBuilders.boolQuery()
        .filter(QueryBuilders.termQuery("deleted", false));
}

/**
 * Builds the keyword part of the goods search as a bool query of five
 * {@code should} clauses over the {@code name} field and its sub-fields.
 *
 * @param keyword search text
 * @return the bool query, or null when {@code StringUtils.isEmpty(keyword)} is true
 */
private BoolQueryBuilder getKeywordQueryBuilder(String keyword) {
    if (StringUtils.isEmpty(keyword)) {
        return null;
    }

    return QueryBuilders.boolQuery()
        // match on name.ngram, boosted above the other clauses
        .should(QueryBuilders
            .matchQuery("name.ngram", keyword)
            .analyzer("ngramSearchAnalyzer")
            .boost(5f))
        // term query on name.SPY using the keyword as given
        .should(QueryBuilders.termQuery("name.SPY", keyword))
        // wildcard "contains" query on name.SPY, slightly down-weighted
        .should(QueryBuilders
            .wildcardQuery("name.SPY", "*" + keyword + "*")
            .boost(0.8f))
        // phrase match on name.FPY
        .should(QueryBuilders
            .matchPhraseQuery("name.FPY", keyword)
            .analyzer("fullPinyinSearchAnalyzer"))
        // match on name itself; every analyzed term has to match
        .should(QueryBuilders
            .matchQuery("name", keyword)
            .analyzer("ikSearchAnalyzer")
            .minimumShouldMatch("100%"));
}

/**
 * Builds a search that returns every document satisfying the search
 * conditions, sorted by id in descending order. The whole query is wrapped
 * in a constant_score query.
 *
 * @param request search request; only {@code request.keyword} is read
 * @return search source with the filter-only query and the id sort
 */
private SearchSourceBuilder getSearchBuilder(GoodsRequest request) {
    BoolQueryBuilder filters = getBaseFilterQueryBuilder();

    // The keyword clause is absent (null) for an empty keyword.
    BoolQueryBuilder keywordClause = getKeywordQueryBuilder(request.keyword);
    if (keywordClause != null) {
        filters.filter(keywordClause);
    }

    return new SearchSourceBuilder()
        .query(QueryBuilders.constantScoreQuery(filters))
        .sort(SortBuilders.fieldSort("id").order(SortOrder.DESC));
}

/**
 * Builds a search whose results are ordered by relevance.
 *
 * The keyword queries are combined with dis_max, so a document's score is
 * the highest score among the sub-queries that match it.
 *
 * The request is taken as a parameter: the body reads {@code request.keyword},
 * but the previous signature declared no parameter.
 *
 * @param request search request; only {@code request.keyword} is read here
 * @return search source holding the base filter plus, for a non-empty
 *         keyword, the dis_max query as a must clause
 */
private SearchSourceBuilder getSearchBuilderWithScore(GoodsRequest request) {
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

    // Same empty-keyword rule as getKeywordQueryBuilder: without a keyword only
    // the base filter applies, instead of building queries from a null/empty value.
    if (StringUtils.isEmpty(request.keyword)) {
        searchSourceBuilder.query(addFilter(null, request));
        return searchSourceBuilder;
    }

    // dis_max keeps the best-scoring sub-query per document.
    DisMaxQueryBuilder disMaxQueryBuilder = QueryBuilders.disMaxQuery();

    QueryBuilder ngramSearchBuilder = QueryBuilders
        .matchQuery("name.ngram", request.keyword)
        .analyzer("ngramSearchAnalyzer")
        .boost(5f);
    disMaxQueryBuilder.add(ngramSearchBuilder);

    TermQueryBuilder simplePinYinQueryBuilder = QueryBuilders.termQuery("name.SPY", request.keyword);
    disMaxQueryBuilder.add(simplePinYinQueryBuilder);

    QueryBuilder simplePinYinContainQueryBuilder = QueryBuilders
        .wildcardQuery("name.SPY", "*" + request.keyword + "*")
        .boost(0.8f);
    disMaxQueryBuilder.add(simplePinYinContainQueryBuilder);

    QueryBuilder fullPingYinQueryBuilder = QueryBuilders
        .matchPhraseQuery("name.FPY", request.keyword)
        .analyzer("fullPinyinSearchAnalyzer");
    disMaxQueryBuilder.add(fullPingYinQueryBuilder);

    QueryBuilder containSearchBuilder = QueryBuilders
        .matchQuery("name", request.keyword)
        .analyzer("ikSearchAnalyzer")
        .minimumShouldMatch("100%");
    disMaxQueryBuilder.add(containSearchBuilder);

    searchSourceBuilder.query(addFilter(disMaxQueryBuilder, request));

    return searchSourceBuilder;
}

遇到问题

---未完待续---

参考

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 218,122评论 6 505
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 93,070评论 3 395
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 164,491评论 0 354
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 58,636评论 1 293
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 67,676评论 6 392
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 51,541评论 1 305
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 40,292评论 3 418
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 39,211评论 0 276
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 45,655评论 1 314
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,846评论 3 336
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,965评论 1 348
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 35,684评论 5 347
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 41,295评论 3 329
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,894评论 0 22
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 33,012评论 1 269
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 48,126评论 3 370
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,914评论 2 355