中文拼音混合检索案例

1.准备工作

从ik和pinyin的官方github地址下载并安装ik分词插件和拼音分词插件(插件版本需与Elasticsearch版本保持一致,安装完成后需要重启Elasticsearch)

ik:
https://github.com/medcl/elasticsearch-analysis-ik

pinyin:
https://github.com/medcl/elasticsearch-analysis-pinyin

2.定义拼音和ik分词机制的索引mapping

创建定义mapping 结构的dsl配置文件

创建xml文件esmapper/estrace/pinyin.xml,并在其中定义名称为createDemoIndice的dsl配置:

  1. <property name="createDemoIndice">
  2. <![CDATA[{ ## DSL body: index settings plus the mapping of the "demo" type
  3. "settings": {
  4. "number_of_shards": 6,
  5. "index.refresh_interval": "5s",
  6. "analysis" : {
  7. "analyzer" : {
  8. "pinyin_analyzer" : { ## custom analyzer built on the my_pinyin tokenizer defined below
  9. "tokenizer" : "my_pinyin"
  10. }
  11. },
  12. "tokenizer" : {
  13. "my_pinyin" : { ## tokenizer of type "pinyin", provided by the pinyin plugin installed in step 1
  14. "type" : "pinyin",
  15. "keep_separate_first_letter" : false,
  16. "keep_full_pinyin" : true,
  17. "keep_original" : true,
  18. "limit_first_letter_length" : 16,
  19. "lowercase" : true,
  20. "remove_duplicated_term" : true
  21. }
  22. }
  23. }
  24. },
  25. "mappings": {
  26. "demo": {
  27. "properties": {
  28. "contentbody": { ## analyzed with ik_max_word at index and search time; contentbody.keyword is a keyword sub-field
  29. "type": "text",
  30. "term_vector": "with_positions_offsets",
  31. "analyzer": "ik_max_word",
  32. "search_analyzer": "ik_max_word",
  33. "fields": {
  34. "keyword": {
  35. "type": "keyword"
  36. }
  37. }
  38. },
  39. "agentStarttime": { ## date field accepting any of the formats listed below
  40. "type": "date",
  41. "format":"yyyy-MM-dd HH:mm:ss.SSS||yyyy-MM-dd'T'HH:mm:ss.SSS||yyyy-MM-dd HH:mm:ss||epoch_millis"
  42. },
  43. "applicationName": {
  44. "type": "text",
  45. "fields": {
  46. "keyword": {
  47. "type": "keyword"
  48. }
  49. }
  50. },
  51. "name": {
  52. "type": "text",
  53. "fields": {
  54. "pinyin": { ## built-in pinyin sub-field (name.pinyin), analyzed with the pinyin analyzer
  55. "type": "text",
  56. "store": false,
  57. "term_vector": "with_offsets",
  58. "analyzer": "pinyin_analyzer",
  59. "boost": 10
  60. }
  61. }
  62. }
  63. }
  64. }
  65. }
  66. }]]>
  67. </property>

加载配置文件并创建demo索引结构

  1. /**
  2.  * Recreates the demo index from the createDemoIndice DSL defined in
  3.  * esmapper/estrace/pinyin.xml and prints the resulting index definition.
  4.  */
  5. public void testCreateDemoMapping(){
  6. ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/estrace/pinyin.xml");
  7. try {
  8. // Drop the existing demo index first so that the mapping and data can be re-initialized.
  9. clientUtil.dropIndice("demo");
  10. } catch (ElasticSearchException e) {
  11. // Best effort: a failed drop must not stop the index from being created.
  12. // NOTE(review): presumably thrown when the index does not exist yet (first run) - confirm.
  13. e.printStackTrace();
  14. }
  15. // Create the index structure from the createDemoIndice DSL and show the server response.
  16. String response = clientUtil.createIndiceMapping("demo","createDemoIndice");
  17. System.out.println(response);
  18. // Fetch and print the index structure that was just created.
  19. System.out.println(clientUtil.getIndice("demo"));
  20. }

3.添加测试数据

  1. // Build two Demo documents, bulk-add them to the demo index, then read them back by id.
  2. ClientInterface clientUtil = ElasticSearchHelper.getRestClientUtil();
  3. List<Demo> demos = new ArrayList<>();
  4. Demo demo = new Demo();
  5. demo.setDemoId(2L);// upper-case L suffix: a lower-case l is easily misread as the digit 1
  6. demo.setAgentStarttime(new Date());
  7. demo.setApplicationName("blackcatdemo2");
  8. demo.setContentbody("this is content body2");
  9. demo.setName("刘德华");
  10. demos.add(demo);
  11. demo = new Demo();
  12. demo.setDemoId(3L);
  13. demo.setAgentStarttime(new Date());
  14. demo.setApplicationName("blackcatdemo3");
  15. demo.setContentbody("四大天王,这种文化很好,中华人民共和国");
  16. demo.setName("张学友");
  17. demos.add(demo);
  18. // Add both documents in one call
  19. String response = clientUtil.addDocuments("demo",// index name
  20. "demo",// index type
  21. demos);
  22. System.out.println("addDocuments-------------------------");
  23. System.out.println(response);
  24. // Verify the two documents that were just added
  25. response = clientUtil.getDocument("demo",// index name
  26. "demo",// index type
  27. "2");// document id
  28. System.out.println("getDocument-------------------------");
  29. System.out.println(response);
  30. demo = clientUtil.getDocument("demo",// index name
  31. "demo",// index type
  32. "3",// document id
  33. Demo.class);

4.拼音检索

定义拼音检索dsl

在之前定义的pinyin.xml文件中新增dsl配置-searchPinyinDemo

  1. <property name="searchPinyinDemo"><![CDATA[{
  2. "size": 100,
  3. "query": {
  4. "bool": {
  5. "must": [
  6. {
  7. "match_phrase_prefix" : {
  8. "name.pinyin" : { ## query the pinyin sub-field: only name.pinyin uses pinyin_analyzer, the plain name field does not
  9. "query" : #[name], ## bound from the "name" entry of the parameter map passed to searchList
  10. "max_expansions" : 10
  11. }
  12. }
  13. }
  14. ]
  15. }
  16. }
  17. }]]></property>

执行拼音检索操作

  1. /**
  2.  * Runs the searchPinyinDemo DSL from esmapper/estrace/pinyin.xml against demo/_search
  3.  * with a mixed Chinese/pinyin keyword and prints what the search returned.
  4.  */
  5. @Test
  6. public void searchPinyinDemo(){
  7. ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/estrace/pinyin.xml");
  8. Map<String,String> params = new HashMap<>();
  9. params.put("name","zhang学友");// mixed Chinese/pinyin search condition, referenced as #[name] in the DSL
  10. ESDatas<Map> esDatas = clientUtil.searchList("demo/_search","searchPinyinDemo",params,Map.class);
  11. List<Map> datas = esDatas.getDatas();
  12. long totalSize = esDatas.getTotalSize();
  13. // Print the results so that the outcome of the search is visible when the test runs.
  14. System.out.println("totalSize:" + totalSize);
  15. System.out.println(datas);
  16. }