Integrating Elasticsearch with Spring Boot for text and document search

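Before the search code, the application needs an Elasticsearch client. Below is a minimal configuration sketch, assuming Elasticsearch 7.x with `spring-boot-starter-data-elasticsearch` on the classpath; the config class name and the host/port are illustrative assumptions, not part of the original project:

```java
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

// Hypothetical client configuration: exposes the RestHighLevelClient that the
// document-search endpoint injects. With spring-data-elasticsearch on the
// classpath, Spring Boot builds an ElasticsearchRestTemplate on top of it.
@Configuration
public class ElasticsearchConfig {

    @Bean
    public RestHighLevelClient restHighLevelClient() {
        // Placeholder address; point this at your cluster
        return new RestHighLevelClient(
                RestClient.builder(new HttpHost("localhost", 9200, "http")));
    }
}
```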
Two endpoints are shown below. The first does a paginated keyword search over user records through `ElasticsearchRestTemplate`; the second does full-text search over document content through the `RestHighLevelClient`.

```java
// Paginated search of user records in Elasticsearch
@GetMapping("/page")
public TableDataInfo page(@RequestParam(name = "username", required = false) String username) {
    PageDomain pageDomain = TableSupport.buildPageRequest();
    Integer pageNum = pageDomain.getPageNum();
    Integer pageSize = pageDomain.getPageSize();
    Pageable pageable = PageRequest.of(pageNum - 1, pageSize);

    // Fuzzy keyword match on the username field
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
            .should(QueryBuilders.multiMatchQuery(username, "username"));

    // Highlighting
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field(new HighlightBuilder.Field("username"));
    highlightBuilder.requireFieldMatch(false); // allow highlights across multiple fields
    highlightBuilder.preTags("<span style='color:red'>");
    highlightBuilder.postTags("</span>");

    NativeSearchQueryBuilder builder = new NativeSearchQueryBuilder()
            .withHighlightBuilder(highlightBuilder)
            .withSort(SortBuilders.scoreSort().order(SortOrder.DESC))
            .withPageable(pageable);
    if (StringUtils.isNotBlank(username)) {
        builder.withQuery(boolQueryBuilder);
    }
    NativeSearchQuery query = builder.build();
    // Report the real hit count instead of the default 10,000 cap
    query.setTrackTotalHits(true);

    SearchHits<User> search = elasticsearchRestTemplate.search(query, User.class);
    List<User> list = new ArrayList<>();
    for (org.springframework.data.elasticsearch.core.SearchHit<User> searchHit : search.getSearchHits()) {
        User user = searchHit.getContent();
        // Replace the plain value with the highlighted fragment when present
        if (CollectionUtils.isNotEmpty(searchHit.getHighlightField("username"))) {
            user.setUserName(String.join("", searchHit.getHighlightField("username")));
        }
        list.add(user);
    }
    long totalHits = search.getTotalHits();
    // Cap the reported total at 10,000: paging past index.max_result_window
    // would otherwise make Elasticsearch reject the request
    if (totalHits > 10000) {
        totalHits = 10000;
    }
    TableDataInfo rspData = new TableDataInfo();
    rspData.setCode(HttpStatus.SUCCESS);
    rspData.setMsg("Success");
    rspData.setRows(list);
    rspData.setTotal(totalHits);
    return rspData;
}
```

The document-search endpoint (in its own controller) queries both the title and the text extracted from the file, and again prefers highlighted fragments over the raw stored values:

```java
// Paginated full-text search of document content
@GetMapping("/page")
@ApiOperation("Paginated full-text search of document content")
public TableDataInfo page(@RequestParam(name = "content", required = false) String content) throws IOException {
    List<FileSearchElastic> list = new ArrayList<>();
    SearchSourceBuilder builder = new SearchSourceBuilder();
    // Report the real hit count
    builder.trackTotalHits(true);

    // Highlight matches in both the extracted text and the title
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.field("attachment.content");
    highlightBuilder.field("title");
    highlightBuilder.requireFieldMatch(false); // allow highlights across multiple fields
    highlightBuilder.preTags("<span style='color:red'>");
    highlightBuilder.postTags("</span>");
    builder.highlighter(highlightBuilder);

    SearchRequest searchRequest = new SearchRequest("idx_file");
    if (StringUtils.isNotBlank(content)) {
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        boolQueryBuilder.should(QueryBuilders.matchQuery("title", content))
                .should(QueryBuilders.multiMatchQuery(content, "attachment.content"));
        builder.query(boolQueryBuilder);
    }
    // Attach the source outside the if-block so highlighting and
    // trackTotalHits apply even when no keyword is given
    searchRequest.source(builder);

    SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    long totalHits = 0L;
    if (search.getHits() != null) {
        for (SearchHit documentFields : search.getHits().getHits()) {
            Map<String, HighlightField> highlightFields = documentFields.getHighlightFields();
            Map<String, Object> sourceAsMap = documentFields.getSourceAsMap();
            // Flatten the nested attachment object into the top-level map
            sourceAsMap.putAll(BeanUtil.beanToMap(sourceAsMap.get("attachment")));
            // Prefer the highlighted fragments over the raw values
            String highlightedContent = getHighlightFieldInfo(highlightFields.get("attachment.content"));
            if (StringUtils.isNotBlank(highlightedContent)) {
                sourceAsMap.put("content", highlightedContent);
            }
            String title = getHighlightFieldInfo(highlightFields.get("title"));
            if (StringUtils.isNotBlank(title)) {
                sourceAsMap.put("title", title);
            }
            FileSearchElastic fileSearchElastic = new FileSearchElastic();
            fileSearchElastic.setTitle(Objects.isNull(sourceAsMap.get("title")) ? "" : sourceAsMap.get("title").toString());
            fileSearchElastic.setContent(Objects.isNull(sourceAsMap.get("content")) ? "" : sourceAsMap.get("content").toString());
            list.add(fileSearchElastic);
        }
        totalHits = search.getHits().getTotalHits().value;
    }
    // Cap the reported total at 10,000 (the index.max_result_window default)
    if (totalHits > 10000) {
        totalHits = 10000;
    }
    TableDataInfo rspData = new TableDataInfo();
    rspData.setCode(HttpStatus.SUCCESS);
    rspData.setMsg("Success");
    rspData.setRows(list);
    rspData.setTotal(totalHits);
    return rspData;
}

// Concatenate a field's highlight fragments into a single string
private String getHighlightFieldInfo(HighlightField highlightField) {
    if (Objects.isNull(highlightField)) {
        return null;
    }
    Text[] fragments = highlightField.fragments();
    StringBuilder nTitle = new StringBuilder();
    for (Text fragment : fragments) {
        nTitle.append(fragment);
    }
    return nTitle.toString();
}
```

The entity behind the document index, tokenized with the IK Chinese analyzer:

```java
// Entity mapped to the idx_file index
@Data
@Document(indexName = "idx_file")
public class FileSearchElastic {

    @Id
    private Long id;

    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
    @ApiModelProperty(value = "Title", name = "title")
    private String title;

    @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
    @ApiModelProperty(value = "Content", name = "content")
    private String content;
}
```
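The `ik_max_word`/`ik_smart` settings on the entity only take effect if the index is created with that mapping, and they require the analysis-ik plugin on the Elasticsearch nodes. Here is a sketch of creating the index from the annotations with Spring Data's `IndexOperations`; the initializer class itself is a hypothetical addition:

```java
import javax.annotation.PostConstruct;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.IndexOperations;
import org.springframework.stereotype.Component;

// Hypothetical startup hook: create idx_file with the mapping derived
// from the @Field annotations on FileSearchElastic.
@Component
public class FileIndexInitializer {

    private final ElasticsearchRestTemplate elasticsearchRestTemplate;

    public FileIndexInitializer(ElasticsearchRestTemplate elasticsearchRestTemplate) {
        this.elasticsearchRestTemplate = elasticsearchRestTemplate;
    }

    @PostConstruct
    public void createIndexIfMissing() {
        IndexOperations indexOps = elasticsearchRestTemplate.indexOps(FileSearchElastic.class);
        if (!indexOps.exists()) {
            indexOps.create();                             // index settings
            indexOps.putMapping(indexOps.createMapping()); // mapping from annotations
        }
    }
}
```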
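Searching `attachment.content` presupposes that files were indexed through Elasticsearch's ingest-attachment processor (a separate plugin), which extracts text from base64-encoded file bytes. The following sketch shows the indexing side; the pipeline name `attachment`, the `data` field, and the `FileIndexer` class are assumptions for illustration, not from the original code:

```java
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.ingest.PutPipelineRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.xcontent.XContentType;

// Hypothetical indexing helper for the idx_file index.
public class FileIndexer {

    private final RestHighLevelClient client;

    public FileIndexer(RestHighLevelClient client) {
        this.client = client;
    }

    // One-time setup: the attachment processor reads base64 bytes from the
    // "data" field and writes the extracted text to attachment.content
    public void createPipeline() throws Exception {
        String source = "{\"description\":\"extract file content\","
                + "\"processors\":[{\"attachment\":{\"field\":\"data\"}}]}";
        client.ingest().putPipeline(
                new PutPipelineRequest("attachment", new BytesArray(source), XContentType.JSON),
                RequestOptions.DEFAULT);
    }

    // Index one file: base64-encode the bytes and route through the pipeline
    public void indexFile(String id, String title, String path) throws Exception {
        Map<String, Object> doc = new HashMap<>();
        doc.put("title", title);
        doc.put("data", Base64.getEncoder()
                .encodeToString(Files.readAllBytes(Paths.get(path))));
        client.index(new IndexRequest("idx_file").id(id).source(doc).setPipeline("attachment"),
                RequestOptions.DEFAULT);
    }
}
```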