1. Elasticsearch 配置类
/**
 * Spring bean definitions for the Elasticsearch high-level REST client.
 *
 * <p>NOTE(review): no {@code @Configuration} (or other stereotype) annotation is visible on this
 * class in this snippet; presumably it is declared or registered elsewhere. Confirm, because the
 * {@code @Bean} method below relies on the class being known to the Spring container.
 */
public class ElasticSearchClientConfig {

    /**
     * Builds the client used to talk to the node at 127.0.0.1:9200 over plain HTTP.
     *
     * @return a newly created {@link RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
1. 实体类
/**
 * One product entry as extracted from a search result page.
 *
 * <p>Accessors, the no-args and all-args constructors and the builder are generated by the
 * Lombok annotations below. The field order is significant for the all-args constructor.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Content {

    /** Image reference of the product. */
    private String img;

    /** Price text of the product, kept as a string. */
    private String price;

    /** Title text of the product. */
    private String title;
}
2. 爬虫数据代码
public class HtmlParseUtil { public List<Content> parse(String keyWord)throws Exception{ String url = "https://search.jd.com/Search?keyword="+ keyWord; //解析网页 Document document = Jsoup.parse(new URL(url), 30000); //js中可以使用的方法 Element element = document.getElementById("J_goodsList"); ArrayList<Content> contentList = new ArrayList<>(); //获取元素中的内容 Elements elements = element.getElementsByTag("li"); for (Element el : elements) { String img = el.getElementsByTag("img").eq(0).attr("src"); String price = el.getElementsByClass("p-price").eq(0).text(); String title = el.getElementsByClass("p-name").eq(0).text(); Content build = Content.builder() .img(img) .price(price) .title(title) .build(); contentList.add(build); } return contentList; } }3.业务层
@Autowired private RestHighLevelClient restHighLevelClient; public Boolean parse(String keyWords) throws Exception { HtmlParseUtil htmlParseUtil = new HtmlParseUtil(); //把查询的数据放入到es中 BulkRequest bulkRequest = new BulkRequest(); List<Content> contents = htmlParseUtil.parse(keyWords); for (int i = 0; i < contents.size(); i++) { bulkRequest.add( new IndexRequest("goods") .id(""+(i+1)) .source(JSON.toJSONString(contents.get(i)), XContentType.JSON) ); } BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); return !bulk.hasFailures(); } }5. Controller层
/**
 * REST endpoints that expose the operations of {@link ContentService}.
 */
@RestController
public class ContentController {

    @Autowired
    private ContentService contentService;

    /**
     * Handles {@code /parse} by delegating to {@link ContentService#parse(String)}.
     *
     * @param keyWords the keyword taken from the request
     * @return whatever the service reports for the keyword
     * @throws Exception if the service call fails
     */
    @RequestMapping("/parse")
    public Boolean parse(String keyWords) throws Exception {
        Boolean outcome = contentService.parse(keyWords);
        return outcome;
    }
}
6.测试结果
7. ES 查询数据
public List<Map<String,Object>> searchGoods(String keyWords,int pageNo,int pageSize) throws IOException { //创建搜索条件 SearchRequest searchRequest = new SearchRequest("goods"); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); //分页 sourceBuilder.from(pageNo); sourceBuilder.size(pageSize); //精确匹配 TermQueryBuilder queryBuilder = QueryBuilders.termQuery("title", keyWords); sourceBuilder.query(queryBuilder); //执行搜索 searchRequest.source(sourceBuilder); SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); //解析结果 ArrayList<Map<String,Object>> list = new ArrayList<>(); for (SearchHit searchHit : searchResponse.getHits().getHits()) { list.add(searchHit.getSourceAsMap()); } return list; }8.访问测试
/**
 * Handles {@code /searchGoods} by forwarding the three required request parameters to the
 * service's {@code searchGoods} method.
 *
 * @param keyWords the keyword to search for
 * @param pageNo   the requested page number
 * @param pageSize the requested page size
 * @return the hits returned by the service
 * @throws IOException if the service call fails
 */
@RequestMapping("/searchGoods")
public List<Map<String,Object>> searchGoods(
        @RequestParam("keyWords") String keyWords,
        @RequestParam("pageNo") int pageNo,
        @RequestParam("pageSize") int pageSize) throws IOException {
    List<Map<String,Object>> hits = contentService.searchGoods(keyWords, pageNo, pageSize);
    return hits;
}
9.实现高亮效果
public List<Map<String,Object>> searchHigth(String keyWords,int pageNo,int pageSize) throws IOException { //创建搜索条件 SearchRequest searchRequest = new SearchRequest("goods"); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); //分页 sourceBuilder.from(pageNo); sourceBuilder.size(pageSize); //高亮 HighlightBuilder highlightBuilder = new HighlightBuilder(); highlightBuilder.field("title"); //多个高亮显示 highlightBuilder.requireFieldMatch(false); highlightBuilder.preTags("<span style='color:red'>"); highlightBuilder.postTags("</span>"); //精确匹配 TermQueryBuilder queryBuilder = QueryBuilders.termQuery("title", keyWords); sourceBuilder.query(queryBuilder); //执行搜索 searchRequest.source(sourceBuilder); SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); //解析结果 ArrayList<Map<String,Object>> list = new ArrayList<>(); for (SearchHit searchHit : searchResponse.getHits().getHits()) { Map<String, HighlightField> highlightFields = searchHit.getHighlightFields(); HighlightField title = highlightFields.get("title"); Map<String, Object> sourceAsMap = searchHit.getSourceAsMap(); //解析高亮的字段,将原来的字段换为我们高亮的字段 if (title != null){ Text[] fragments = title.fragments(); String nTitle = ""; for (Text text : fragments) { nTitle += text; } sourceAsMap.put("title",nTitle); } list.add(sourceAsMap); } return list; }