<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.9.3</version>
</dependency>
无账号密码版
@Configuration
public class MyEsConfig {public static final RequestOptions COMMON_OPTIONS;static {RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();COMMON_OPTIONS = builder.build();}@Beanpublic RestHighLevelClient esRestClient() {RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(//new HttpHost("localhost", 9200, "http"),//集群配置法new HttpHost("localhost", 9200, "http")));return client;}
}
有密码版
@Configuration
public class MyEsConfig {public static final RequestOptions COMMON_OPTIONS;/*** 配置文件的配置* elasticsearch:* urls: 192.168.183.130:9200,192.168.183.131:9200* account: elastic* cypher: 123456*/@Value("${elasticsearch.urls}")private String urls;@Value("${elasticsearch.account}")private String account;@Value("${elasticsearch.cypher}")private String cypher;static {RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();COMMON_OPTIONS = builder.build();}@Beanpublic RestHighLevelClient esRestClient() {RestClientBuilder builder = null;String ipAddr = null;String[] urlArr;Integer port = null;if (!StringUtils.isBlank(urls)) {String[] urlsArr = urls.split(",");for (int i = 0; i < urlsArr.length; i++) {String url = urlsArr[i];urlArr = url.split(":");ipAddr = urlArr[0];port = (urlArr[1] == null ? 0 : Integer.parseInt(urlArr[1]));builder = RestClient.builder(new HttpHost(ipAddr, port, "http"));}}CredentialsProvider credentialsProvider = new BasicCredentialsProvider();credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(account, cypher));builder.setHttpClientConfigCallback(f -> f.setDefaultCredentialsProvider(credentialsProvider));RestHighLevelClient restHighLevelClient = new RestHighLevelClient(builder);return restHighLevelClient;}
}
// Assemble the document to index as a field-name -> value map.
Map<String, Object> doc = new HashMap<>();
doc.put("recordDate", "data1");
doc.put("recordDateMillis", "data2");

// Target the "eslog" index; no id is set on the request.
IndexRequest indexRequest = new IndexRequest("eslog").source(doc);
try {
    IndexResponse indexResponse = restHighLevelClient.index(indexRequest, MyEsConfig.COMMON_OPTIONS);
    // Success means the response result is named "created" (case-insensitive).
    return "created".equalsIgnoreCase(indexResponse.getResult().name());
} catch (IOException e) {
    e.printStackTrace();
    return false;
}
String index = "eslog";BulkRequest request = new BulkRequest();list.stream().forEach(item ->{request.add(new IndexRequest(index).source(item));});try {BulkResponse bulk = restHighLevelClient.bulk(request, MyEsConfig.COMMON_OPTIONS);if(bulk.status().getStatus()==200){return R.ok("批量插入成功");}} catch (IOException e) {e.printStackTrace();return R.error("批量插入失败!");}
//1.创建DeleteByQueryRequestDeleteByQueryRequest request = new DeleteByQueryRequest();request.indices("eslog");//2、指定检索条件 身高 低于 170request.setQuery(QueryBuilders.rangeQuery("height").lte(170));try {//执行删除BulkByScrollResponse response =restHighLevelClient.deleteByQuery(request, ElasticsearchConfig.COMMON_OPTIONS);log.info("删除成功,返回:"+response);} catch (IOException e) {e.printStackTrace();}
public R queryByKeyword(Map map) {String keyWord = map.get("keyWord") == null ? null : (String) map.get("keyWord");Integer start = map.get("start") == null ? 0 : Integer.parseInt((String) map.get("start"));Integer limit = map.get("limit") == null ? 0 : Integer.parseInt((String) map.get("limit"));String startTime = map.get("startTime") == null ? null : (String) map.get("startTime");String endTime = map.get("endTime") == null ? null : (String) map.get("endTime");Long startMillis = 0L;Long endMillis = 0L;if (!StringUtils.isBlank(startTime)) {try {Date startTimeDate = sdf.parse(startTime);startMillis = startTimeDate.getTime();} catch (ParseException e) {e.printStackTrace();startMillis = 0L;}}if (!StringUtils.isBlank(endTime)) {try {Date endTimeDate = sdf.parse(endTime);endMillis = endTimeDate.getTime();} catch (ParseException e) {e.printStackTrace();endMillis = 0L;}}//1.创建请求SearchRequest request = new SearchRequest();request.indices("logsrecords");//2、创建请求参数SearchSourceBuilder ssb = new SearchSourceBuilder();//3、分页排序ssb.from(start - 1).size(limit).sort("recordDateMillis", SortOrder.DESC)//超时时间.timeout(new TimeValue(60, TimeUnit.SECONDS))//返回总数.trackTotalHits(true);//4.布尔条件查询BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();//关键字模糊查询--类似于 like %keyword%if (!StringUtils.isBlank(keyWord)) {//短语查询boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("name", keyWord)).should(QueryBuilders.matchPhraseQuery("job", keyWord));;}if (startMillis != null && startMillis != 0L) {//范围查询boolQueryBuilder.filter(QueryBuilders.rangeQuery("recordDateMillis").gt(startMillis).lte(endMillis));//精确匹配boolQueryBuilder.filter(QueryBuilders.termQuery("address", "北京"));}ssb.query(boolQueryBuilder);request.source(ssb);SearchResponse response = null;RestStatus status = null;Map data = new HashMap<>();List
根据单个id查询:类似于mysql的 where id=id1
GetRequest request = new GetRequest("eslog","oI9GVHQBH0SEUrtlhvX7");try {log.info("======"+request);GetResponse response = restHighLevelClient.get(request, ElasticsearchConfig.COMMON_OPTIONS);Map sourceAsMap = response.getSourceAsMap();return R.ok("查询成功").put("sourceAsMap",sourceAsMap);} catch (IOException e) {e.printStackTrace();}
根据多个id查询:类似于mysql的 where id in (id1,id2,id3)
SearchRequest request = new SearchRequest();request.indices("eslog");SearchSourceBuilder builder = new SearchSourceBuilder();BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();//核心部分boolQueryBuilder.filter(QueryBuilders.idsQuery().addIds("oI9GVHQBH0SEUrtlhvX7", "oY9HVHQBH0SEUrtlaPUO", "3Fz9aHQBxI7zG-AK_rLc"));builder.query(boolQueryBuilder);request.source(builder);List
前缀查询,可以通过一个关键字去指定一个field的前缀,从而查询到指定文档
// Filter on documents whose "name.keyword" value starts with the given prefix.
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.filter(QueryBuilders.prefixQuery("name.keyword", "王"));

// Wrap the filter in a search source.
SearchSourceBuilder builder = new SearchSourceBuilder();
builder.query(boolQueryBuilder);

// Run it against the "eslog" index.
SearchRequest request = new SearchRequest();
request.indices("eslog");
request.source(builder);
// NOTE(review): the snippet is cut off here in the source; the declaration below is incomplete.
List
通配查询: 与MYSQL中的like是一样的,在查询时可以指定通配符和占位符
*是通配符 ?是占位符
SearchRequest request =new SearchRequest();request.indices("eslog");SearchSourceBuilder builder = new SearchSourceBuilder();BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();//boolQueryBuilder.filter(QueryBuilders.wildcardQuery("name.keyword","王*"));boolQueryBuilder.filter(QueryBuilders.wildcardQuery("name.keyword","王??"));builder.query(boolQueryBuilder);request.source(builder);log.info("======="+builder);List
高亮就是用户输入关键字,以一定特殊样式展示给用户,让用户知道为什么这个结果被检索出来
高亮展示数据,本身就是文档中的一个field,单独将field以highlight的形式返回给你
ES提供一个highlight属性,和query同级别
1、fragment_size:指定高亮数据展示多少个字符回来
2、pre_tags:指定前缀标签
3、post_tags:指定后缀标签
4、fields:指定哪个field以高亮形式返回
//查询方法public Map getKeyWordHighLightQuery(String keyWord) {Integer start = 0;Integer limit = 10;SearchRequest request = new SearchRequest();request.indices("eslog");SearchSourceBuilder ssb = new SearchSourceBuilder();BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();if (!StringUtils.isBlank(keyWord)) {/*** QueryBuilders.multiMatchQuery 多个字段field匹配一个值,模糊查询:* 第一参数是输入的要查询的值,后面的都是要匹配的字段*/boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("companyCode", keyWord)).should(QueryBuilders.matchPhraseQuery("loginType", keyWord)).should(QueryBuilders.matchPhraseQuery("userId", keyWord)).should(QueryBuilders.matchPhraseQuery("appid", keyWord)).should(QueryBuilders.matchPhraseQuery("loginIp", keyWord)).should(QueryBuilders.matchPhraseQuery("unitCode", keyWord)).should(QueryBuilders.matchPhraseQuery("id", keyWord)).should(QueryBuilders.matchPhraseQuery("userType", keyWord)).should(QueryBuilders.matchPhraseQuery("isInWhitlelist", keyWord)).should(QueryBuilders.matchPhraseQuery("env", keyWord));}ssb.query(boolQueryBuilder);ssb.trackTotalHits(true);/***设置高亮查询**/ssb.highlighter(getHighlightBuilder());request.source(ssb);SearchResponse response = null;List
构建高亮字段
/*** 高亮设置** @return*/private HighlightBuilder getHighlightBuilder() {HighlightBuilder highlightBuilder = new HighlightBuilder();highlightBuilder.preTags("");highlightBuilder.postTags("");/*** highlighterType可选:unified,plain和fvh* unified : 使用Lucene的统一highlighter。* 这个突出显示器将文本分成句子,并使用BM25算法对单个句子进行评分,* 就好像它们是语料库中的文档一样。它还支持准确的短语和多项(模糊,前缀,正则表达式)突出显示**plain highlighter最适合在单一领域突出简单的查询匹配。* 为了准确反映查询逻辑,它会创建一个微小的内存索引,* 并通过Lucene的查询执行计划程序重新运行原始查询条件,* 以访问当前文档的低级匹配信息。对于需要突出显示的每个字段和每个文档都会* 重复此操作。如果要在复杂查询的大量文档中突出显示很多字段,* 我们建议使用unified highlighter postings或term_vector字段**fvh highlighter使用Lucene的Fast Vector highlighter。此突出显示器可用于映射中term_vector设置为的* 字段with_positions_offsets。Fast Vector highlighter*/highlightBuilder.highlighterType("unified");/*** 这只高亮字段,我这里设置为要查询的字段一致*/highlightBuilder.field("companyCode").field("loginType").field("userId").field("appid").field("loginIp").field("unitCode").field("id").field("userType");//如果要多个字段高亮,这项要为falsehighlightBuilder.requireFieldMatch(false);/*** fragmentSize 设置要显示出来的fragment文本判断的长度,默认是100* numOfFragments 代表要显示几处高亮(可能会搜出多段高亮片段)。默认是5* noMatchSize 即使字段中没有关键字命中,也可以返回一段文字,该参数表示从开始多少个字符被返回*///highlightBuilder.fragmentSize(size).numOfFragments(3).noMatchSize(100);return highlightBuilder;}
用返回值中的高亮数据覆盖原来的数据
/**
*高亮结果返回:这里要做字段的部分高亮,比如 “我是中国人” 如果只匹配上“是中”,那么就只高亮这两个字其他不亮
*正常查询和高亮查询分开返回的,也就是高亮部分数据不会影响正常数据
*这里要用高亮数据覆盖正常返回数据,这样返回前端就是匹配的都是高亮显示了
**/private Map getHighLightMap(Map map, Map highlightFields) {HighlightField highlightCompanyCode = highlightFields.get("companyCode");HighlightField highlightLoginType = highlightFields.get("loginType");HighlightField highlightUserId = highlightFields.get("userId");HighlightField highlightAppid = highlightFields.get("appid");HighlightField highlightLoginIp = highlightFields.get("loginIp");HighlightField highlightUnitCode = highlightFields.get("unitCode");HighlightField highlightId = highlightFields.get("id");HighlightField highlightUserType = highlightFields.get("userType");boolean flag = false;if (highlightCompanyCode != null) {map.put("companyCode", highlightCompanyCode.fragments()[0].string());flag = true;}if (highlightLoginType != null) {map.put("loginType", highlightLoginType.fragments()[0].string());flag = true;}if (highlightUserId != null) {map.put("userId", highlightUserId.fragments()[0].string());flag = true;}if (highlightAppid != null) {map.put("appid", highlightAppid.fragments()[0].string());flag = true;}if (highlightLoginIp != null) {map.put("loginIp", highlightLoginIp.fragments()[0].string());flag = true;}if (highlightUnitCode != null) {map.put("unitCode", highlightUnitCode.fragments()[0].string());flag = true;}if (highlightId != null) {map.put("id", highlightId.fragments()[0].string());flag = true;}if (highlightUserType != null) {map.put("userType", highlightUserType.fragments()[0].string());flag = true;}/***从这里就是开始字段部分内容的高亮显示,如果并不需要这么细,注释掉下面if里面的内容和它调用的内容即可。**/if (map.size() > 0) {//遍历mapString replaceMentvalue = null;String mapValue = null;for (String key : map.keySet()) {mapValue = map.get(key) + "";if (!StringUtils.isBlank(mapValue)) {replaceMentvalue = subGetManyStr(mapValue, keyWord);map.put(key, replaceMentvalue);}}}return map;}/*** @Author liaochao* @Param input* @Param regex* 作用:关键字匹配不区分大小写,且给原字符加上样式* 获取字符串中包含另一个字符串(多个),且为原字符串加上样式,即高亮显示。* 例如:* 原字符串: "I like Java,jAva is very easy and jaVa is so popular.";* 关键字: "java";* 加样式后:* I like 
Java,jAva is very easy* and jaVa is so popular.*/public static String subGetManyStr(String input, String regex) {int length = regex.length();int indexNum;String substring;List originList = new ArrayList<>();originList.clear();List cssList = new ArrayList<>();cssList.clear();String strCss;String substr;while (true) {indexNum = input.toUpperCase().indexOf(regex.toUpperCase());if (indexNum != -1) {substring = input.substring(indexNum, indexNum + length);strCss = "" + substring + "";cssList.add(strCss);//获取每次新输入的第一段substr = input.substring(0, indexNum);originList.add(substr);} else {//最后一段originList.add(input);break;}input = input.substring(indexNum + length);}StringBuilder sb = new StringBuilder();for (int i = 0; i < originList.size(); i++) {if (i != originList.size() - 1) {sb.append(new StringBuilder(originList.get(i) + (cssList.get(i) == null ? "" : cssList.get(i))));} else {sb.append(new StringBuilder(originList.get(i)));}}return new String(sb);}
earchRequest request = new SearchRequest();request.indices("eslog");SearchSourceBuilder builder = new SearchSourceBuilder();//按照名字聚合builder.aggregation(AggregationBuilders.terms("agg").field("name.keyword").size(10));//按照平均身高聚合builder.aggregation(AggregationBuilders.avg("heightAvg").field("height"));request.source(builder);List
应用场景:数据清洗。例如:要查询索引中某些字段长度大于20的全部数据(数据清洗时常常要求删除字段长度超过某个值的数据),该怎么做?
SearchRequest request = new SearchRequest();
request.indices("eslog");
SearchSourceBuilder ssb = new SearchSourceBuilder();
// Match documents whose field value is longer than 20 characters. The size() > 0 guard skips
// documents that have no value for the field; reading .value on such a document fails the script.
Script script1 = new Script(
        "doc['address.keyword'].size() > 0 && doc['address.keyword'].value.length() > 20");
Script script2 = new Script(
        "doc['job.keyword'].size() > 0 && doc['job.keyword'].value.length() > 20");
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
// should clauses are OR-ed: either field exceeding the limit is enough.
boolQueryBuilder.should(QueryBuilders.scriptQuery(script1));
boolQueryBuilder.should(QueryBuilders.scriptQuery(script2));
ssb.query(boolQueryBuilder);
request.source(ssb);
SearchResponse response = null;
// NOTE(review): the snippet is cut off here in the source; the declaration below is incomplete.
List
在数据清洗时常常要求删除字段为空数据,这里包括索引中**没有这个字段** 和 **有这个字段但没值**。怎么查?mustNot,查出来的数据包括: 不存在字段 和 字段存在但没值
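注:它最好单独使用,不要与其他条件混合,否则可能不起作用。原因很可能是:当 bool 查询中同时存在 must 或 filter 子句时,should 子句默认变为可选(minimum_should_match 默认为 0),此时需要显式设置 minimumShouldMatch(1) 才能让 should 条件生效。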
// 1. Create the request and point it at the "eslog" index.
SearchRequest request = new SearchRequest();
request.indices("eslog");

// 2. Create the search source: first page, up to 100 hits.
SearchSourceBuilder ssb = new SearchSourceBuilder();
ssb.from(0).size(100);

// 3. Build the condition: documents for which "age" OR "salary" does not exist. According to the
//    accompanying text this covers both a missing field and a field that is present without a value.
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder
        .should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("age")))
        .should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("salary")));

// 4. Attach the query to the request.
ssb.query(boolQueryBuilder);
request.source(ssb);
SearchResponse response = null;
// NOTE(review): the snippet is cut off here in the source; the declaration below is incomplete.
List
如果是es库里面已经重复,比较难处理。我这里是提供的从源头上去掉数据重复的办法
在es中有个自动生成的_id,每条数据都会自动生成,且不重复!
所以可以从要插入的数据里取出id(这里默认id是不重复的,类似mysql的主键),在插入数据时用它替换es自动生成的_id。由于es的_id不可重复,相同_id的数据再次写入时不会新增文档,而是覆盖已有文档(若希望重复数据被直接拒绝,可将请求的opType设置为create),这样索引中的数据就不会重复了。
用数据内容替换es的_id的办法:
插入数据时指定es的_id,其值id来源于业务数据中的id
// Set the document _id on the request explicitly; per the surrounding text, `id` is taken from
// the business data so that the same record always maps to the same _id.
request.id(id);