/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.analysis.common;

import org.opensearch.action.search.SearchResponse;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.query.Operator;
import org.opensearch.plugins.Plugin;
import org.opensearch.search.builder.SearchSourceBuilder;
import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.opensearch.test.OpenSearchIntegTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;

import static org.opensearch.client.Requests.searchRequest;
import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.opensearch.index.query.QueryBuilders.boolQuery;
import static org.opensearch.index.query.QueryBuilders.matchAllQuery;
import static org.opensearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.opensearch.index.query.QueryBuilders.matchPhraseQuery;
import static org.opensearch.index.query.QueryBuilders.matchQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.search.builder.SearchSourceBuilder.highlight;
import static org.opensearch.search.builder.SearchSourceBuilder.searchSource;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHighlight;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;

public class HighlighterWithAnalyzersTests extends OpenSearchIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        // The custom analyzers, tokenizers and token filters used in these tests are provided by the common analysis module.
        return Arrays.asList(CommonAnalysisModulePlugin.class);
    }

    public void testNgramHighlightingWithBrokenPositions() throws IOException {
        assertAcked(
            prepareCreate("test").setMapping(
                jsonBuilder().startObject()
                    .startObject("properties")
                    .startObject("name")
                    .field("type", "text")
                    .startObject("fields")
                    .startObject("autocomplete")
                    .field("type", "text")
                    .field("analyzer", "autocomplete")
                    .field("search_analyzer", "search_autocomplete")
                    .field("term_vector", "with_positions_offsets")
                    .endObject()
                    .endObject()
                    .endObject()
                    .endObject()
                    .endObject()
            )
                .setSettings(
                    Settings.builder()
                        .put(indexSettings())
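                        // max_ngram_diff must cover the 1..20 gram span configured for the autocomplete tokenizer below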
                        .put(IndexSettings.MAX_NGRAM_DIFF_SETTING.getKey(), 19)
                        .put("analysis.tokenizer.autocomplete.max_gram", 20)
                        .put("analysis.tokenizer.autocomplete.min_gram", 1)
                        .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")
                        .put("analysis.tokenizer.autocomplete.type", "ngram")
                        .put("analysis.filter.wordDelimiter.type", "word_delimiter")
                        .putList(
                            "analysis.filter.wordDelimiter.type_table",
                            "& => ALPHANUM",
                            "| => ALPHANUM",
                            "! => ALPHANUM",
                            "? => ALPHANUM",
                            ". => ALPHANUM",
                            "- => ALPHANUM",
                            "# => ALPHANUM",
                            "% => ALPHANUM",
                            "+ => ALPHANUM",
                            ", => ALPHANUM",
                            "~ => ALPHANUM",
                            ": => ALPHANUM",
                            "/ => ALPHANUM",
                            "^ => ALPHANUM",
                            "$ => ALPHANUM",
                            "@ => ALPHANUM",
                            ") => ALPHANUM",
                            "( => ALPHANUM",
                            "] => ALPHANUM",
                            "[ => ALPHANUM",
                            "} => ALPHANUM",
                            "{ => ALPHANUM"
                        )
                        .put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
                        .put("analysis.filter.wordDelimiter.generate_word_parts", true)
                        .put("analysis.filter.wordDelimiter.generate_number_parts", false)
                        .put("analysis.filter.wordDelimiter.catenate_words", true)
                        .put("analysis.filter.wordDelimiter.catenate_numbers", true)
                        .put("analysis.filter.wordDelimiter.catenate_all", false)
                        .put("analysis.analyzer.autocomplete.tokenizer", "autocomplete")
                        .putList("analysis.analyzer.autocomplete.filter", "lowercase", "wordDelimiter")
                        .put("analysis.analyzer.search_autocomplete.tokenizer", "whitespace")
                        .putList("analysis.analyzer.search_autocomplete.filter", "lowercase", "wordDelimiter")
                )
        );
        client().prepareIndex("test").setId("1").setSource("name", "ARCOTEL Hotels Deutschland").get();
        refresh();
        SearchResponse search = client().prepareSearch("test")
            .setQuery(matchQuery("name.autocomplete", "deut tel").operator(Operator.OR))
            .highlighter(new HighlightBuilder().field("name.autocomplete"))
            .get();
        assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCOTEL Hotels Deutschland"));
    }

    public void testMultiPhraseCutoff() throws IOException {
        /*
         * MultiPhraseQuery can literally kill an entire node if there are too many terms in the
We cut off and extract terms if there are more than 16 terms in the query */ assertAcked( prepareCreate("test").setMapping( "body", "type=text,analyzer=custom_analyzer," + "search_analyzer=custom_analyzer,term_vector=with_positions_offsets" ) .setSettings( Settings.builder() .put(indexSettings()) .put("analysis.filter.wordDelimiter.type", "word_delimiter") .put("analysis.filter.wordDelimiter.type.split_on_numerics", false) .put("analysis.filter.wordDelimiter.generate_word_parts", true) .put("analysis.filter.wordDelimiter.generate_number_parts", true) .put("analysis.filter.wordDelimiter.catenate_words", true) .put("analysis.filter.wordDelimiter.catenate_numbers", true) .put("analysis.filter.wordDelimiter.catenate_all", false) .put("analysis.analyzer.custom_analyzer.tokenizer", "whitespace") .putList("analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter") ) ); ensureGreen(); client().prepareIndex("test") .setId("1") .setSource( "body", "Test: http://www.facebook.com http://elasticsearch.org " + "http://xing.com http://cnn.com http://quora.com http://twitter.com this is " + "a test for highlighting feature Test: http://www.facebook.com " + "http://elasticsearch.org http://xing.com http://cnn.com http://quora.com " + "http://twitter.com this is a test for highlighting feature" ) .get(); refresh(); SearchResponse search = client().prepareSearch() .setQuery(matchPhraseQuery("body", "Test: http://www.facebook.com ")) .highlighter(new HighlightBuilder().field("body").highlighterType("fvh")) .get(); assertHighlight(search, 0, "body", 0, startsWith("Test: http://www.facebook.com")); search = client().prepareSearch() .setQuery( matchPhraseQuery( "body", "Test: http://www.facebook.com " + "http://elasticsearch.org http://xing.com http://cnn.com " + "http://quora.com http://twitter.com this is a test for highlighting " + "feature Test: http://www.facebook.com http://elasticsearch.org " + "http://xing.com http://cnn.com http://quora.com http://twitter.com this " + "is a test for highlighting feature" ) ) .highlighter(new HighlightBuilder().field("body").highlighterType("fvh")) .execute() .actionGet(); assertHighlight( search, 0, "body", 0, equalTo( "Test: " + "http://www.facebook.com http://elasticsearch.org " + "http://xing.com http://cnn.com http://quora.com" ) ); } public void testSynonyms() throws IOException { Settings.Builder builder = Settings.builder() .put(indexSettings()) .put("index.analysis.analyzer.synonym.tokenizer", "standard") .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase") .put("index.analysis.filter.synonym.type", "synonym") .putList("index.analysis.filter.synonym.synonyms", "fast,quick"); assertAcked( prepareCreate("test").setSettings(builder.build()) .setMapping( "field1", "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," + "analyzer=standard,index_options=offsets" ) ); ensureGreen(); client().prepareIndex("test").setId("0").setSource("field1", "The quick brown fox jumps over the lazy dog").get(); refresh(); for (String highlighterType : new String[] { "plain", "fvh", "unified" }) { logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1"); SearchSourceBuilder source = searchSource().query(matchQuery("field1", "quick brown fox").operator(Operator.AND)) .highlighter(highlight().field("field1").order("score").preTags("").postTags("").highlighterType(highlighterType)); SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); 
            assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog"));

            source = searchSource().query(matchQuery("field1", "fast brown fox").operator(Operator.AND))
                .highlighter(highlight().field("field1").order("score").preTags("").postTags(""));
            searchResponse = client().search(searchRequest("test").source(source)).actionGet();
            assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog"));
        }
    }

    public void testPhrasePrefix() throws IOException {
        Settings.Builder builder = Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer.synonym.tokenizer", "standard")
            .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
            .put("index.analysis.filter.synonym.type", "synonym")
            .putList("index.analysis.filter.synonym.synonyms", "quick => fast");

        assertAcked(prepareCreate("first_test_index").setSettings(builder.build()).setMapping(type1TermVectorMapping()));
        ensureGreen();

        client().prepareIndex("first_test_index")
            .setId("0")
            .setSource(
                "field0",
                "The quick brown fox jumps over the lazy dog",
                "field1",
                "The quick brown fox jumps over the lazy dog"
            )
            .get();
        client().prepareIndex("first_test_index")
            .setId("1")
            .setSource("field1", "The quick browse button is a fancy thing, right bro?")
            .get();
        refresh();

        logger.info("--> highlighting and searching on field0");
        SearchSourceBuilder source = searchSource().query(matchAllQuery());
        SearchResponse searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
        assertHitCount(searchResponse, 2);

        source = searchSource().query(matchPhrasePrefixQuery("field0", "bro"))
            .highlighter(highlight().field("field0").order("score").preTags("").postTags(""));
        searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
        assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog"));

        source = searchSource().query(matchPhrasePrefixQuery("field0", "quick bro"))
            .highlighter(highlight().field("field0").order("score").preTags("").postTags(""));
        searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
        assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog"));

        logger.info("--> highlighting and searching on field1");
        source = searchSource().query(
            boolQuery().should(matchPhrasePrefixQuery("field1", "test")).should(matchPhrasePrefixQuery("field1", "bro"))
        ).highlighter(highlight().field("field1").order("score").preTags("").postTags(""));
        searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
        assertThat(searchResponse.getHits().getTotalHits().value, equalTo(2L));
        for (int i = 0; i < 2; i++) {
            assertHighlight(
                searchResponse,
                i,
                "field1",
                0,
                1,
                anyOf(
                    equalTo("The quick browse button is a fancy thing, right bro?"),
                    equalTo("The quick brown fox jumps over the lazy dog")
                )
            );
        }

        source = searchSource().query(matchPhrasePrefixQuery("field1", "quick bro"))
            .highlighter(highlight().field("field1").order("score").preTags("").postTags(""));
        searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
        assertHighlight(
            searchResponse,
            0,
            "field1",
            0,
            1,
            anyOf(
                equalTo("The quick browse button is a fancy thing, right bro?"),
                equalTo("The quick brown fox jumps over the lazy dog")
            )
        );
        assertHighlight(
            searchResponse,
            1,
            "field1",
            0,
            1,
            anyOf(
                equalTo("The quick browse button is a fancy thing, right bro?"),
equalTo("The quick brown fox jumps over the lazy dog") ) ); assertAcked( prepareCreate("second_test_index").setSettings(builder.build()) .setMapping( "field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym", "field3", "type=text,analyzer=synonym" ) ); // with synonyms client().prepareIndex("second_test_index") .setId("0") .setSource( "type", "type2", "field4", "The quick brown fox jumps over the lazy dog", "field3", "The quick brown fox jumps over the lazy dog" ) .get(); client().prepareIndex("second_test_index") .setId("1") .setSource("type", "type2", "field4", "The quick browse button is a fancy thing, right bro?") .get(); client().prepareIndex("second_test_index").setId("2").setSource("type", "type2", "field4", "a quick fast blue car").get(); refresh(); source = searchSource().postFilter(termQuery("type", "type2")) .query(matchPhrasePrefixQuery("field3", "fast bro")) .highlighter(highlight().field("field3").order("score").preTags("").postTags("")); searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); logger.info("--> highlighting and searching on field4"); source = searchSource().postFilter(termQuery("type", "type2")) .query(matchPhrasePrefixQuery("field4", "the fast bro")) .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); assertHighlight( searchResponse, 0, "field4", 0, 1, anyOf( equalTo("The quick browse button is a fancy thing, right bro?"), equalTo("The quick brown fox jumps over the lazy dog") ) ); assertHighlight( searchResponse, 1, "field4", 0, 1, anyOf( equalTo("The quick browse button is a fancy thing, right bro?"), equalTo("The quick brown fox jumps over the lazy dog") ) ); logger.info("--> highlighting and searching on field4"); source = searchSource().postFilter(termQuery("type", "type2")) .query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")) .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); assertHighlight( searchResponse, 0, "field4", 0, 1, anyOf(equalTo("a quick fast blue car"), equalTo("a quick fast blue car")) ); } public static XContentBuilder type1TermVectorMapping() throws IOException { return XContentFactory.jsonBuilder() .startObject() .startObject("_doc") .startObject("properties") .startObject("field1") .field("type", "text") .field("term_vector", "with_positions_offsets") .endObject() .startObject("field2") .field("type", "text") .field("term_vector", "with_positions_offsets") .endObject() .endObject() .endObject() .endObject(); } }