/* * SPDX-License-Identifier: Apache-2.0 * * The OpenSearch Contributors require contributions made to * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ /* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /* * Modifications Copyright OpenSearch Contributors. See * GitHub history for details. */ package org.opensearch.index.mapper; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.tests.analysis.MockLowerCaseFilter; import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.util.BytesRef; import org.opensearch.common.Strings; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.CharFilterFactory; import org.opensearch.index.analysis.CustomAnalyzer; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.LowercaseNormalizer; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.analysis.PreConfiguredTokenFilter; import org.opensearch.index.analysis.TokenFilterFactory; import org.opensearch.index.analysis.TokenizerFactory; import org.opensearch.index.termvectors.TermVectorsService; import org.opensearch.indices.analysis.AnalysisModule; import org.opensearch.plugins.AnalysisPlugin; import org.opensearch.plugins.Plugin; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Map; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; public class KeywordFieldMapperTests extends MapperTestCase { /** * Creates a copy of the lowercase token filter which we use for testing merge errors. */ public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin { @Override public List getPreConfiguredTokenFilters() { return singletonList(PreConfiguredTokenFilter.singleton("mock_other_lowercase", true, MockLowerCaseFilter::new)); } @Override public Map> getTokenizers() { return singletonMap( "keyword", (indexSettings, environment, name, settings) -> TokenizerFactory.newFactory( name, () -> new MockTokenizer(MockTokenizer.KEYWORD, false) ) ); } } @Override protected void writeFieldValue(XContentBuilder builder) throws IOException { builder.value("value"); } public final void testExistsQueryDocValuesDisabled() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); b.field("doc_values", false); if (randomBoolean()) { b.field("norms", false); } })); assertExistsQuery(mapperService); assertParseMinimalWarnings(); } public final void testExistsQueryDocValuesDisabledWithNorms() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); b.field("doc_values", false); b.field("norms", true); })); assertExistsQuery(mapperService); assertParseMinimalWarnings(); } @Override protected Collection getPlugins() { return singletonList(new MockAnalysisPlugin()); } @Override protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) { return new IndexAnalyzers( singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())), Map.of( "lowercase", new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()), "other_lowercase", new NamedAnalyzer("other_lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()) ), singletonMap( "lowercase", new NamedAnalyzer( "lowercase", AnalyzerScope.INDEX, new CustomAnalyzer( TokenizerFactory.newFactory("lowercase", WhitespaceTokenizer::new), new CharFilterFactory[0], new TokenFilterFactory[] { new TokenFilterFactory() { @Override public String name() { return "lowercase"; } @Override public TokenStream create(TokenStream tokenStream) { return new LowerCaseFilter(tokenStream); } } } ) ) ) ); } @Override protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "keyword"); } @Override protected void assertParseMaximalWarnings() { assertWarnings("Parameter [boost] on field [field] is deprecated and will be removed in 3.0"); } protected void registerParameters(ParameterChecker checker) throws IOException { checker.registerConflictCheck("doc_values", b -> b.field("doc_values", false)); checker.registerConflictCheck("index", b -> b.field("index", false)); checker.registerConflictCheck("store", b -> b.field("store", true)); checker.registerConflictCheck("index_options", b -> b.field("index_options", "freqs")); checker.registerConflictCheck("null_value", b -> b.field("null_value", "foo")); checker.registerConflictCheck("similarity", b -> b.field("similarity", "boolean")); checker.registerConflictCheck("normalizer", b -> b.field("normalizer", "lowercase")); checker.registerUpdateCheck(b -> b.field("eager_global_ordinals", true), m -> assertTrue(m.fieldType().eagerGlobalOrdinals())); checker.registerUpdateCheck(b -> b.field("ignore_above", 256), m -> assertEquals(256, ((KeywordFieldMapper) m).ignoreAbove())); checker.registerUpdateCheck( b -> b.field("split_queries_on_whitespace", true), m -> assertEquals("_whitespace", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name()) ); // norms can be set from true to false, but not vice versa checker.registerConflictCheck("norms", b -> b.field("norms", true)); checker.registerUpdateCheck(b -> { b.field("type", "keyword"); b.field("norms", true); }, b -> { b.field("type", "keyword"); b.field("norms", false); }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())); checker.registerUpdateCheck(b -> b.field("boost", 2.0), m -> assertEquals(m.fieldType().boost(), 2.0, 0)); } public void testDefaults() throws Exception { XContentBuilder mapping = fieldMapping(this::minimalMapping); DocumentMapper mapper = createDocumentMapper(mapping); assertEquals(Strings.toString(mapping), mapper.mappingSource().toString()); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(2, fields.length); assertEquals(new BytesRef("1234"), fields[0].binaryValue()); IndexableFieldType fieldType = fields[0].fieldType(); assertThat(fieldType.omitNorms(), equalTo(true)); assertFalse(fieldType.tokenized()); assertFalse(fieldType.stored()); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); assertThat(fieldType.storeTermVectors(), equalTo(false)); assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); assertEquals(DocValuesType.NONE, fieldType.docValuesType()); assertEquals(new BytesRef("1234"), fields[1].binaryValue()); fieldType = fields[1].fieldType(); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); // used by TermVectorsService assertArrayEquals(new String[] { "1234" }, TermVectorsService.getValues(doc.rootDoc().getFields("field"))); } public void testIgnoreAbove() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "keyword").field("ignore_above", 5))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "elk"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(2, fields.length); doc = mapper.parse(source(b -> b.field("field", "opensearch"))); fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); } public void testNullValue() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); ParsedDocument doc = mapper.parse(source(b -> b.nullField("field"))); assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "keyword").field("null_value", "uri"))); doc = mapper.parse(source(b -> {})); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); doc = mapper.parse(source(b -> b.nullField("field"))); fields = doc.rootDoc().getFields("field"); assertEquals(2, fields.length); assertEquals(new BytesRef("uri"), fields[0].binaryValue()); } public void testEnableStore() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "keyword").field("store", true))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(2, fields.length); assertTrue(fields[0].fieldType().stored()); } public void testDisableIndex() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "keyword").field("index", false))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); assertEquals(IndexOptions.NONE, fields[0].fieldType().indexOptions()); assertEquals(DocValuesType.SORTED_SET, fields[0].fieldType().docValuesType()); } public void testDisableDocValues() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "keyword").field("doc_values", false))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType()); } public void testIndexOptions() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "keyword").field("index_options", "freqs"))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(2, fields.length); assertEquals(IndexOptions.DOCS_AND_FREQS, fields[0].fieldType().indexOptions()); for (String indexOptions : Arrays.asList("positions", "offsets")) { MapperParsingException e = expectThrows( MapperParsingException.class, () -> createMapperService(fieldMapping(b -> b.field("type", "keyword").field("index_options", indexOptions))) ); assertThat( e.getMessage(), containsString("Unknown value [" + indexOptions + "] for field [index_options] - accepted values are [docs, freqs]") ); } } public void testBoost() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> b.field("type", "keyword").field("boost", 2f))); assertThat(mapperService.fieldType("field").boost(), equalTo(2f)); assertWarnings("Parameter [boost] on field [field] is deprecated and will be removed in 3.0"); } public void testEnableNorms() throws IOException { DocumentMapper mapper = createDocumentMapper( fieldMapping(b -> b.field("type", "keyword").field("doc_values", false).field("norms", true)) ); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); assertFalse(fields[0].fieldType().omitNorms()); IndexableField[] fieldNamesFields = doc.rootDoc().getFields(FieldNamesFieldMapper.NAME); assertEquals(0, fieldNamesFields.length); } public void testConfigureSimilarity() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> b.field("type", "keyword").field("similarity", "boolean"))); MappedFieldType ft = mapperService.documentMapper().fieldTypes().get("field"); assertEquals("boolean", ft.getTextSearchInfo().getSimilarity().name()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> merge(mapperService, fieldMapping(b -> b.field("type", "keyword").field("similarity", "BM25"))) ); assertThat(e.getMessage(), containsString("Cannot update parameter [similarity] from [boolean] to [BM25]")); } public void testNormalizer() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "keyword").field("normalizer", "lowercase"))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "AbC"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(2, fields.length); assertEquals(new BytesRef("abc"), fields[0].binaryValue()); IndexableFieldType fieldType = fields[0].fieldType(); assertThat(fieldType.omitNorms(), equalTo(true)); assertFalse(fieldType.tokenized()); assertFalse(fieldType.stored()); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); assertThat(fieldType.storeTermVectors(), equalTo(false)); assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); assertEquals(DocValuesType.NONE, fieldType.docValuesType()); assertEquals(new BytesRef("abc"), fields[1].binaryValue()); fieldType = fields[1].fieldType(); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); } public void testParsesKeywordNestedEmptyObjectStrict() throws IOException { DocumentMapper defaultMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); MapperParsingException ex = expectThrows( MapperParsingException.class, () -> defaultMapper.parse(source(b -> b.startObject("field").endObject())) ); assertEquals( "failed to parse field [field] of type [keyword] in document with id '1'. " + "Preview of field's value: '{}'", ex.getMessage() ); } public void testParsesKeywordNestedListStrict() throws IOException { DocumentMapper defaultMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); MapperParsingException ex = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source(b -> { b.startArray("field"); { b.startObject(); { b.startArray("array_name").value("inner_field_first").value("inner_field_second").endArray(); } b.endObject(); } b.endArray(); }))); assertEquals( "failed to parse field [field] of type [keyword] in document with id '1'. " + "Preview of field's value: '{array_name=[inner_field_first, inner_field_second]}'", ex.getMessage() ); } public void testParsesKeywordNullStrict() throws IOException { DocumentMapper defaultMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); Exception e = expectThrows( MapperParsingException.class, () -> defaultMapper.parse(source(b -> b.startObject("field").nullField("field_name").endObject())) ); assertEquals( "failed to parse field [field] of type [keyword] in document with id '1'. " + "Preview of field's value: '{field_name=null}'", e.getMessage() ); } public void testUpdateNormalizer() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> b.field("type", "keyword").field("normalizer", "lowercase"))); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> merge(mapperService, fieldMapping(b -> b.field("type", "keyword").field("normalizer", "other_lowercase"))) ); assertEquals( "Mapper for [field] conflicts with existing mapper:\n" + "\tCannot update parameter [normalizer] from [lowercase] to [other_lowercase]", e.getMessage() ); } public void testSplitQueriesOnWhitespace() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { b.startObject("field").field("type", "keyword").endObject(); b.startObject("field_with_normalizer"); { b.field("type", "keyword"); b.field("normalizer", "lowercase"); b.field("split_queries_on_whitespace", true); } b.endObject(); })); MappedFieldType fieldType = mapperService.fieldType("field"); assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); KeywordFieldMapper.KeywordFieldType ft = (KeywordFieldMapper.KeywordFieldType) fieldType; Analyzer a = ft.getTextSearchInfo().getSearchAnalyzer(); assertTokenStreamContents(a.tokenStream("", "Hello World"), new String[] { "Hello World" }); fieldType = mapperService.fieldType("field_with_normalizer"); assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); ft = (KeywordFieldMapper.KeywordFieldType) fieldType; assertThat(ft.getTextSearchInfo().getSearchAnalyzer().name(), equalTo("lowercase")); assertTokenStreamContents( ft.getTextSearchInfo().getSearchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] { "hello", "world" } ); mapperService = createMapperService(mapping(b -> { b.startObject("field").field("type", "keyword").field("split_queries_on_whitespace", true).endObject(); b.startObject("field_with_normalizer"); { b.field("type", "keyword"); b.field("normalizer", "lowercase"); b.field("split_queries_on_whitespace", false); } b.endObject(); })); fieldType = mapperService.fieldType("field"); assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); ft = (KeywordFieldMapper.KeywordFieldType) fieldType; assertTokenStreamContents( ft.getTextSearchInfo().getSearchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] { "Hello", "World" } ); fieldType = mapperService.fieldType("field_with_normalizer"); assertThat(fieldType, instanceOf(KeywordFieldMapper.KeywordFieldType.class)); ft = (KeywordFieldMapper.KeywordFieldType) fieldType; assertThat(ft.getTextSearchInfo().getSearchAnalyzer().name(), equalTo("lowercase")); assertTokenStreamContents( ft.getTextSearchInfo().getSearchAnalyzer().analyzer().tokenStream("", "Hello World"), new String[] { "hello world" } ); } }