/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

using System;
using System.Collections.Generic;
using OpenSearch.Net.Utf8Json;

namespace OpenSearch.Client.Specification.IndicesApi
{
    // NOTE(review): generic type-argument lists are not visible in the reviewed source
    // (e.g. `List>`, `Func selector`, `new Union(filter)`). Those tokens are kept exactly
    // as they appeared rather than guessed - confirm them against the original file.

    /// <summary>
    /// A list of string references to stored token filters and/or inline token filter definitions
    /// </summary>
    [JsonFormatter(typeof(UnionListFormatter))]
    public class AnalyzeTokenFilters : List>
    {
        /// <summary>
        /// Creates an empty list of token filters.
        /// </summary>
        public AnalyzeTokenFilters() { }

        /// <summary>
        /// Creates a list holding every non-null entry of <paramref name="tokenFilters" />.
        /// A null argument results in an empty list.
        /// </summary>
        /// <param name="tokenFilters">The entries to copy; may be null.</param>
        public AnalyzeTokenFilters(List> tokenFilters)
        {
            if (tokenFilters == null)
            {
                return;
            }

            foreach (var v in tokenFilters)
            {
                this.AddIfNotNull(v);
            }
        }

        /// <summary>
        /// Creates a list from an array of token filter names, passing each one to <c>AddIfNotNull</c>.
        /// A null argument results in an empty list.
        /// </summary>
        /// <param name="tokenFilters">The token filter names to add; may be null.</param>
        public AnalyzeTokenFilters(string[] tokenFilters)
        {
            if (tokenFilters == null)
            {
                return;
            }

            foreach (var v in tokenFilters)
            {
                this.AddIfNotNull(v);
            }
        }

        /// <summary>
        /// Adds an inline token filter definition by wrapping it in a union entry.
        /// </summary>
        /// <param name="filter">The token filter definition to add.</param>
        public void Add(ITokenFilter filter) => Add(new Union(filter));

        /// <summary>
        /// Converts a single token filter definition into a one-element list; null converts to null.
        /// </summary>
        public static implicit operator AnalyzeTokenFilters(TokenFilterBase tokenFilter) =>
            tokenFilter == null ? null : new AnalyzeTokenFilters { tokenFilter };

        /// <summary>
        /// Converts a single token filter name into a one-element list; null converts to null.
        /// </summary>
        public static implicit operator AnalyzeTokenFilters(string tokenFilter) =>
            tokenFilter == null ? null : new AnalyzeTokenFilters { tokenFilter };

        /// <summary>
        /// Converts an array of token filter names into a list; null converts to null.
        /// </summary>
        public static implicit operator AnalyzeTokenFilters(string[] tokenFilters) =>
            tokenFilters == null ? null : new AnalyzeTokenFilters(tokenFilters);
    }

    /// <summary>
    /// Fluent builder that accumulates entries into an <see cref="AnalyzeTokenFilters" /> list.
    /// </summary>
    /// <remarks>
    /// Every builder method creates a fresh descriptor for its filter type, hands it to the
    /// caller-supplied selector and adds the selector's result to the list.
    /// Methods with a required selector invoke it null-conditionally, so a null selector (or a
    /// selector returning null) adds nothing.
    /// Methods whose selector defaults to null go through <c>InvokeOrDefault</c>, which is defined
    /// outside this file - presumably it falls back to the freshly created descriptor when no
    /// selector is given; confirm against its definition.
    /// </remarks>
    public class AnalyzeTokenFiltersDescriptor : DescriptorPromiseBase
    {
        /// <summary>
        /// Starts the builder with an empty <see cref="AnalyzeTokenFilters" /> list.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor() : base(new AnalyzeTokenFilters()) { }

        /// <summary>
        /// A reference to a token filter that is part of the mapping
        /// </summary>
        /// <param name="tokenFilter">The name of the token filter to reference.</param>
        public AnalyzeTokenFiltersDescriptor Name(string tokenFilter) =>
            Assign(tokenFilter, (a, v) => a.AddIfNotNull(v));

        /// <summary>
        /// Adds <paramref name="filter" /> to the list unless it is null, so that a missing
        /// selector or a selector returning null leaves the list untouched.
        /// </summary>
        private AnalyzeTokenFiltersDescriptor AssignIfNotNull(ITokenFilter filter) =>
            Assign(filter, (a, v) =>
            {
                if (v != null)
                {
                    a.Add(v);
                }
            });

        /// <summary>
        /// Token filters that allow decomposing compound words using a dictionary
        /// </summary>
        public AnalyzeTokenFiltersDescriptor DictionaryDecompounder(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new DictionaryDecompounderTokenFilterDescriptor()));

        /// <summary>
        /// Token filters that allow decomposing compound words using FOP XML
        /// </summary>
        public AnalyzeTokenFiltersDescriptor HyphenationDecompounder(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new HyphenationDecompounderTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type edgeNGram.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor EdgeNGram(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new EdgeNGramTokenFilterDescriptor()));

        /// <summary>
        /// The phonetic token filter is provided as a plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Phonetic(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new PhoneticTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type shingle that constructs shingles (token n-grams) from a token stream.
        /// In other words, it creates combinations of tokens as a single token.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Shingle(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new ShingleTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type stop that removes stop words from token streams.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Stop(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new StopTokenFilterDescriptor()));

        /// <summary>
        /// The synonym token filter makes it easy to handle synonyms during the analysis process.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Synonym(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new SynonymTokenFilterDescriptor()));

        /// <summary>
        /// The synonym_graph token filter makes it easy to handle synonyms,
        /// including multi-word synonyms, correctly during the analysis process.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor SynonymGraph(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new SynonymGraphTokenFilterDescriptor()));

        /// <summary>
        /// Adds a word delimiter token filter, built by configuring a new
        /// <c>WordDelimiterTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor WordDelimiter(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new WordDelimiterTokenFilterDescriptor()));

        /// <summary>
        /// Adds a word delimiter graph token filter, built by configuring a new
        /// <c>WordDelimiterGraphTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor WordDelimiterGraph(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new WordDelimiterGraphTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type asciifolding that converts alphabetic, numeric, and symbolic Unicode characters which are
        /// not in the first 127 ASCII characters (the “Basic Latin” Unicode block) into their ASCII equivalents, if one exists.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor AsciiFolding(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new AsciiFoldingTokenFilterDescriptor()));

        /// <summary>
        /// Token filter that generates bigrams for frequently occurring terms. Single terms are still indexed.
        /// Note, common_words or common_words_path field is required.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor CommonGrams(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new CommonGramsTokenFilterDescriptor()));

        /// <summary>
        /// Splits tokens into tokens and payload whenever a delimiter character is found.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor DelimitedPayload(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new DelimitedPayloadTokenFilterDescriptor()));

        /// <summary>
        /// A token filter which removes elisions. For example, “l’avion” (the plane) will be tokenized as “avion” (plane).
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Elision(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new ElisionTokenFilterDescriptor()));

        /// <summary>
        /// Basic support for hunspell stemming.
        /// Hunspell dictionaries will be picked up from a dedicated hunspell directory on the filesystem.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Hunspell(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new HunspellTokenFilterDescriptor()));

        /// <summary>
        /// Adds a keep types token filter, built by configuring a new
        /// <c>KeepTypesTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor KeepTypes(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new KeepTypesTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type keep that only keeps tokens with text contained in a predefined set of words.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor KeepWords(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new KeepWordsTokenFilterDescriptor()));

        /// <summary>
        /// Protects words from being modified by stemmers. Must be placed before any stemming filters.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor KeywordMarker(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new KeywordMarkerTokenFilterDescriptor()));

        /// <summary>
        /// The kstem token filter is a high performance filter for english.
        /// All terms must already be lowercased (use lowercase filter) for this filter to work correctly.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor KStem(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new KStemTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type length that removes words that are too long or too short for the stream.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Length(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new LengthTokenFilterDescriptor()));

        /// <summary>
        /// Limits the number of tokens that are indexed per document and field.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor LimitTokenCount(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new LimitTokenCountTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type lowercase that normalizes token text to lower case.
        /// Lowercase token filter supports Greek and Turkish lowercase token filters through the language parameter.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Lowercase(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new LowercaseTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type nGram.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor NGram(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new NGramTokenFilterDescriptor()));

        /// <summary>
        /// The pattern_capture token filter, unlike the pattern tokenizer, emits a token for every capture group in the regular expression.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor PatternCapture(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new PatternCaptureTokenFilterDescriptor()));

        /// <summary>
        /// The pattern_replace token filter makes it easy to handle string replacements based on a regular expression.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor PatternReplace(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new PatternReplaceTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type porterStem that transforms the token stream as per the Porter stemming algorithm.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor PorterStem(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new PorterStemTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type reverse that simply reverses the tokens.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Reverse(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new ReverseTokenFilterDescriptor()));

        /// <summary>
        /// A filter that stems words using a Snowball-generated stemmer.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Snowball(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new SnowballTokenFilterDescriptor()));

        /// <summary>
        /// Adds a token filter built by configuring a new <c>ConditionTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Condition(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new ConditionTokenFilterDescriptor()));

        /// <summary>
        /// A filter that stems words (similar to snowball, but with more options).
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Stemmer(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new StemmerTokenFilterDescriptor()));

        /// <summary>
        /// Adds a token filter built by configuring a new <c>PredicateTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Predicate(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new PredicateTokenFilterDescriptor()));

        /// <summary>
        /// Overrides stemming algorithms, by applying a custom mapping, then protecting these terms from being modified by stemmers. Must be placed
        /// before any stemming filters.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor StemmerOverride(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new StemmerOverrideTokenFilterDescriptor()));

        /// <summary>
        /// The trim token filter trims surrounding whitespaces around a token.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Trim(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new TrimTokenFilterDescriptor()));

        /// <summary>
        /// The truncate token filter can be used to truncate tokens into a specific length. This can come in handy with keyword (single token)
        /// based mapped fields that are used for sorting in order to reduce memory usage.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Truncate(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new TruncateTokenFilterDescriptor()));

        /// <summary>
        /// The unique token filter can be used to only index unique tokens during analysis. By default it is applied on all the token stream
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Unique(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new UniqueTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type uppercase that normalizes token text to upper case.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Uppercase(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new UppercaseTokenFilterDescriptor()));

        /// <summary>
        /// A token filter of type fingerprint. The fingerprint token filter emits a single token which is useful
        /// for fingerprinting a body of text, and/or providing a token that can be clustered on.
        /// It does this by sorting the tokens, deduplicating and then concatenating them back into a single token.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Fingerprint(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new FingerprintTokenFilterDescriptor()));

        /// <summary>
        /// The kuromoji_stemmer token filter normalizes common katakana spelling variations ending in a
        /// long sound character by removing this character (U+30FC). Only full-width katakana characters are supported.
        /// Part of the `analysis-kuromoji` plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor KuromojiStemmer(Func selector = null) =>
            AssignIfNotNull(selector.InvokeOrDefault(new KuromojiStemmerTokenFilterDescriptor()));

        // The remaining builders invoke the selector null-conditionally, like every other
        // required-selector method in this class, so a null selector adds nothing instead of
        // throwing a NullReferenceException.

        /// <summary>
        /// The kuromoji_readingform token filter replaces the token with its reading form in either katakana or romaji.
        /// Part of the `analysis-kuromoji` plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor KuromojiReadingForm(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new KuromojiReadingFormTokenFilterDescriptor()));

        /// <summary>
        /// The kuromoji_part_of_speech token filter removes tokens that match a set of part-of-speech tags.
        /// Part of the `analysis-kuromoji` plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor KuromojiPartOfSpeech(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new KuromojiPartOfSpeechTokenFilterDescriptor()));

        /// <summary>
        /// Collations are used for sorting documents in a language-specific word order. The icu_collation token filter is available to all indices and
        /// defaults to using the DUCET collation, which is a best-effort attempt at language-neutral sorting.
        /// Part of the `analysis-icu` plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor IcuCollation(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new IcuCollationTokenFilterDescriptor()));

        /// <summary>
        /// Case folding of Unicode characters based on UTR#30, like the ASCII-folding token filter on steroids.
        /// Part of the `analysis-icu` plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor IcuFolding(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new IcuFoldingTokenFilterDescriptor()));

        /// <summary>
        /// Normalizes as defined here: http://userguide.icu-project.org/transforms/normalization
        /// Part of the `analysis-icu` plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor IcuNormalization(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new IcuNormalizationTokenFilterDescriptor()));

        /// <summary>
        /// Transforms are used to process Unicode text in many different ways, such as case mapping,
        /// normalization, transliteration and bidirectional text handling.
        /// Part of the `analysis-icu` plugin.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor IcuTransform(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new IcuTransformTokenFilterDescriptor()));

        /// <summary>
        /// Adds a token filter built by configuring a new <c>NoriPartOfSpeechTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor NoriPartOfSpeech(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new NoriPartOfSpeechTokenFilterDescriptor()));

        /// <summary>
        /// Adds a token filter built by configuring a new <c>MultiplexerTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor Multiplexer(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new MultiplexerTokenFilterDescriptor()));

        /// <summary>
        /// Adds a token filter built by configuring a new <c>RemoveDuplicatesTokenFilterDescriptor</c>.
        /// </summary>
        public AnalyzeTokenFiltersDescriptor RemoveDuplicates(Func selector) =>
            AssignIfNotNull(selector?.Invoke(new RemoveDuplicatesTokenFilterDescriptor()));
    }
}