/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System;
using System.Collections.Generic;
using OpenSearch.Net.Utf8Json;
namespace OpenSearch.Client.Specification.IndicesApi
{
/// <summary>
/// A list whose entries are either string references to stored token filters or inline
/// token filter definitions (<see cref="ITokenFilter"/>).
/// </summary>
[JsonFormatter(typeof(UnionListFormatter))]
public class AnalyzeTokenFilters : List>
{
	/// <summary>Creates an empty list.</summary>
	public AnalyzeTokenFilters() { }

	/// <summary>
	/// Creates a list by passing every entry of <paramref name="tokenFilters"/> to AddIfNotNull.
	/// A null argument leaves the list empty.
	/// </summary>
	/// <param name="tokenFilters">The entries to copy; may be null.</param>
	public AnalyzeTokenFilters(List> tokenFilters)
	{
		if (tokenFilters != null)
		{
			foreach (var entry in tokenFilters)
			{
				this.AddIfNotNull(entry);
			}
		}
	}

	/// <summary>
	/// Creates a list by passing every name in <paramref name="tokenFilters"/> to AddIfNotNull.
	/// A null argument leaves the list empty.
	/// </summary>
	/// <param name="tokenFilters">Names of token filters to reference; may be null.</param>
	public AnalyzeTokenFilters(string[] tokenFilters)
	{
		if (tokenFilters != null)
		{
			foreach (var name in tokenFilters)
			{
				this.AddIfNotNull(name);
			}
		}
	}

	/// <summary>Wraps <paramref name="filter"/> in a union value and appends it to the list.</summary>
	/// <param name="filter">The inline token filter definition to append.</param>
	public void Add(ITokenFilter filter)
	{
		var wrapped = new Union(filter);
		Add(wrapped);
	}

	/// <summary>
	/// Converts a single inline token filter into a one-element list; null converts to null.
	/// </summary>
	public static implicit operator AnalyzeTokenFilters(TokenFilterBase tokenFilter)
	{
		if (tokenFilter == null)
		{
			return null;
		}

		return new AnalyzeTokenFilters { tokenFilter };
	}

	/// <summary>
	/// Converts a single token filter name into a one-element list; null converts to null.
	/// </summary>
	public static implicit operator AnalyzeTokenFilters(string tokenFilter)
	{
		if (tokenFilter == null)
		{
			return null;
		}

		return new AnalyzeTokenFilters { tokenFilter };
	}

	/// <summary>
	/// Converts an array of token filter names into a list; null converts to null.
	/// </summary>
	public static implicit operator AnalyzeTokenFilters(string[] tokenFilters)
	{
		if (tokenFilters == null)
		{
			return null;
		}

		return new AnalyzeTokenFilters(tokenFilters);
	}
}
/// <summary>
/// Fluent builder for an <see cref="AnalyzeTokenFilters"/> list. <see cref="Name(string)"/> appends a
/// reference to a named token filter; every other method appends the inline token filter
/// returned by its selector.
/// </summary>
/// <remarks>
/// For methods that invoke the selector directly, a null selector or a selector returning null
/// appends nothing. Methods whose selector defaults to null go through InvokeOrDefault with a
/// freshly created descriptor (presumably used as the fallback when no selector is supplied;
/// confirm against InvokeOrDefault).
/// </remarks>
public class AnalyzeTokenFiltersDescriptor : DescriptorPromiseBase
{
	/// <summary>Creates a descriptor that starts from an empty <see cref="AnalyzeTokenFilters"/> list.</summary>
	public AnalyzeTokenFiltersDescriptor() : base(new AnalyzeTokenFilters()) { }

	/// <summary>
	/// A reference to a token filter that is part of the mapping
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Name(string tokenFilter) => Assign(tokenFilter, (a, v) => a.AddIfNotNull(v));

	/// <summary>
	/// Appends <paramref name="filter"/> to the list being built unless it is null.
	/// </summary>
	private AnalyzeTokenFiltersDescriptor AssignIfNotNull(ITokenFilter filter) =>
		Assign(filter, (a, v) => { if (v != null) a.Add(v); });

	/// <summary>
	/// Token filters that decompose compound words using a dictionary
	/// </summary>
	public AnalyzeTokenFiltersDescriptor DictionaryDecompounder(
		Func selector
	) =>
		AssignIfNotNull(selector?.Invoke(new DictionaryDecompounderTokenFilterDescriptor()));

	/// <summary>
	/// Token filters that decompose compound words using FOP XML
	/// </summary>
	public AnalyzeTokenFiltersDescriptor HyphenationDecompounder(
		Func selector
	) =>
		AssignIfNotNull(selector?.Invoke(new HyphenationDecompounderTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type edgeNGram.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor EdgeNGram(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new EdgeNGramTokenFilterDescriptor()));

	/// <summary>
	/// The phonetic token filter is provided as a plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Phonetic(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new PhoneticTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type shingle that constructs shingles (token n-grams) from a token stream.
	/// In other words, it creates combinations of tokens as a single token.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Shingle(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new ShingleTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type stop that removes stop words from token streams.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Stop(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new StopTokenFilterDescriptor()));

	/// <summary>
	/// The synonym token filter makes it easy to handle synonyms during the analysis process.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Synonym(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new SynonymTokenFilterDescriptor()));

	/// <summary>
	/// The synonym_graph token filter makes it easy to handle synonyms,
	/// including multi-word synonyms, correctly during the analysis process.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor SynonymGraph(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new SynonymGraphTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type word_delimiter that splits words into subwords and performs
	/// optional transformations on subword groups.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor WordDelimiter(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new WordDelimiterTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type word_delimiter_graph that splits words into subwords and performs
	/// optional transformations on subword groups. It is the graph-aware counterpart of the
	/// word_delimiter token filter.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor WordDelimiterGraph(Func selector
	) =>
		AssignIfNotNull(selector?.Invoke(new WordDelimiterGraphTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type asciifolding that converts alphabetic, numeric, and symbolic Unicode characters which are
	/// not in the first 127 ASCII characters (the “Basic Latin” Unicode block) into their ASCII equivalents, if one exists.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor AsciiFolding(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new AsciiFoldingTokenFilterDescriptor()));

	/// <summary>
	/// Token filter that generates bigrams for frequently occurring terms. Single terms are still indexed.
	/// Note, common_words or common_words_path field is required.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor CommonGrams(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new CommonGramsTokenFilterDescriptor()));

	/// <summary>
	/// Splits tokens into tokens and payload whenever a delimiter character is found.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor DelimitedPayload(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new DelimitedPayloadTokenFilterDescriptor()));

	/// <summary>
	/// A token filter which removes elisions. For example, “l’avion” (the plane) will be tokenized as “avion” (plane).
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Elision(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new ElisionTokenFilterDescriptor()));

	/// <summary>
	/// Basic support for hunspell stemming.
	/// Hunspell dictionaries will be picked up from a dedicated hunspell directory on the filesystem.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Hunspell(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new HunspellTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type keep_types that only keeps tokens whose token type is contained in a predefined set of types.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor KeepTypes(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new KeepTypesTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type keep that only keeps tokens with text contained in a predefined set of words.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor KeepWords(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new KeepWordsTokenFilterDescriptor()));

	/// <summary>
	/// Protects words from being modified by stemmers. Must be placed before any stemming filters.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor KeywordMarker(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new KeywordMarkerTokenFilterDescriptor()));

	/// <summary>
	/// The kstem token filter is a high performance filter for english.
	/// All terms must already be lowercased (use lowercase filter) for this filter to work correctly.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor KStem(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new KStemTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type length that removes words that are too long or too short for the stream.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Length(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new LengthTokenFilterDescriptor()));

	/// <summary>
	/// Limits the number of tokens that are indexed per document and field.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor LimitTokenCount(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new LimitTokenCountTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type lowercase that normalizes token text to lower case.
	/// Lowercase token filter supports Greek and Turkish lowercase token filters through the language parameter.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Lowercase(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new LowercaseTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type nGram.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor NGram(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new NGramTokenFilterDescriptor()));

	/// <summary>
	/// The pattern_capture token filter, unlike the pattern tokenizer, emits a token for every capture group in the regular expression.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor PatternCapture(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new PatternCaptureTokenFilterDescriptor()));

	/// <summary>
	/// The pattern_replace token filter makes it easy to handle string replacements based on a regular expression.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor PatternReplace(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new PatternReplaceTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type porterStem that transforms the token stream as per the Porter stemming algorithm.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor PorterStem(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new PorterStemTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type reverse that simply reverses the tokens.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Reverse(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new ReverseTokenFilterDescriptor()));

	/// <summary>
	/// A filter that stems words using a Snowball-generated stemmer.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Snowball(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new SnowballTokenFilterDescriptor()));

	/// <summary>
	/// Appends the token filter that <paramref name="selector"/> builds from a new
	/// <see cref="ConditionTokenFilterDescriptor"/>.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Condition(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new ConditionTokenFilterDescriptor()));

	/// <summary>
	/// A filter that stems words (similar to snowball, but with more options).
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Stemmer(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new StemmerTokenFilterDescriptor()));

	/// <summary>
	/// Appends the token filter that <paramref name="selector"/> builds from a new
	/// <see cref="PredicateTokenFilterDescriptor"/>.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Predicate(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new PredicateTokenFilterDescriptor()));

	/// <summary>
	/// Overrides stemming algorithms, by applying a custom mapping, then protecting these terms from being modified by stemmers. Must be placed
	/// before any stemming filters.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor StemmerOverride(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new StemmerOverrideTokenFilterDescriptor()));

	/// <summary>
	/// The trim token filter trims surrounding whitespaces around a token.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Trim(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new TrimTokenFilterDescriptor()));

	/// <summary>
	/// The truncate token filter can be used to truncate tokens into a specific length. This can come in handy with keyword (single token)
	/// based mapped fields that are used for sorting in order to reduce memory usage.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Truncate(Func selector) =>
		AssignIfNotNull(selector?.Invoke(new TruncateTokenFilterDescriptor()));

	/// <summary>
	/// The unique token filter can be used to only index unique tokens during analysis. By default it is applied on all the token stream.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Unique(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new UniqueTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type uppercase that normalizes token text to upper case.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Uppercase(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new UppercaseTokenFilterDescriptor()));

	/// <summary>
	/// A token filter of type fingerprint that emits a single token which is useful
	/// for fingerprinting a body of text, and/or providing a token that can be clustered on.
	/// It does this by sorting the tokens, deduplicating and then concatenating them back into a single token.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Fingerprint(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new FingerprintTokenFilterDescriptor()));

	/// <summary>
	/// The kuromoji_stemmer token filter normalizes common katakana spelling variations ending in a
	/// long sound character by removing this character (U+30FC). Only full-width katakana characters are supported.
	/// Part of the `analysis-kuromoji` plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor
		KuromojiStemmer(Func selector = null) =>
		AssignIfNotNull(selector.InvokeOrDefault(new KuromojiStemmerTokenFilterDescriptor()));

	/// <summary>
	/// The kuromoji_readingform token filter replaces the token with its reading form in either katakana or romaji.
	/// Part of the `analysis-kuromoji` plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor KuromojiReadingForm(
		Func selector
	) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new KuromojiReadingFormTokenFilterDescriptor()));

	/// <summary>
	/// The kuromoji_part_of_speech token filter removes tokens that match a set of part-of-speech tags.
	/// Part of the `analysis-kuromoji` plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor KuromojiPartOfSpeech(
		Func selector
	) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new KuromojiPartOfSpeechTokenFilterDescriptor()));

	/// <summary>
	/// Collations are used for sorting documents in a language-specific word order. The icu_collation token filter is available to all indices and
	/// defaults to using the DUCET collation, which is a best-effort attempt at language-neutral sorting.
	/// Part of the `analysis-icu` plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor IcuCollation(Func selector) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new IcuCollationTokenFilterDescriptor()));

	/// <summary>
	/// Case folding of Unicode characters based on UTR#30, like the ASCII-folding token filter on steroids.
	/// Part of the `analysis-icu` plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor IcuFolding(Func selector) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new IcuFoldingTokenFilterDescriptor()));

	/// <summary>
	/// Normalizes as defined here: http://userguide.icu-project.org/transforms/normalization
	/// Part of the `analysis-icu` plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor IcuNormalization(Func selector) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new IcuNormalizationTokenFilterDescriptor()));

	/// <summary>
	/// Transforms are used to process Unicode text in many different ways, such as case mapping,
	/// normalization, transliteration and bidirectional text handling.
	/// Part of the `analysis-icu` plugin.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor IcuTransform(Func selector) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new IcuTransformTokenFilterDescriptor()));

	/// <summary>
	/// Appends the token filter that <paramref name="selector"/> builds from a new
	/// <see cref="NoriPartOfSpeechTokenFilterDescriptor"/>.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor NoriPartOfSpeech(Func selector) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new NoriPartOfSpeechTokenFilterDescriptor()));

	/// <summary>
	/// Appends the token filter that <paramref name="selector"/> builds from a new
	/// <see cref="MultiplexerTokenFilterDescriptor"/>.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor Multiplexer(Func selector) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new MultiplexerTokenFilterDescriptor()));

	/// <summary>
	/// Appends the token filter that <paramref name="selector"/> builds from a new
	/// <see cref="RemoveDuplicatesTokenFilterDescriptor"/>.
	/// </summary>
	public AnalyzeTokenFiltersDescriptor RemoveDuplicates(Func selector) =>
		// Null-conditional call: a null selector appends nothing, like the sibling methods.
		AssignIfNotNull(selector?.Invoke(new RemoveDuplicatesTokenFilterDescriptor()));
}
}