/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System.Runtime.Serialization;
using OpenSearch.Net;
using OpenSearch.Net.Utf8Json;
namespace OpenSearch.Client
{
/// <summary>
/// The term suggester suggests terms based on edit distance. The provided suggest text is analyzed before terms are suggested.
/// The suggested terms are provided per analyzed suggest text token.
/// The term suggester does not take into account the query that is part of the request.
/// </summary>
[InterfaceDataContract]
[ReadAs(typeof(TermSuggester))]
public interface ITermSuggester : ISuggester
{
/// <summary>
/// Lower cases the suggest text terms after text analysis.
/// </summary>
[DataMember(Name = "lowercase_terms")]
bool? LowercaseTerms { get; set; }
/// <summary>
/// The maximum edit distance candidate suggestions can have in order to be considered as a suggestion.
/// Can only be a value between 1 and 2. Any other value results in a bad request error being thrown.
/// Defaults to 2.
/// </summary>
[DataMember(Name = "max_edits")]
int? MaxEdits { get; set; }
/// <summary>
/// A factor that is used to multiply with the <c>shard_size</c> in order to inspect more candidate spell
/// corrections on the shard level. Can improve accuracy at the cost of performance. Defaults to 5.
/// </summary>
[DataMember(Name = "max_inspections")]
int? MaxInspections { get; set; }
/// <summary>
/// The maximum threshold in number of documents a suggest text token can exist in order to be included.
/// Can be a relative percentage number (e.g. 0.4) or an absolute number to represent document frequencies.
/// If a value higher than 1 is specified then it cannot be fractional. Defaults to 0.01f. This can be used to
/// exclude high frequency terms from being spellchecked. High frequency terms are usually spelled correctly;
/// on top of this, excluding them also improves the spellcheck performance. The shard level document
/// frequencies are used for this option.
/// </summary>
[DataMember(Name = "max_term_freq")]
float? MaxTermFrequency { get; set; }
/// <summary>
/// The minimal threshold in number of documents a suggestion should appear in. This can be specified as an
/// absolute number or as a relative percentage of number of documents. This can improve quality by only
/// suggesting high frequency terms. Defaults to 0f and is not enabled. If a value higher than 1 is specified then the
/// number cannot be fractional. The shard level document frequencies are used for this option.
/// </summary>
[DataMember(Name = "min_doc_freq")]
float? MinDocFrequency { get; set; }
/// <summary>
/// The minimum length a suggest text term must have in order to be included. Defaults to 4.
/// </summary>
[DataMember(Name = "min_word_length")]
int? MinWordLength { get; set; }
/// <summary>
/// The number of minimal prefix characters that must match in order to be a candidate suggestion. Defaults to 1.
/// Increasing this number improves spellcheck performance. Usually misspellings do not occur in the
/// beginning of terms.
/// </summary>
[DataMember(Name = "prefix_length")]
int? PrefixLength { get; set; }
/// <summary>
/// Sets the maximum number of suggestions to be retrieved from each individual shard.
/// During the reduce phase only the top N suggestions are returned based on the size option.
/// Defaults to the size option. Setting this to a value higher than the size can be useful in order to
/// get a more accurate document frequency for spelling corrections at the cost of performance.
/// Due to the fact that terms are partitioned amongst shards, the shard level document frequencies of spelling corrections
/// may not be precise. Increasing this will make these document frequencies more precise.
/// </summary>
[DataMember(Name = "shard_size")]
int? ShardSize { get; set; }
/// <summary>
/// Defines how suggestions should be sorted per suggest text term.
/// </summary>
[DataMember(Name = "sort")]
SuggestSort? Sort { get; set; }
/// <summary>
/// Which string distance implementation to use for comparing how similar suggested terms are.
/// </summary>
[DataMember(Name = "string_distance")]
StringDistance? StringDistance { get; set; }
/// <summary>
/// Controls what suggestions are included or controls for what suggest text terms, suggestions should be suggested.
/// </summary>
[DataMember(Name = "suggest_mode")]
SuggestMode? SuggestMode { get; set; }
/// <summary>
/// The suggest text. Marked with <see cref="IgnoreDataMemberAttribute" />, so it is excluded from this
/// type's data contract.
/// </summary>
[IgnoreDataMember]
string Text { get; set; }
}
/// <summary>
/// Object-initializer implementation of <see cref="ITermSuggester" />: every member is a plain
/// auto-property. This is the type named by the <c>ReadAs</c> attribute on <see cref="ITermSuggester" />.
/// </summary>
public class TermSuggester : SuggesterBase, ITermSuggester
{
/// <inheritdoc />
public bool? LowercaseTerms { get; set; }
/// <inheritdoc />
public int? MaxEdits { get; set; }
/// <inheritdoc />
public int? MaxInspections { get; set; }
/// <inheritdoc />
public float? MaxTermFrequency { get; set; }
/// <inheritdoc />
public float? MinDocFrequency { get; set; }
/// <inheritdoc />
public int? MinWordLength { get; set; }
/// <inheritdoc />
public int? PrefixLength { get; set; }
/// <inheritdoc />
public int? ShardSize { get; set; }
/// <inheritdoc />
public SuggestSort? Sort { get; set; }
/// <inheritdoc />
public StringDistance? StringDistance { get; set; }
/// <inheritdoc />
public SuggestMode? SuggestMode { get; set; }
/// <inheritdoc />
public string Text { get; set; }
}
/// <summary>
/// Fluent descriptor for building an <see cref="ITermSuggester" />. The interface is implemented
/// explicitly, so the option values are only reachable through <see cref="ITermSuggester" />; each public
/// method passes its argument to <c>Assign</c>, which receives a setter for the interface property of the
/// same name, and returns the result so calls can be chained.
/// </summary>
/// <remarks>
/// <c>Assign</c> is not declared in this class; it is presumably inherited from
/// <c>SuggestDescriptorBase</c> (TODO confirm against the base type).
/// </remarks>
/// <typeparam name="T">
/// Reference type forwarded to <c>SuggestDescriptorBase</c>; presumably the document type the
/// suggester targets (TODO confirm).
/// </typeparam>
public class TermSuggesterDescriptor<T>
	: SuggestDescriptorBase<TermSuggesterDescriptor<T>, ITermSuggester, T>, ITermSuggester
	where T : class
{
	// Explicit interface implementations: backing storage for the fluent methods below.
	bool? ITermSuggester.LowercaseTerms { get; set; }
	int? ITermSuggester.MaxEdits { get; set; }
	int? ITermSuggester.MaxInspections { get; set; }
	float? ITermSuggester.MaxTermFrequency { get; set; }
	float? ITermSuggester.MinDocFrequency { get; set; }
	int? ITermSuggester.MinWordLength { get; set; }
	int? ITermSuggester.PrefixLength { get; set; }
	int? ITermSuggester.ShardSize { get; set; }
	SuggestSort? ITermSuggester.Sort { get; set; }
	StringDistance? ITermSuggester.StringDistance { get; set; }
	SuggestMode? ITermSuggester.SuggestMode { get; set; }
	string ITermSuggester.Text { get; set; }

	/// <inheritdoc cref="ITermSuggester.Text" />
	public TermSuggesterDescriptor<T> Text(string text) => Assign(text, (a, v) => a.Text = v);

	/// <inheritdoc cref="ITermSuggester.ShardSize" />
	public TermSuggesterDescriptor<T> ShardSize(int? shardSize) => Assign(shardSize, (a, v) => a.ShardSize = v);

	/// <inheritdoc cref="ITermSuggester.SuggestMode" />
	public TermSuggesterDescriptor<T> SuggestMode(SuggestMode? mode) => Assign(mode, (a, v) => a.SuggestMode = v);

	/// <inheritdoc cref="ITermSuggester.MinWordLength" />
	public TermSuggesterDescriptor<T> MinWordLength(int? length) => Assign(length, (a, v) => a.MinWordLength = v);

	/// <inheritdoc cref="ITermSuggester.PrefixLength" />
	public TermSuggesterDescriptor<T> PrefixLength(int? length) => Assign(length, (a, v) => a.PrefixLength = v);

	/// <inheritdoc cref="ITermSuggester.MaxEdits" />
	public TermSuggesterDescriptor<T> MaxEdits(int? maxEdits) => Assign(maxEdits, (a, v) => a.MaxEdits = v);

	/// <inheritdoc cref="ITermSuggester.MaxInspections" />
	public TermSuggesterDescriptor<T> MaxInspections(int? maxInspections) => Assign(maxInspections, (a, v) => a.MaxInspections = v);

	/// <inheritdoc cref="ITermSuggester.MinDocFrequency" />
	public TermSuggesterDescriptor<T> MinDocFrequency(float? frequency) => Assign(frequency, (a, v) => a.MinDocFrequency = v);

	/// <inheritdoc cref="ITermSuggester.MaxTermFrequency" />
	public TermSuggesterDescriptor<T> MaxTermFrequency(float? frequency) => Assign(frequency, (a, v) => a.MaxTermFrequency = v);

	/// <inheritdoc cref="ITermSuggester.Sort" />
	public TermSuggesterDescriptor<T> Sort(SuggestSort? sort) => Assign(sort, (a, v) => a.Sort = v);

	/// <inheritdoc cref="ITermSuggester.LowercaseTerms" />
	/// <remarks>Calling this method without an argument passes <c>true</c>.</remarks>
	public TermSuggesterDescriptor<T> LowercaseTerms(bool? lowercaseTerms = true) => Assign(lowercaseTerms, (a, v) => a.LowercaseTerms = v);

	/// <inheritdoc cref="ITermSuggester.StringDistance" />
	public TermSuggesterDescriptor<T> StringDistance(StringDistance? distance) => Assign(distance, (a, v) => a.StringDistance = v);
}
}