/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.Serialization;
using OpenSearch.Net.Utf8Json;
namespace OpenSearch.Client
{
///
/// The phrase suggester adds additional logic on top of the term suggester to select entire corrected phrases instead of
/// individual tokens weighted based on ngram-language models. In practice this suggester will be able to make better decisions
/// about which tokens to pick based on co-occurrence and frequencies.
///
[InterfaceDataContract]
[ReadAs(typeof(PhraseSuggester))]
public interface IPhraseSuggester : ISuggester
{
///
/// Checks each suggestion against the specified query to prune suggestions for which no matching docs exist in the index.
/// The collate query for a suggestion is run only on the local shard from which the suggestion has been generated from.
/// The query must be specified and can be templated. The current suggestion is automatically made available as
/// the {{suggestion}} variable, which should be used in your query. You can still specify your own template params ,
/// the suggestion value will be added to the variables you specify.
/// Additionally, you can specify a prune to control if all phrase suggestions will be returned; when set to true the
/// suggestions will have an additional option collate_match, which will be true if matching documents for the phrase was found,
/// false otherwise. The default value for prune is false.
///
[DataMember(Name = "collate")]
IPhraseSuggestCollate Collate { get; set; }
///
/// The confidence level defines a factor applied to the input phrases score which is used as a threshold for other suggest candidates.
/// Only candidates that score higher than the threshold will be included in the result.
/// For instance a confidence level of 1.0 will only return suggestions that score higher than the input phrase.
/// If set to 0.0 the top N candidates are returned. The default is 1.0.
///
[DataMember(Name = "confidence")]
double? Confidence { get; set; }
///
/// Candidate generators used to produce a list of possible terms per term in the given text.
/// A single candidate generator is similar to a term suggester called for each individual term in the text.
/// The output of the generators is subsequently scored in combination with the candidates from the other terms for suggestion candidates.
/// Each of the generators is called per term in the original text.
///
[DataMember(Name = "direct_generator")]
IEnumerable DirectGenerator { get; set; }
///
///
[DataMember(Name = "force_unigrams")]
bool? ForceUnigrams { get; set; }
///
/// Sets max size of the n-grams (shingles) in the field. If the field doesn’t contain n-grams (shingles), this should be omitted or set to 1.
/// Note that OpenSearch tries to detect the gram size based on the specified field. If the field uses a shingle filter,
/// the gram_size is set to the max_shingle_size if not explicitly set.
///
[DataMember(Name = "gram_size")]
int? GramSize { get; set; }
///
/// Sets up suggestion highlighting. If not provided then no highlighted field is returned.
/// If provided must contain exactly pre_tag and post_tag, which are wrapped around the changed tokens.
/// If multiple tokens in a row are changed the entire phrase of changed tokens is wrapped rather than each token.
///
[DataMember(Name = "highlight")]
IPhraseSuggestHighlight Highlight { get; set; }
///
/// The maximum percentage of the terms considered to be misspellings in order to form a correction.
/// This method accepts a float value in the range [0..1) as a fraction of the actual query terms or a number >=1 as an absolute number
/// of query terms. The default is set to 1.0, meaning only corrections with at most one misspelled term are returned.
/// Note that setting this too high can negatively impact performance. Low values like 1 or 2 are recommended; otherwise the time spend
/// in suggest calls might exceed the time spend in query execution.
///
[DataMember(Name = "max_errors")]
double? MaxErrors { get; set; }
///
/// The likelihood of a term being a misspelled even if the term exists in the dictionary.
/// The default is 0.95, meaning 5% of the real words are misspelled.
///
[DataMember(Name = "real_word_error_likelihood")]
double? RealWordErrorLikelihood { get; set; }
///
/// The separator that is used to separate terms in the bigram field.
/// If not set the whitespace character is used as a separator.
///
[DataMember(Name = "separator")]
char? Separator { get; set; }
///
/// Sets the maximum number of suggested terms to be retrieved from each individual shard.
/// During the reduce phase, only the top N suggestions are returned based on the size option. Defaults to 5.
///
[DataMember(Name = "shard_size")]
int? ShardSize { get; set; }
///
/// Smoothing model to balance weight between infrequent grams (grams (shingles) are not existing in the index)
/// and frequent grams (appear at least once in the index).
///
[DataMember(Name = "smoothing")]
SmoothingModelContainer Smoothing { get; set; }
///
/// Sets the text / query to provide suggestions for.
///
[IgnoreDataMember]
string Text { get; set; }
///
///
[DataMember(Name = "token_limit")]
int? TokenLimit { get; set; }
}
///
public class PhraseSuggester : SuggesterBase, IPhraseSuggester
{
///
public IPhraseSuggestCollate Collate { get; set; }
///
public double? Confidence { get; set; }
///
public IEnumerable DirectGenerator { get; set; }
///
public bool? ForceUnigrams { get; set; }
///
public int? GramSize { get; set; }
///
public IPhraseSuggestHighlight Highlight { get; set; }
///
public double? MaxErrors { get; set; }
///
public double? RealWordErrorLikelihood { get; set; }
///
public char? Separator { get; set; }
///
public int? ShardSize { get; set; }
///
public SmoothingModelContainer Smoothing { get; set; }
///
public string Text { get; set; }
///
public int? TokenLimit { get; set; }
}
///
public class PhraseSuggesterDescriptor : SuggestDescriptorBase, IPhraseSuggester, T>, IPhraseSuggester
where T : class
{
IPhraseSuggestCollate IPhraseSuggester.Collate { get; set; }
double? IPhraseSuggester.Confidence { get; set; }
IEnumerable IPhraseSuggester.DirectGenerator { get; set; }
bool? IPhraseSuggester.ForceUnigrams { get; set; }
int? IPhraseSuggester.GramSize { get; set; }
IPhraseSuggestHighlight IPhraseSuggester.Highlight { get; set; }
double? IPhraseSuggester.MaxErrors { get; set; }
double? IPhraseSuggester.RealWordErrorLikelihood { get; set; }
char? IPhraseSuggester.Separator { get; set; }
int? IPhraseSuggester.ShardSize { get; set; }
SmoothingModelContainer IPhraseSuggester.Smoothing { get; set; }
string IPhraseSuggester.Text { get; set; }
int? IPhraseSuggester.TokenLimit { get; set; }
///
public PhraseSuggesterDescriptor Text(string text) => Assign(text, (a, v) => a.Text = v);
///
public PhraseSuggesterDescriptor ShardSize(int? shardSize) => Assign(shardSize, (a, v) => a.ShardSize = v);
///
public PhraseSuggesterDescriptor GramSize(int? gramSize) => Assign(gramSize, (a, v) => a.GramSize = v);
///
public PhraseSuggesterDescriptor Confidence(double? confidence) => Assign(confidence, (a, v) => a.Confidence = v);
///
public PhraseSuggesterDescriptor MaxErrors(double? maxErrors) => Assign(maxErrors, (a, v) => a.MaxErrors = v);
///
public PhraseSuggesterDescriptor Separator(char? separator) => Assign(separator, (a, v) => a.Separator = v);
///
public PhraseSuggesterDescriptor DirectGenerator(params Func, IDirectGenerator>[] generators) =>
Assign(generators.Select(g => g(new DirectGeneratorDescriptor())).ToList(), (a, v) => a.DirectGenerator = v);
///
public PhraseSuggesterDescriptor RealWordErrorLikelihood(double? realWordErrorLikelihood) =>
Assign(realWordErrorLikelihood, (a, v) => a.RealWordErrorLikelihood = v);
///
public PhraseSuggesterDescriptor Highlight(Func selector) =>
Assign(selector, (a, v) => a.Highlight = v?.Invoke(new PhraseSuggestHighlightDescriptor()));
///
public PhraseSuggesterDescriptor Collate(Func, IPhraseSuggestCollate> selector) =>
Assign(selector, (a, v) => a.Collate = v?.Invoke(new PhraseSuggestCollateDescriptor()));
///
public PhraseSuggesterDescriptor Smoothing(Func selector) =>
Assign(selector, (a, v) => a.Smoothing = v?.Invoke(new SmoothingModelContainerDescriptor()));
///
public PhraseSuggesterDescriptor TokenLimit(int? tokenLimit) => Assign(tokenLimit, (a, v) => a.TokenLimit = v);
///
public PhraseSuggesterDescriptor ForceUnigrams(bool? forceUnigrams = true) => Assign(forceUnigrams, (a, v) => a.ForceUnigrams = v);
}
}