/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System.Runtime.Serialization;
// ReSharper disable InconsistentNaming
namespace OpenSearch.Client.Specification.IndicesApi
{
/// <summary>
/// Information based model similarity.
/// <para>
/// The algorithm is based on the concept that the information content in any symbolic distribution
/// sequence is primarily determined by the repetitive usage of its basic elements.
/// For written texts this challenge would correspond to comparing the writing styles of different authors.
/// </para>
/// </summary>
public interface IIBSimilarity : ISimilarity
{
    /// <summary>
    /// The distribution. Data member name: <c>distribution</c>.
    /// </summary>
    [DataMember(Name = "distribution")]
    IBDistribution? Distribution { get; set; }

    /// <summary>
    /// The lambda. Data member name: <c>lambda</c>.
    /// </summary>
    [DataMember(Name = "lambda")]
    IBLambda? Lambda { get; set; }

    /// <summary>
    /// The normalization. Data member name: <c>normalization</c>.
    /// </summary>
    [DataMember(Name = "normalization")]
    Normalization? Normalization { get; set; }

    /// <summary>
    /// Normalization model that assumes a uniform distribution of the term frequency.
    /// Data member name: <c>normalization.h1.c</c>.
    /// </summary>
    [DataMember(Name = "normalization.h1.c")]
    double? NormalizationH1C { get; set; }

    /// <summary>
    /// Normalization model in which the term frequency is inversely related to the length.
    /// Data member name: <c>normalization.h2.c</c>.
    /// </summary>
    [DataMember(Name = "normalization.h2.c")]
    double? NormalizationH2C { get; set; }

    /// <summary>
    /// Dirichlet Priors normalization.
    /// Data member name: <c>normalization.h3.c</c>.
    /// </summary>
    [DataMember(Name = "normalization.h3.c")]
    double? NormalizationH3C { get; set; }

    /// <summary>
    /// Pareto-Zipf Normalization.
    /// Data member name: <c>normalization.z.z</c>.
    /// </summary>
    [DataMember(Name = "normalization.z.z")]
    double? NormalizationZZ { get; set; }
}
/// <summary>
/// Concrete <see cref="IIBSimilarity" /> whose settings are plain read/write properties.
/// </summary>
public class IBSimilarity : IIBSimilarity
{
    /// <inheritdoc />
    public IBDistribution? Distribution { get; set; }

    /// <inheritdoc />
    public IBLambda? Lambda { get; set; }

    /// <inheritdoc />
    public Normalization? Normalization { get; set; }

    /// <inheritdoc />
    public double? NormalizationH1C { get; set; }

    /// <inheritdoc />
    public double? NormalizationH2C { get; set; }

    /// <inheritdoc />
    public double? NormalizationH3C { get; set; }

    /// <inheritdoc />
    public double? NormalizationZZ { get; set; }

    /// <summary>
    /// Always returns the constant string <c>IB</c>.
    /// </summary>
    public string Type
    {
        get { return "IB"; }
    }
}
/// <summary>
/// Fluent descriptor for <see cref="IIBSimilarity" />. Each public method stores its argument on the
/// explicitly implemented <see cref="IIBSimilarity" /> properties through <c>Assign</c>
/// (defined outside this file) and returns the descriptor so calls can be chained.
/// </summary>
public class IBSimilarityDescriptor
    : DescriptorBase, IIBSimilarity
{
    IBDistribution? IIBSimilarity.Distribution { get; set; }
    IBLambda? IIBSimilarity.Lambda { get; set; }
    Normalization? IIBSimilarity.Normalization { get; set; }
    double? IIBSimilarity.NormalizationH1C { get; set; }
    double? IIBSimilarity.NormalizationH2C { get; set; }
    double? IIBSimilarity.NormalizationH3C { get; set; }
    double? IIBSimilarity.NormalizationZZ { get; set; }
    string ISimilarity.Type => "IB";

    /// <summary>
    /// Sets <see cref="IIBSimilarity.Distribution" />.
    /// </summary>
    /// <param name="distribution">The distribution to store; may be <c>null</c>.</param>
    public IBSimilarityDescriptor Distribution(IBDistribution? distribution) =>
        Assign(distribution, (a, v) => a.Distribution = v);

    /// <summary>
    /// Sets <see cref="IIBSimilarity.Lambda" />.
    /// </summary>
    /// <param name="lambda">The lambda to store; may be <c>null</c>.</param>
    public IBSimilarityDescriptor Lambda(IBLambda? lambda) =>
        Assign(lambda, (a, v) => a.Lambda = v);

    /// <summary>
    /// Sets <see cref="IIBSimilarity.Normalization" /> to <c>Normalization.No</c>.
    /// </summary>
    public IBSimilarityDescriptor NoNormalization() =>
        Assign(Normalization.No, (a, v) => a.Normalization = v);

    /// <summary>
    /// Normalization model that assumes a uniform distribution of the term frequency.
    /// Selects <c>Normalization.H1</c> and stores <paramref name="c" /> in
    /// <see cref="IIBSimilarity.NormalizationH1C" />.
    /// </summary>
    /// <param name="c">hyper-parameter that controls the term frequency normalization with respect to the document length.</param>
    public IBSimilarityDescriptor NormalizationH1(double? c) => Assign(c, (a, v) =>
    {
        a.Normalization = Normalization.H1;
        a.NormalizationH1C = v;
    });

    /// <summary>
    /// Normalization model in which the term frequency is inversely related to the length.
    /// Selects <c>Normalization.H2</c> and stores <paramref name="c" /> in
    /// <see cref="IIBSimilarity.NormalizationH2C" />.
    /// </summary>
    /// <param name="c">hyper-parameter that controls the term frequency normalization with respect to the document length.</param>
    public IBSimilarityDescriptor NormalizationH2(double? c) => Assign(c, (a, v) =>
    {
        a.Normalization = Normalization.H2;
        // The value belongs to the H2 member (normalization.h2.c), not the H1 one.
        a.NormalizationH2C = v;
    });

    /// <summary>
    /// Dirichlet Priors normalization.
    /// Selects <c>Normalization.H3</c> and stores <paramref name="mu" /> in
    /// <see cref="IIBSimilarity.NormalizationH3C" />.
    /// </summary>
    /// <param name="mu">smoothing parameter μ.</param>
    public IBSimilarityDescriptor NormalizationH3(double? mu) => Assign(mu, (a, v) =>
    {
        a.Normalization = Normalization.H3;
        // The value belongs to the H3 member (normalization.h3.c), not the H1 one.
        a.NormalizationH3C = v;
    });

    /// <summary>
    /// Pareto-Zipf Normalization.
    /// Selects <c>Normalization.Z</c> and stores <paramref name="z" /> in
    /// <see cref="IIBSimilarity.NormalizationZZ" />.
    /// </summary>
    /// <param name="z">represents A/(A+1) where A measures the specificity of the language.</param>
    public IBSimilarityDescriptor NormalizationZ(double? z) => Assign(z, (a, v) =>
    {
        a.Normalization = Normalization.Z;
        // The value belongs to the Z member (normalization.z.z), not the H1 one.
        a.NormalizationZZ = v;
    });
}
}