/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
*  Licensed to Elasticsearch B.V. under one or more contributor
*  license agreements. See the NOTICE file distributed with
*  this work for additional information regarding copyright
*  ownership. Elasticsearch B.V. licenses this file to you under
*  the Apache License, Version 2.0 (the "License"); you may
*  not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
* 	http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing,
*  software distributed under the License is distributed on an
*  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
*  KIND, either express or implied.  See the License for the
*  specific language governing permissions and limitations
*  under the License.
*/

using System.Runtime.Serialization;

// ReSharper disable InconsistentNaming

namespace OpenSearch.Client.Specification.IndicesApi
{
    /// <summary>
    /// Information based model similarity.
    /// The algorithm is based on the concept that the information content in any symbolic distribution sequence
    /// is primarily determined by the repetitive usage of its basic elements.
    /// For written texts this challenge would correspond to comparing the writing styles of different authors.
    /// </summary>
    public interface IIBSimilarity : ISimilarity
    {
        /// <summary>
        /// The distribution
        /// </summary>
        [DataMember(Name ="distribution")]
        IBDistribution? Distribution { get; set; }

        /// <summary>
        /// The lambda
        /// </summary>
        [DataMember(Name ="lambda")]
        IBLambda? Lambda { get; set; }

        /// <summary>
        /// The normalization
        /// </summary>
        [DataMember(Name ="normalization")]
        Normalization? Normalization { get; set; }

        /// <summary>
        /// Normalization model that assumes a uniform distribution of the term frequency.
        /// </summary>
        [DataMember(Name ="normalization.h1.c")]
        double? NormalizationH1C { get; set; }

        /// <summary>
        /// Normalization model in which the term frequency is inversely related to the length.
        /// </summary>
        [DataMember(Name ="normalization.h2.c")]
        double? NormalizationH2C { get; set; }

        /// <summary>
        /// Dirichlet Priors normalization
        /// </summary>
        [DataMember(Name ="normalization.h3.c")]
        double? NormalizationH3C { get; set; }

        /// <summary>
        /// Pareto-Zipf Normalization
        /// </summary>
        [DataMember(Name ="normalization.z.z")]
        double? NormalizationZZ { get; set; }
    }

    /// <inheritdoc />
    public class IBSimilarity : IIBSimilarity
    {
        /// <inheritdoc />
        public IBDistribution? Distribution { get; set; }

        /// <inheritdoc />
        public IBLambda? Lambda { get; set; }

        /// <inheritdoc />
        public Normalization? Normalization { get; set; }

        /// <inheritdoc />
        public double? NormalizationH1C { get; set; }

        /// <inheritdoc />
        public double? NormalizationH2C { get; set; }

        /// <inheritdoc />
        public double? NormalizationH3C { get; set; }

        /// <inheritdoc />
        public double? NormalizationZZ { get; set; }

        /// <summary>
        /// The similarity type; always <c>"IB"</c> for this model.
        /// </summary>
        public string Type => "IB";
    }

    /// <inheritdoc cref="IIBSimilarity" />
    // NOTE(review): the base type is written as plain DescriptorBase here; the way Assign is used
    // (returning IBSimilarityDescriptor and handing an IIBSimilarity to the lambda) suggests it may
    // carry generic type arguments that are not visible in this view - confirm before merging.
    public class IBSimilarityDescriptor
        : DescriptorBase, IIBSimilarity
    {
        IBDistribution? IIBSimilarity.Distribution { get; set; }
        IBLambda? IIBSimilarity.Lambda { get; set; }
        Normalization? IIBSimilarity.Normalization { get; set; }
        double? IIBSimilarity.NormalizationH1C { get; set; }
        double? IIBSimilarity.NormalizationH2C { get; set; }
        double? IIBSimilarity.NormalizationH3C { get; set; }
        double? IIBSimilarity.NormalizationZZ { get; set; }
        string ISimilarity.Type => "IB";

        /// <inheritdoc cref="IIBSimilarity.Distribution" />
        public IBSimilarityDescriptor Distribution(IBDistribution? distribution) =>
            Assign(distribution, (a, v) => a.Distribution = v);

        /// <inheritdoc cref="IIBSimilarity.Lambda" />
        public IBSimilarityDescriptor Lambda(IBLambda? lambda) =>
            Assign(lambda, (a, v) => a.Lambda = v);

        /// <summary>
        /// Sets the normalization to <see cref="Normalization.No" />.
        /// </summary>
        public IBSimilarityDescriptor NoNormalization() =>
            Assign(Normalization.No, (a, v) => a.Normalization = v);

        /// <summary>
        /// Normalization model that assumes a uniform distribution of the term frequency.
        /// Selects <see cref="Normalization.H1" /> and stores <paramref name="c" /> in <c>normalization.h1.c</c>.
        /// </summary>
        /// <param name="c">hyper-parameter that controls the term frequency normalization with respect to the document length.</param>
        public IBSimilarityDescriptor NormalizationH1(double? c) =>
            Assign(c, (a, v) =>
            {
                a.Normalization = Normalization.H1;
                a.NormalizationH1C = v;
            });

        /// <summary>
        /// Normalization model in which the term frequency is inversely related to the length.
        /// Selects <see cref="Normalization.H2" /> and stores <paramref name="c" /> in <c>normalization.h2.c</c>.
        /// </summary>
        /// <param name="c">hyper-parameter that controls the term frequency normalization with respect to the document length.</param>
        public IBSimilarityDescriptor NormalizationH2(double? c) =>
            Assign(c, (a, v) =>
            {
                a.Normalization = Normalization.H2;
                // The H2 parameter has its own property; writing it to NormalizationH1C would
                // serialize it under "normalization.h1.c" and leave "normalization.h2.c" unset.
                a.NormalizationH2C = v;
            });

        /// <summary>
        /// Dirichlet Priors normalization
        /// Selects <see cref="Normalization.H3" /> and stores <paramref name="mu" /> in <c>normalization.h3.c</c>.
        /// </summary>
        /// <param name="mu">smoothing parameter μ.</param>
        public IBSimilarityDescriptor NormalizationH3(double? mu) =>
            Assign(mu, (a, v) =>
            {
                a.Normalization = Normalization.H3;
                // Must target the H3 property so the value is emitted as "normalization.h3.c".
                a.NormalizationH3C = v;
            });

        /// <summary>
        /// Pareto-Zipf Normalization
        /// Selects <see cref="Normalization.Z" /> and stores <paramref name="z" /> in <c>normalization.z.z</c>.
        /// </summary>
        /// <param name="z">represents A/(A+1) where A measures the specificity of the language.</param>
        public IBSimilarityDescriptor NormalizationZ(double? z) =>
            Assign(z, (a, v) =>
            {
                a.Normalization = Normalization.Z;
                // Must target the Z property so the value is emitted as "normalization.z.z".
                a.NormalizationZZ = v;
            });
    }
}