/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System.Runtime.Serialization;
namespace OpenSearch.Client.Specification.IndicesApi
{
/// <summary>
/// Implements the divergence from randomness (DFR) framework introduced in Gianni Amati and Cornelis Joost Van Rijsbergen.
/// 2002. Probabilistic models of information retrieval based on measuring the divergence from randomness.
/// ACM Trans. Inf. Syst. 20, 4 (October 2002), 357-389.
/// <para>
/// The DFR scoring formula is composed of three separate components: the basic model, the aftereffect and an
/// additional normalization component, represented by the classes BasicModel, AfterEffect and Normalization,
/// respectively. The names of these classes were chosen to match the names of their counterparts in the
/// Terrier IR engine.
/// </para>
/// </summary>
public interface IDFRSimilarity : ISimilarity
{
/// <summary>
/// The after effect. Data member name: <c>after_effect</c>.
/// </summary>
[DataMember(Name = "after_effect")]
DFRAfterEffect? AfterEffect { get; set; }
/// <summary>
/// The basic model. Data member name: <c>basic_model</c>.
/// </summary>
[DataMember(Name = "basic_model")]
DFRBasicModel? BasicModel { get; set; }
/// <summary>
/// The normalization. Data member name: <c>normalization</c>.
/// </summary>
[DataMember(Name = "normalization")]
Normalization? Normalization { get; set; }
/// <summary>
/// Parameter of the normalization model that assumes a uniform distribution of the term frequency.
/// Data member name: <c>normalization.h1.c</c>.
/// </summary>
[DataMember(Name = "normalization.h1.c")]
double? NormalizationH1C { get; set; }
/// <summary>
/// Parameter of the normalization model in which the term frequency is inversely related to the length.
/// Data member name: <c>normalization.h2.c</c>.
/// </summary>
[DataMember(Name = "normalization.h2.c")]
double? NormalizationH2C { get; set; }
/// <summary>
/// Parameter of the Dirichlet Priors normalization.
/// Data member name: <c>normalization.h3.c</c>.
/// </summary>
[DataMember(Name = "normalization.h3.c")]
double? NormalizationH3C { get; set; }
/// <summary>
/// Parameter of the Pareto-Zipf normalization.
/// Data member name: <c>normalization.z.z</c>.
/// </summary>
[DataMember(Name = "normalization.z.z")]
// ReSharper disable once InconsistentNaming
double? NormalizationZZ { get; set; }
}
/// <summary>
/// Plain property-bag implementation of <see cref="IDFRSimilarity" />.
/// Every setting is an auto-property; <see cref="Type" /> always reports <c>"DFR"</c>.
/// </summary>
public class DFRSimilarity : IDFRSimilarity
{
    /// <inheritdoc />
    public DFRAfterEffect? AfterEffect { get; set; }

    /// <inheritdoc />
    public DFRBasicModel? BasicModel { get; set; }

    /// <inheritdoc />
    public Normalization? Normalization { get; set; }

    /// <inheritdoc />
    public double? NormalizationH1C { get; set; }

    /// <inheritdoc />
    public double? NormalizationH2C { get; set; }

    /// <inheritdoc />
    public double? NormalizationH3C { get; set; }

    /// <inheritdoc />
    public double? NormalizationZZ { get; set; }

    /// <summary>
    /// The similarity type identifier; a constant <c>"DFR"</c>.
    /// </summary>
    public string Type
    {
        get { return "DFR"; }
    }
}
/// <summary>
/// Descriptor for configuring an <see cref="IDFRSimilarity" />. State is held in explicit
/// interface implementations and written through <c>Assign</c>; the similarity type is always <c>"DFR"</c>.
/// </summary>
public class DFRSimilarityDescriptor
    : DescriptorBase, IDFRSimilarity
{
    DFRAfterEffect? IDFRSimilarity.AfterEffect { get; set; }
    DFRBasicModel? IDFRSimilarity.BasicModel { get; set; }
    Normalization? IDFRSimilarity.Normalization { get; set; }
    double? IDFRSimilarity.NormalizationH1C { get; set; }
    double? IDFRSimilarity.NormalizationH2C { get; set; }
    double? IDFRSimilarity.NormalizationH3C { get; set; }
    double? IDFRSimilarity.NormalizationZZ { get; set; }
    string ISimilarity.Type => "DFR";

    /// <summary>
    /// Sets the basic model.
    /// </summary>
    /// <param name="model">the basic model to store; may be null.</param>
    public DFRSimilarityDescriptor BasicModel(DFRBasicModel? model) => Assign(model, (a, v) => a.BasicModel = v);

    /// <summary>
    /// Sets the after effect.
    /// </summary>
    /// <param name="afterEffect">the after effect to store; may be null.</param>
    public DFRSimilarityDescriptor AfterEffect(DFRAfterEffect? afterEffect) => Assign(afterEffect, (a, v) => a.AfterEffect = v);

    /// <summary>
    /// Sets the normalization to <c>Normalization.No</c>.
    /// </summary>
    public DFRSimilarityDescriptor NoNormalization() => Assign(Normalization.No, (a, v) => a.Normalization = v);

    /// <summary>
    /// Normalization model that assumes a uniform distribution of the term frequency.
    /// Selects <c>Normalization.H1</c> and stores <paramref name="c" /> as its parameter;
    /// a null <paramref name="c" /> sets both the normalization and the parameter to null.
    /// </summary>
    /// <param name="c">hyper-parameter that controls the term frequency normalization with respect to the document length.</param>
    public DFRSimilarityDescriptor NormalizationH1(double? c) => Assign(c, (a, v) =>
    {
        a.Normalization = v.HasValue ? Normalization.H1 : (Normalization?)null;
        a.NormalizationH1C = v;
    });

    /// <summary>
    /// Normalization model in which the term frequency is inversely related to the length.
    /// Selects <c>Normalization.H2</c> and stores <paramref name="c" /> as its parameter;
    /// a null <paramref name="c" /> sets both the normalization and the parameter to null.
    /// </summary>
    /// <param name="c">hyper-parameter that controls the term frequency normalization with respect to the document length.</param>
    public DFRSimilarityDescriptor NormalizationH2(double? c) => Assign(c, (a, v) =>
    {
        a.Normalization = v.HasValue ? Normalization.H2 : (Normalization?)null;
        // The value belongs to the H2 parameter (normalization.h2.c), not the H1 one.
        a.NormalizationH2C = v;
    });

    /// <summary>
    /// Dirichlet Priors normalization.
    /// Selects <c>Normalization.H3</c> and stores <paramref name="mu" /> as its parameter;
    /// a null <paramref name="mu" /> sets both the normalization and the parameter to null.
    /// </summary>
    /// <param name="mu">smoothing parameter μ.</param>
    public DFRSimilarityDescriptor NormalizationH3(double? mu) => Assign(mu, (a, v) =>
    {
        a.Normalization = v.HasValue ? Normalization.H3 : (Normalization?)null;
        // The value belongs to the H3 parameter (normalization.h3.c), not the H1 one.
        a.NormalizationH3C = v;
    });

    /// <summary>
    /// Pareto-Zipf Normalization.
    /// Selects <c>Normalization.Z</c> and stores <paramref name="z" /> as its parameter;
    /// a null <paramref name="z" /> sets both the normalization and the parameter to null.
    /// </summary>
    /// <param name="z">represents A/(A+1) where A measures the specificity of the language.</param>
    public DFRSimilarityDescriptor NormalizationZ(double? z) => Assign(z, (a, v) =>
    {
        a.Normalization = v.HasValue ? Normalization.Z : (Normalization?)null;
        // The value belongs to the Z parameter (normalization.z.z), not the H1 one.
        a.NormalizationZZ = v;
    });
}
}