/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
*  Licensed to Elasticsearch B.V. under one or more contributor
*  license agreements. See the NOTICE file distributed with
*  this work for additional information regarding copyright
*  ownership. Elasticsearch B.V. licenses this file to you under
*  the Apache License, Version 2.0 (the "License"); you may
*  not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
* 	http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing,
*  software distributed under the License is distributed on an
*  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
*  KIND, either express or implied.  See the License for the
*  specific language governing permissions and limitations
*  under the License.
*/

using System;
using System.Collections.Generic;
using System.Linq.Expressions;
using System.Runtime.Serialization;
using OpenSearch.Net.Utf8Json;

namespace OpenSearch.Client
{
	/// <summary>
	/// Contract for a significant terms bucket aggregation: the field to aggregate on,
	/// the term filters, the sizing/threshold options and the significance heuristics.
	/// Deserialized as <see cref="SignificantTermsAggregation" />.
	/// </summary>
	[InterfaceDataContract]
	[ReadAs(typeof(SignificantTermsAggregation))]
	public interface ISignificantTermsAggregation : IBucketAggregation
	{
		/// <summary>
		/// The default source of statistical information for background term
		/// frequencies is the entire index. This scope can be narrowed
		/// through the use of a background filter to focus in on significant
		/// terms within a narrower context
		/// </summary>
		[DataMember(Name = "background_filter")]
		QueryContainer BackgroundFilter { get; set; }

		/// <summary>
		/// Use chi square to calculate significance score
		/// </summary>
		[DataMember(Name = "chi_square")]
		IChiSquareHeuristic ChiSquare { get; set; }

		/// <summary>
		/// Exclude term values for which buckets will be created.
		/// </summary>
		[DataMember(Name = "exclude")]
		IncludeExclude Exclude { get; set; }

		/// <summary>
		/// Determines the mechanism by which aggregations are executed
		/// </summary>
		[DataMember(Name = "execution_hint")]
		TermsAggregationExecutionHint? ExecutionHint { get; set; }

		/// <summary>
		/// The field on which to run the aggregation
		/// </summary>
		[DataMember(Name = "field")]
		Field Field { get; set; }

		/// <summary>
		/// Use Google normalized distance to calculate significance score
		/// </summary>
		[DataMember(Name = "gnd")]
		IGoogleNormalizedDistanceHeuristic GoogleNormalizedDistance { get; set; }

		/// <summary>
		/// Include term values for which buckets will be created.
		/// </summary>
		[DataMember(Name = "include")]
		IncludeExclude Include { get; set; }

		/// <summary>
		/// Return only terms that match equal to or more than a configurable
		/// number of hits
		/// </summary>
		[DataMember(Name = "min_doc_count")]
		long? MinimumDocumentCount { get; set; }

		/// <summary>
		/// Use mutual information to calculate significance score
		/// </summary>
		[DataMember(Name = "mutual_information")]
		IMutualInformationHeuristic MutualInformation { get; set; }

		/// <summary>
		/// Use percentage to calculate significance score.
		/// <para>
		/// A simple calculation of the number of documents in the foreground
		/// sample with a term divided by the number of documents in the background
		/// with the term. By default this produces a score greater than zero
		/// and less than one.
		/// </para>
		/// </summary>
		[DataMember(Name = "percentage")]
		IPercentageScoreHeuristic PercentageScore { get; set; }

		/// <summary>
		/// Use a script to calculate a custom significance score.
		/// </summary>
		[DataMember(Name = "script_heuristic")]
		IScriptedHeuristic Script { get; set; }

		/// <summary>
		/// Regulates the certainty a shard has if the term should actually be added to the candidate
		/// list or not with respect to the <see cref="MinimumDocumentCount" />.
		/// Terms will only be considered if their local shard frequency within
		/// the set is higher than the <see cref="ShardMinimumDocumentCount" />.
		/// </summary>
		[DataMember(Name = "shard_min_doc_count")]
		long? ShardMinimumDocumentCount { get; set; }

		/// <summary>
		/// Controls the number of candidate terms produced by each shard from which
		/// the <see cref="Size" /> of terms is selected.
		/// </summary>
		[DataMember(Name = "shard_size")]
		int? ShardSize { get; set; }

		/// <summary>
		/// Defines how many term buckets should be returned out of the overall
		/// terms list
		/// </summary>
		[DataMember(Name = "size")]
		int? Size { get; set; }
	}

	/// <summary>
	/// Object-initializer implementation of <see cref="ISignificantTermsAggregation" />.
	/// </summary>
	public class SignificantTermsAggregation : BucketAggregationBase, ISignificantTermsAggregation
	{
		// Parameterless constructor is internal; public callers must supply a name.
		internal SignificantTermsAggregation() { }

		/// <summary>
		/// Creates a significant terms aggregation with the given name.
		/// </summary>
		/// <param name="name">The aggregation name, passed to the base class.</param>
		public SignificantTermsAggregation(string name) : base(name) { }

		/// <inheritdoc />
		public QueryContainer BackgroundFilter { get; set; }

		/// <inheritdoc />
		public IChiSquareHeuristic ChiSquare { get; set; }

		/// <inheritdoc />
		public IncludeExclude Exclude { get; set; }

		/// <inheritdoc />
		public TermsAggregationExecutionHint? ExecutionHint { get; set; }

		/// <inheritdoc />
		public Field Field { get; set; }

		/// <inheritdoc />
		public IGoogleNormalizedDistanceHeuristic GoogleNormalizedDistance { get; set; }

		/// <inheritdoc />
		public IncludeExclude Include { get; set; }

		/// <inheritdoc />
		public long? MinimumDocumentCount { get; set; }

		/// <inheritdoc />
		public IMutualInformationHeuristic MutualInformation { get; set; }

		/// <inheritdoc />
		public IPercentageScoreHeuristic PercentageScore { get; set; }

		/// <inheritdoc />
		public IScriptedHeuristic Script { get; set; }

		/// <inheritdoc />
		public long? ShardMinimumDocumentCount { get; set; }

		/// <inheritdoc />
		public int? ShardSize { get; set; }

		/// <inheritdoc />
		public int? Size { get; set; }

		// Registers this instance in the container's significant terms slot.
		internal override void WrapInContainer(AggregationContainer c) => c.SignificantTerms = this;
	}

	/// <summary>
	/// Fluent descriptor for building an <see cref="ISignificantTermsAggregation" />.
	/// Every fluent method stores its value through <c>Assign</c> and returns the descriptor.
	/// </summary>
	/// <typeparam name="T">The document type used for strongly typed field and query expressions.</typeparam>
	public class SignificantTermsAggregationDescriptor<T>
		: BucketAggregationDescriptorBase<SignificantTermsAggregationDescriptor<T>, ISignificantTermsAggregation, T>
			, ISignificantTermsAggregation
		where T : class
	{
		// Explicit interface implementations keep the state off the fluent surface,
		// where the same names are used for the builder methods below.
		QueryContainer ISignificantTermsAggregation.BackgroundFilter { get; set; }
		IChiSquareHeuristic ISignificantTermsAggregation.ChiSquare { get; set; }
		IncludeExclude ISignificantTermsAggregation.Exclude { get; set; }
		TermsAggregationExecutionHint? ISignificantTermsAggregation.ExecutionHint { get; set; }
		Field ISignificantTermsAggregation.Field { get; set; }
		IGoogleNormalizedDistanceHeuristic ISignificantTermsAggregation.GoogleNormalizedDistance { get; set; }
		IncludeExclude ISignificantTermsAggregation.Include { get; set; }
		long? ISignificantTermsAggregation.MinimumDocumentCount { get; set; }
		IMutualInformationHeuristic ISignificantTermsAggregation.MutualInformation { get; set; }
		IPercentageScoreHeuristic ISignificantTermsAggregation.PercentageScore { get; set; }
		IScriptedHeuristic ISignificantTermsAggregation.Script { get; set; }
		long? ISignificantTermsAggregation.ShardMinimumDocumentCount { get; set; }
		int? ISignificantTermsAggregation.ShardSize { get; set; }
		int? ISignificantTermsAggregation.Size { get; set; }

		/// <inheritdoc cref="ISignificantTermsAggregation.Field" />
		public SignificantTermsAggregationDescriptor<T> Field(Field field) =>
			Assign(field, (a, v) => a.Field = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.Field" />
		public SignificantTermsAggregationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field) =>
			Assign(field, (a, v) => a.Field = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.Size" />
		public SignificantTermsAggregationDescriptor<T> Size(int? size) =>
			Assign(size, (a, v) => a.Size = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.ExecutionHint" />
		public SignificantTermsAggregationDescriptor<T> ExecutionHint(TermsAggregationExecutionHint? hint) =>
			Assign(hint, (a, v) => a.ExecutionHint = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.Include" />
		public SignificantTermsAggregationDescriptor<T> Include(string includePattern) =>
			Assign(new IncludeExclude(includePattern), (a, v) => a.Include = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.Include" />
		public SignificantTermsAggregationDescriptor<T> Include(IEnumerable<string> values) =>
			Assign(new IncludeExclude(values), (a, v) => a.Include = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.Exclude" />
		public SignificantTermsAggregationDescriptor<T> Exclude(string excludePattern) =>
			Assign(new IncludeExclude(excludePattern), (a, v) => a.Exclude = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.Exclude" />
		public SignificantTermsAggregationDescriptor<T> Exclude(IEnumerable<string> values) =>
			Assign(new IncludeExclude(values), (a, v) => a.Exclude = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.ShardSize" />
		public SignificantTermsAggregationDescriptor<T> ShardSize(int? shardSize) =>
			Assign(shardSize, (a, v) => a.ShardSize = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.MinimumDocumentCount" />
		public SignificantTermsAggregationDescriptor<T> MinimumDocumentCount(long? minimumDocumentCount) =>
			Assign(minimumDocumentCount, (a, v) => a.MinimumDocumentCount = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.ShardMinimumDocumentCount" />
		public SignificantTermsAggregationDescriptor<T> ShardMinimumDocumentCount(long? shardMinimumDocumentCount) =>
			Assign(shardMinimumDocumentCount, (a, v) => a.ShardMinimumDocumentCount = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.MutualInformation" />
		public SignificantTermsAggregationDescriptor<T> MutualInformation(
			Func<MutualInformationHeuristicDescriptor, IMutualInformationHeuristic> mutualInformationSelector = null
		) =>
			Assign(mutualInformationSelector.InvokeOrDefault(new MutualInformationHeuristicDescriptor()),
				(a, v) => a.MutualInformation = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.ChiSquare" />
		public SignificantTermsAggregationDescriptor<T> ChiSquare(
			Func<ChiSquareHeuristicDescriptor, IChiSquareHeuristic> chiSquareSelector
		) =>
			Assign(chiSquareSelector.InvokeOrDefault(new ChiSquareHeuristicDescriptor()),
				(a, v) => a.ChiSquare = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.GoogleNormalizedDistance" />
		public SignificantTermsAggregationDescriptor<T> GoogleNormalizedDistance(
			Func<GoogleNormalizedDistanceHeuristicDescriptor, IGoogleNormalizedDistanceHeuristic> gndSelector
		) =>
			Assign(gndSelector.InvokeOrDefault(new GoogleNormalizedDistanceHeuristicDescriptor()),
				(a, v) => a.GoogleNormalizedDistance = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.PercentageScore" />
		public SignificantTermsAggregationDescriptor<T> PercentageScore(
			Func<PercentageScoreHeuristicDescriptor, IPercentageScoreHeuristic> percentageScoreSelector
		) =>
			Assign(percentageScoreSelector.InvokeOrDefault(new PercentageScoreHeuristicDescriptor()),
				(a, v) => a.PercentageScore = v);

		/// <inheritdoc cref="ISignificantTermsAggregation.Script" />
		public SignificantTermsAggregationDescriptor<T> Script(
			Func<ScriptedHeuristicDescriptor, IScriptedHeuristic> scriptSelector
		) =>
			// A null selector leaves Script null rather than creating an empty heuristic.
			Assign(scriptSelector, (a, v) => a.Script = v?.Invoke(new ScriptedHeuristicDescriptor()));

		/// <inheritdoc cref="ISignificantTermsAggregation.BackgroundFilter" />
		public SignificantTermsAggregationDescriptor<T> BackgroundFilter(
			Func<QueryContainerDescriptor<T>, QueryContainer> selector
		) =>
			// A null selector leaves BackgroundFilter null.
			Assign(selector, (a, v) => a.BackgroundFilter = v?.Invoke(new QueryContainerDescriptor<T>()));
	}
}