/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
*  Licensed to Elasticsearch B.V. under one or more contributor
*  license agreements. See the NOTICE file distributed with
*  this work for additional information regarding copyright
*  ownership. Elasticsearch B.V. licenses this file to you under
*  the Apache License, Version 2.0 (the "License"); you may
*  not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
* 	http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing,
*  software distributed under the License is distributed on an
*  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
*  KIND, either express or implied.  See the License for the
*  specific language governing permissions and limitations
*  under the License.
*/

using System;
using System.Collections.Generic;
using System.Linq.Expressions;
using System.Runtime.Serialization;
using OpenSearch.Net.Utf8Json;

namespace OpenSearch.Client.Specification.IngestApi
{
	/// <summary>
	/// The ingest attachment plugin lets OpenSearch extract file attachments in common formats
	/// (such as PPT, XLS, and PDF) by using the Apache text extraction library Tika.
	/// You can use the ingest attachment plugin as a replacement for the mapper attachment plugin.
	/// </summary>
	/// <remarks>
	/// Requires the Ingest Attachment Processor Plugin to be installed on the cluster.
	/// </remarks>
	[InterfaceDataContract]
	public interface IAttachmentProcessor : IProcessor
	{
		/// <summary> The field to get the base64 encoded field from. </summary>
		[DataMember(Name ="field")]
		Field Field { get; set; }

		/// <summary> If `true` and `field` does not exist, the processor quietly exits without modifying the document. </summary>
		[DataMember(Name ="ignore_missing")]
		bool? IgnoreMissing { get; set; }

		/// <summary>
		/// The number of chars being used for extraction to prevent huge fields. Use -1 for no limit.
		/// Defaults to 100000.
		/// </summary>
		[DataMember(Name ="indexed_chars")]
		long? IndexedCharacters { get; set; }

		/// <summary> Field name from which you can overwrite the number of chars being used for extraction. </summary>
		[DataMember(Name ="indexed_chars_field")]
		Field IndexedCharactersField { get; set; }

		/// <summary>
		/// Properties to select to be stored. Can be content, title, name, author,
		/// keywords, date, content_type, content_length, language. Defaults to all.
		/// </summary>
		[DataMember(Name ="properties")]
		IEnumerable<string> Properties { get; set; }

		/// <summary> The field that will hold the attachment information. </summary>
		[DataMember(Name ="target_field")]
		Field TargetField { get; set; }

		/// <summary>
		/// The field containing the name of the resource to decode.
		/// If specified, the processor passes this resource name to the underlying
		/// Tika library to enable 'Resource Name Based Detection'.
		/// </summary>
		[DataMember(Name = "resource_name")]
		Field ResourceName { get; set; }
	}

	/// <summary>
	/// Object-initializer form of <see cref="IAttachmentProcessor" />; every option is a plain
	/// settable property and the processor reports its name as <c>attachment</c>.
	/// </summary>
	public class AttachmentProcessor : ProcessorBase, IAttachmentProcessor
	{
		/// <inheritdoc />
		public Field Field { get; set; }

		/// <inheritdoc />
		public bool? IgnoreMissing { get; set; }

		/// <inheritdoc />
		public long? IndexedCharacters { get; set; }

		/// <inheritdoc />
		public Field IndexedCharactersField { get; set; }

		/// <inheritdoc />
		public IEnumerable<string> Properties { get; set; }

		/// <inheritdoc />
		public Field TargetField { get; set; }

		/// <inheritdoc />
		public Field ResourceName { get; set; }

		protected override string Name => "attachment";
	}

	/// <summary>
	/// Fluent form of <see cref="IAttachmentProcessor" />. Each method stores its argument in the
	/// matching explicitly-implemented interface property via <c>Assign</c>.
	/// </summary>
	/// <typeparam name="T">The document type used by the expression-based field selectors.</typeparam>
	public class AttachmentProcessorDescriptor<T>
		: ProcessorDescriptorBase<AttachmentProcessorDescriptor<T>, IAttachmentProcessor>, IAttachmentProcessor
		where T : class
	{
		protected override string Name => "attachment";

		// Explicit interface implementations keep the option state off the public fluent surface.
		Field IAttachmentProcessor.Field { get; set; }
		bool? IAttachmentProcessor.IgnoreMissing { get; set; }
		long? IAttachmentProcessor.IndexedCharacters { get; set; }
		Field IAttachmentProcessor.IndexedCharactersField { get; set; }
		IEnumerable<string> IAttachmentProcessor.Properties { get; set; }
		Field IAttachmentProcessor.TargetField { get; set; }
		Field IAttachmentProcessor.ResourceName { get; set; }

		/// <inheritdoc cref="IAttachmentProcessor.Field" />
		public AttachmentProcessorDescriptor<T> Field(Field field) =>
			Assign(field, (a, v) => a.Field = v);

		// NOTE(review): the generic arguments of the expression overloads (Func<T, TValue>) were
		// reconstructed from the stray '>' tokens left in the damaged source - confirm against upstream.
		/// <inheritdoc cref="IAttachmentProcessor.Field" />
		public AttachmentProcessorDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> objectPath) =>
			Assign(objectPath, (a, v) => a.Field = v);

		/// <inheritdoc cref="IAttachmentProcessor.TargetField" />
		public AttachmentProcessorDescriptor<T> TargetField(Field field) =>
			Assign(field, (a, v) => a.TargetField = v);

		/// <inheritdoc cref="IAttachmentProcessor.TargetField" />
		public AttachmentProcessorDescriptor<T> TargetField<TValue>(Expression<Func<T, TValue>> objectPath) =>
			Assign(objectPath, (a, v) => a.TargetField = v);

		/// <inheritdoc cref="IAttachmentProcessor.IndexedCharacters" />
		public AttachmentProcessorDescriptor<T> IndexedCharacters(long? indexedCharacters) =>
			Assign(indexedCharacters, (a, v) => a.IndexedCharacters = v);

		/// <inheritdoc cref="IAttachmentProcessor.IndexedCharactersField" />
		public AttachmentProcessorDescriptor<T> IndexedCharactersField(Field field) =>
			Assign(field, (a, v) => a.IndexedCharactersField = v);

		/// <inheritdoc cref="IAttachmentProcessor.IndexedCharactersField" />
		public AttachmentProcessorDescriptor<T> IndexedCharactersField<TValue>(Expression<Func<T, TValue>> objectPath) =>
			Assign(objectPath, (a, v) => a.IndexedCharactersField = v);

		/// <inheritdoc cref="IAttachmentProcessor.IgnoreMissing" />
		public AttachmentProcessorDescriptor<T> IgnoreMissing(bool? ignoreMissing = true) =>
			Assign(ignoreMissing, (a, v) => a.IgnoreMissing = v);

		/// <inheritdoc cref="IAttachmentProcessor.Properties" />
		public AttachmentProcessorDescriptor<T> Properties(IEnumerable<string> properties) =>
			Assign(properties, (a, v) => a.Properties = v);

		/// <inheritdoc cref="IAttachmentProcessor.Properties" />
		public AttachmentProcessorDescriptor<T> Properties(params string[] properties) =>
			Assign(properties, (a, v) => a.Properties = v);

		/// <inheritdoc cref="IAttachmentProcessor.ResourceName" />
		public AttachmentProcessorDescriptor<T> ResourceName(Field field) =>
			Assign(field, (a, v) => a.ResourceName = v);

		/// <inheritdoc cref="IAttachmentProcessor.ResourceName" />
		public AttachmentProcessorDescriptor<T> ResourceName<TValue>(Expression<Func<T, TValue>> objectPath) =>
			Assign(objectPath, (a, v) => a.ResourceName = v);
	}
}