/* * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with * the License. A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ package com.amazonaws.services.comprehend.model; import java.io.Serializable; import javax.annotation.Generated; import com.amazonaws.protocol.StructuredPojo; import com.amazonaws.protocol.ProtocolMarshaller; /** *

* <p>
* Provides configuration parameters to override the default actions for extracting text from PDF documents and image
* files.
* </p>

*

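* By default, Amazon Comprehend performs the following actions to extract text from files, based on the input file
* type:
* <ul>
* <li><b>Word files</b> - Amazon Comprehend parser extracts the text.</li>
* <li><b>Digital PDF files</b> - Amazon Comprehend parser extracts the text.</li>
* <li><b>Image files and scanned PDF files</b> - Amazon Comprehend uses the Amazon Textract
* <code>DetectDocumentText</code> API to extract the text.</li>
* </ul>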

* DocumentReaderConfig does not apply to plain text files or Word files. *

*

* For image files and PDF documents, you can override these default actions using the fields listed below. For more
* information, see
* <a href="https://docs.aws.amazon.com/comprehend/latest/dg/idp-set-textract-options.html">Setting text extraction
* options</a> in the Comprehend Developer Guide.

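* * @see <a href="http://docs.aws.amazon.com/goto/WebAPI/comprehend-2017-11-27/DocumentReaderConfig" target="_top">AWS API Documentation</a> */ @Generated("com.amazonaws:aws-java-sdk-code-generator") public class DocumentReaderConfig implements Serializable, Cloneable, StructuredPojo { /** *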

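* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files
* and image files. Enter one of the following values:
* <ul>
* <li><code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend service uses the
* <code>DetectDocumentText</code> API operation.</li>
* <li><code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service uses the <code>AnalyzeDocument</code>
* API operation.</li>
* </ul>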

* */ private String documentReadAction; /** *

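* Determines the text extraction actions for PDF files. Enter one of the following values:
* <ul>
* <li><code>SERVICE_DEFAULT</code> - Use the Amazon Comprehend service defaults for PDF files.</li>
* <li><code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the Textract API specified by
* <code>DocumentReadAction</code> for all PDF files, including digital PDF files.</li>
* </ul>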

* */ private String documentReadMode; /** *

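* Specifies the type of Amazon Textract features to apply. If you chose <code>TEXTRACT_ANALYZE_DOCUMENT</code> as
* the read action, you must specify one or both of the following values:
* <ul>
* <li><code>TABLES</code> - Returns additional information about any tables that are detected in the input
* document.</li>
* <li><code>FORMS</code> - Returns additional information about any forms that are detected in the input
* document.</li>
* </ul>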

* */ private java.util.List featureTypes; /** *

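* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files
* and image files. Enter one of the following values:
* <ul>
* <li><code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend service uses the
* <code>DetectDocumentText</code> API operation.</li>
* <li><code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service uses the <code>AnalyzeDocument</code>
* API operation.</li>
* </ul>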

* * * @param documentReadAction * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF * files and image files. Enter one of the following values:

*