/* * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with * the License. A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ package com.amazonaws.services.comprehend.model; import java.io.Serializable; import javax.annotation.Generated; import com.amazonaws.protocol.StructuredPojo; import com.amazonaws.protocol.ProtocolMarshaller; /** *

* Provides configuration parameters to override the default actions for extracting text from PDF documents and image * files. *

* By default, Amazon Comprehend performs the following actions to extract text from files, based on the input file * type: *

*
* Word files - Amazon Comprehend parser extracts the text. *
*
*
* Digital PDF files - Amazon Comprehend parser extracts the text. *
*
*
* Image files and scanned PDF files - Amazon Comprehend uses the Amazon Textract DetectDocumentText * API to extract the text. *
*

* DocumentReaderConfig does not apply to plain text files or Word files. *

* For image files and PDF documents, you can override these default actions using the fields listed below. For more * information, see Setting * text extraction options in the Comprehend Developer Guide. *

* * @see AWS * API Documentation */ @Generated("com.amazonaws:aws-java-sdk-code-generator") public class DocumentReaderConfig implements Serializable, Cloneable, StructuredPojo { /** *

* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *
*

*/ private String documentReadAction; /** *

* Determines the text extraction actions for PDF files. Enter one of the following values: *

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *
*

*/ private String documentReadMode; /** *

* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input document. *
*

*/ private java.util.List featureTypes; /** *

* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *
*

* * @param documentReadAction * This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF * files and image files. Enter one of the following values:

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *
*

* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *
*

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *
*

* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *
*

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *
*

* This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files * and image files. Enter one of the following values: *

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the AnalyzeDocument API * operation. *
*

*
* TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the * DetectDocumentText API operation. *
*
*
* TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the * AnalyzeDocument API operation. *
*

* Determines the text extraction actions for PDF files. Enter one of the following values: *

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *
*

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *
*

* Determines the text extraction actions for PDF files. Enter one of the following values: *

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *
*

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *
*

* Determines the text extraction actions for PDF files. Enter one of the following values: *

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *
*

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *
*

* Determines the text extraction actions for PDF files. Enter one of the following values: *

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by DocumentReadAction * for all PDF files, including digital PDF files. *
*

*
* SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF files. *
*
*
* FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API specified by * DocumentReadAction for all PDF files, including digital PDF files. *
*

* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input document. *
*

TEXTRACT_ANALYZE_DOCUMENT

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input * document. *
*

getFeatureTypes() { return featureTypes; } /** *

* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input document. *
*

* * @param featureTypes * Specifies the type of Amazon Textract features to apply. If you chose * TEXTRACT_ANALYZE_DOCUMENT as the read action, you must specify one or both of the following * values:

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input * document. *
*

featureTypes) { if (featureTypes == null) { this.featureTypes = null; return; } this.featureTypes = new java.util.ArrayList(featureTypes); } /** *

* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input document. *
*

* NOTE: This method appends the values to the existing list (if any). Use * {@link #setFeatureTypes(java.util.Collection)} or {@link #withFeatureTypes(java.util.Collection)} if you want to * override the existing values. *

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input * document. *
*

(featureTypes.length)); } for (String ele : featureTypes) { this.featureTypes.add(ele); } return this; } /** *

* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input document. *
*

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input * document. *
*

featureTypes) { setFeatureTypes(featureTypes); return this; } /** *

* Specifies the type of Amazon Textract features to apply. If you chose TEXTRACT_ANALYZE_DOCUMENT as * the read action, you must specify one or both of the following values: *

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input document. *
*

*
* TABLES - Returns information about any tables that are detected in the input document. *
*
*
* FORMS - Returns information and the data from any forms that are detected in the input * document. *
*

featureTypesCopy = new java.util.ArrayList(featureTypes.length); for (DocumentReadFeatureTypes value : featureTypes) { featureTypesCopy.add(value.toString()); } if (getFeatureTypes() == null) { setFeatureTypes(featureTypesCopy); } else { getFeatureTypes().addAll(featureTypesCopy); } return this; } /** * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be * redacted from this string using a placeholder value. * * @return A string representation of this object. * * @see java.lang.Object#toString() */ @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("{"); if (getDocumentReadAction() != null) sb.append("DocumentReadAction: ").append(getDocumentReadAction()).append(","); if (getDocumentReadMode() != null) sb.append("DocumentReadMode: ").append(getDocumentReadMode()).append(","); if (getFeatureTypes() != null) sb.append("FeatureTypes: ").append(getFeatureTypes()); sb.append("}"); return sb.toString(); } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (obj instanceof DocumentReaderConfig == false) return false; DocumentReaderConfig other = (DocumentReaderConfig) obj; if (other.getDocumentReadAction() == null ^ this.getDocumentReadAction() == null) return false; if (other.getDocumentReadAction() != null && other.getDocumentReadAction().equals(this.getDocumentReadAction()) == false) return false; if (other.getDocumentReadMode() == null ^ this.getDocumentReadMode() == null) return false; if (other.getDocumentReadMode() != null && other.getDocumentReadMode().equals(this.getDocumentReadMode()) == false) return false; if (other.getFeatureTypes() == null ^ this.getFeatureTypes() == null) return false; if (other.getFeatureTypes() != null && other.getFeatureTypes().equals(this.getFeatureTypes()) == false) return false; return true; } @Override public int hashCode() { final int prime = 31; int hashCode = 1; hashCode = prime * hashCode + ((getDocumentReadAction() == null) ? 0 : getDocumentReadAction().hashCode()); hashCode = prime * hashCode + ((getDocumentReadMode() == null) ? 0 : getDocumentReadMode().hashCode()); hashCode = prime * hashCode + ((getFeatureTypes() == null) ? 0 : getFeatureTypes().hashCode()); return hashCode; } @Override public DocumentReaderConfig clone() { try { return (DocumentReaderConfig) super.clone(); } catch (CloneNotSupportedException e) { throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e); } } @com.amazonaws.annotation.SdkInternalApi @Override public void marshall(ProtocolMarshaller protocolMarshaller) { com.amazonaws.services.comprehend.model.transform.DocumentReaderConfigMarshaller.getInstance().marshall(this, protocolMarshaller); } }