/*
 * Copyright 2010-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package com.amazonaws.services.comprehend.model;

import java.io.Serializable;

/**
 * <p>
 * Provides configuration parameters to override the default actions for
 * extracting text from PDF documents and image files.
 * </p>
 * <p>
 * By default, Amazon Comprehend performs the following actions to extract text
 * from files, based on the input file type:
 * </p>
 * <ul>
 * <li>
 * <p>
 * <b>Word files</b> - Amazon Comprehend parser extracts the text.
 * </p>
 * </li>
 * <li>
 * <p>
 * <b>Digital PDF files</b> - Amazon Comprehend parser extracts the text.
 * </p>
 * </li>
 * <li>
 * <p>
 * <b>Image files and scanned PDF files</b> - Amazon Comprehend uses the Amazon
 * Textract <code>DetectDocumentText</code> API to extract the text.
 * </p>
 * </li>
 * </ul>
 * <p>
 * <code>DocumentReaderConfig</code> does not apply to plain text files or Word
 * files.
 * </p>
 * <p>
 * For image files and PDF documents, you can override these default actions
 * using the fields listed below. For more information, see <i>Setting text
 * extraction options</i> in the Comprehend Developer Guide.
 * </p>
 */
public class DocumentReaderConfig implements Serializable {

    /**
     * <p>
     * This field defines the Amazon Textract API operation that Amazon
     * Comprehend uses to extract text from PDF files and image files. Enter one
     * of the following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend
     * service uses the <code>DetectDocumentText</code> API operation.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service
     * uses the <code>AnalyzeDocument</code> API operation.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>TEXTRACT_DETECT_DOCUMENT_TEXT,
     * TEXTRACT_ANALYZE_DOCUMENT
     */
    private String documentReadAction;

    /**
     * <p>
     * Determines the text extraction actions for PDF files. Enter one of the
     * following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>SERVICE_DEFAULT</code> - use the Amazon Comprehend service defaults
     * for PDF files.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the
     * Textract API specified by DocumentReadAction for all PDF files, including
     * digital PDF files.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
     */
    private String documentReadMode;

    /**
     * <p>
     * Specifies the type of Amazon Textract features to apply. If you chose
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> as the read action, you must
     * specify one or both of the following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TABLES</code> - Returns information about any tables that are
     * detected in the input document.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORMS</code> - Returns information and the data from any forms that
     * are detected in the input document.
     * </p>
     * </li>
     * </ul>
     */
    private java.util.List<String> featureTypes;

    /**
     * <p>
     * Returns the Amazon Textract API operation that Amazon Comprehend uses to
     * extract text from PDF files and image files. It is one of the following
     * values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend
     * service uses the <code>DetectDocumentText</code> API operation.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service
     * uses the <code>AnalyzeDocument</code> API operation.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>TEXTRACT_DETECT_DOCUMENT_TEXT,
     * TEXTRACT_ANALYZE_DOCUMENT
     *
     * @return The configured read action, or <code>null</code> if none has
     *         been set.
     * @see DocumentReadAction
     */
    public String getDocumentReadAction() {
        return documentReadAction;
    }

    /**
     * <p>
     * Sets the Amazon Textract API operation that Amazon Comprehend uses to
     * extract text from PDF files and image files. Enter one of the following
     * values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend
     * service uses the <code>DetectDocumentText</code> API operation.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service
     * uses the <code>AnalyzeDocument</code> API operation.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>TEXTRACT_DETECT_DOCUMENT_TEXT,
     * TEXTRACT_ANALYZE_DOCUMENT
     *
     * @param documentReadAction The read action to use; stored as given,
     *            including <code>null</code>.
     * @see DocumentReadAction
     */
    public void setDocumentReadAction(String documentReadAction) {
        this.documentReadAction = documentReadAction;
    }

    /**
     * <p>
     * Sets the Amazon Textract API operation that Amazon Comprehend uses to
     * extract text from PDF files and image files. Enter one of the following
     * values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend
     * service uses the <code>DetectDocumentText</code> API operation.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service
     * uses the <code>AnalyzeDocument</code> API operation.
     * </p>
     * </li>
     * </ul>
     * <p>
     * Returns a reference to this object so that method calls can be chained
     * together.
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>TEXTRACT_DETECT_DOCUMENT_TEXT,
     * TEXTRACT_ANALYZE_DOCUMENT
     *
     * @param documentReadAction The read action to use; stored as given,
     *            including <code>null</code>.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     * @see DocumentReadAction
     */
    public DocumentReaderConfig withDocumentReadAction(String documentReadAction) {
        this.documentReadAction = documentReadAction;
        return this;
    }

    /**
     * <p>
     * Sets the Amazon Textract API operation that Amazon Comprehend uses to
     * extract text from PDF files and image files, from its enum form. The
     * value is stored as the enum's string representation.
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend
     * service uses the <code>DetectDocumentText</code> API operation.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service
     * uses the <code>AnalyzeDocument</code> API operation.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>TEXTRACT_DETECT_DOCUMENT_TEXT,
     * TEXTRACT_ANALYZE_DOCUMENT
     *
     * @param documentReadAction The read action to use. A <code>null</code>
     *            argument clears the value, matching the behavior of
     *            {@link #setDocumentReadAction(String)}.
     * @see DocumentReadAction
     */
    public void setDocumentReadAction(DocumentReadAction documentReadAction) {
        // Tolerate null so the enum overload behaves like the String overload
        // instead of failing with a NullPointerException.
        this.documentReadAction = (documentReadAction == null)
                ? null
                : documentReadAction.toString();
    }

    /**
     * <p>
     * Sets the Amazon Textract API operation that Amazon Comprehend uses to
     * extract text from PDF files and image files, from its enum form. The
     * value is stored as the enum's string representation.
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TEXTRACT_DETECT_DOCUMENT_TEXT</code> - The Amazon Comprehend
     * service uses the <code>DetectDocumentText</code> API operation.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> - The Amazon Comprehend service
     * uses the <code>AnalyzeDocument</code> API operation.
     * </p>
     * </li>
     * </ul>
     * <p>
     * Returns a reference to this object so that method calls can be chained
     * together.
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>TEXTRACT_DETECT_DOCUMENT_TEXT,
     * TEXTRACT_ANALYZE_DOCUMENT
     *
     * @param documentReadAction The read action to use. A <code>null</code>
     *            argument clears the value.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     * @see DocumentReadAction
     */
    public DocumentReaderConfig withDocumentReadAction(DocumentReadAction documentReadAction) {
        this.documentReadAction = (documentReadAction == null)
                ? null
                : documentReadAction.toString();
        return this;
    }

    /**
     * <p>
     * Returns the text extraction mode for PDF files. It is one of the
     * following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>SERVICE_DEFAULT</code> - use the Amazon Comprehend service defaults
     * for PDF files.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the
     * Textract API specified by DocumentReadAction for all PDF files, including
     * digital PDF files.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
     *
     * @return The configured read mode, or <code>null</code> if none has been
     *         set.
     * @see DocumentReadMode
     */
    public String getDocumentReadMode() {
        return documentReadMode;
    }

    /**
     * <p>
     * Determines the text extraction actions for PDF files. Enter one of the
     * following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>SERVICE_DEFAULT</code> - use the Amazon Comprehend service defaults
     * for PDF files.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the
     * Textract API specified by DocumentReadAction for all PDF files, including
     * digital PDF files.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
     *
     * @param documentReadMode The read mode to use; stored as given, including
     *            <code>null</code>.
     * @see DocumentReadMode
     */
    public void setDocumentReadMode(String documentReadMode) {
        this.documentReadMode = documentReadMode;
    }

    /**
     * <p>
     * Determines the text extraction actions for PDF files. Enter one of the
     * following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>SERVICE_DEFAULT</code> - use the Amazon Comprehend service defaults
     * for PDF files.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the
     * Textract API specified by DocumentReadAction for all PDF files, including
     * digital PDF files.
     * </p>
     * </li>
     * </ul>
     * <p>
     * Returns a reference to this object so that method calls can be chained
     * together.
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
     *
     * @param documentReadMode The read mode to use; stored as given, including
     *            <code>null</code>.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     * @see DocumentReadMode
     */
    public DocumentReaderConfig withDocumentReadMode(String documentReadMode) {
        this.documentReadMode = documentReadMode;
        return this;
    }

    /**
     * <p>
     * Determines the text extraction actions for PDF files, from the enum
     * form. The value is stored as the enum's string representation.
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>SERVICE_DEFAULT</code> - use the Amazon Comprehend service defaults
     * for PDF files.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the
     * Textract API specified by DocumentReadAction for all PDF files, including
     * digital PDF files.
     * </p>
     * </li>
     * </ul>
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
     *
     * @param documentReadMode The read mode to use. A <code>null</code>
     *            argument clears the value, matching the behavior of
     *            {@link #setDocumentReadMode(String)}.
     * @see DocumentReadMode
     */
    public void setDocumentReadMode(DocumentReadMode documentReadMode) {
        // Tolerate null so the enum overload behaves like the String overload
        // instead of failing with a NullPointerException.
        this.documentReadMode = (documentReadMode == null)
                ? null
                : documentReadMode.toString();
    }

    /**
     * <p>
     * Determines the text extraction actions for PDF files, from the enum
     * form. The value is stored as the enum's string representation.
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>SERVICE_DEFAULT</code> - use the Amazon Comprehend service defaults
     * for PDF files.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORCE_DOCUMENT_READ_ACTION</code> - Amazon Comprehend uses the
     * Textract API specified by DocumentReadAction for all PDF files, including
     * digital PDF files.
     * </p>
     * </li>
     * </ul>
     * <p>
     * Returns a reference to this object so that method calls can be chained
     * together.
     * <p>
     * <b>Constraints:</b><br/>
     * <b>Allowed Values: </b>SERVICE_DEFAULT, FORCE_DOCUMENT_READ_ACTION
     *
     * @param documentReadMode The read mode to use. A <code>null</code>
     *            argument clears the value.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     * @see DocumentReadMode
     */
    public DocumentReaderConfig withDocumentReadMode(DocumentReadMode documentReadMode) {
        this.documentReadMode = (documentReadMode == null)
                ? null
                : documentReadMode.toString();
        return this;
    }

    /**
     * <p>
     * Returns the type of Amazon Textract features to apply. If you chose
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> as the read action, you must
     * specify one or both of the following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TABLES</code> - Returns information about any tables that are
     * detected in the input document.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORMS</code> - Returns information and the data from any forms that
     * are detected in the input document.
     * </p>
     * </li>
     * </ul>
     *
     * @return The internal list of feature types (not a copy), or
     *         <code>null</code> if none has been set.
     */
    public java.util.List<String> getFeatureTypes() {
        return featureTypes;
    }

    /**
     * <p>
     * Specifies the type of Amazon Textract features to apply. If you chose
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> as the read action, you must
     * specify one or both of the following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TABLES</code> - Returns information about any tables that are
     * detected in the input document.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORMS</code> - Returns information and the data from any forms that
     * are detected in the input document.
     * </p>
     * </li>
     * </ul>
     *
     * @param featureTypes The feature types to apply. The collection is copied
     *            into a new list; <code>null</code> clears the current value.
     */
    public void setFeatureTypes(java.util.Collection<String> featureTypes) {
        if (featureTypes == null) {
            this.featureTypes = null;
            return;
        }

        // Defensive copy: later changes to the caller's collection must not
        // affect this object.
        this.featureTypes = new java.util.ArrayList<String>(featureTypes);
    }

    /**
     * <p>
     * Appends the given values to the list of Amazon Textract features to
     * apply, creating the list first if it does not exist yet. If you chose
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> as the read action, you must
     * specify one or both of the following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TABLES</code> - Returns information about any tables that are
     * detected in the input document.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORMS</code> - Returns information and the data from any forms that
     * are detected in the input document.
     * </p>
     * </li>
     * </ul>
     * <p>
     * Returns a reference to this object so that method calls can be chained
     * together.
     *
     * @param featureTypes The feature types to add to any values already
     *            present.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     */
    public DocumentReaderConfig withFeatureTypes(String... featureTypes) {
        if (getFeatureTypes() == null) {
            // Presize the backing list for the values about to be added.
            this.featureTypes = new java.util.ArrayList<String>(featureTypes.length);
        }
        java.util.Collections.addAll(this.featureTypes, featureTypes);
        return this;
    }

    /**
     * <p>
     * Replaces the list of Amazon Textract features to apply. If you chose
     * <code>TEXTRACT_ANALYZE_DOCUMENT</code> as the read action, you must
     * specify one or both of the following values:
     * </p>
     * <ul>
     * <li>
     * <p>
     * <code>TABLES</code> - Returns information about any tables that are
     * detected in the input document.
     * </p>
     * </li>
     * <li>
     * <p>
     * <code>FORMS</code> - Returns information and the data from any forms that
     * are detected in the input document.
     * </p>
     * </li>
     * </ul>
     * <p>
     * Returns a reference to this object so that method calls can be chained
     * together.
     *
     * @param featureTypes The feature types to apply; handled exactly as in
     *            {@link #setFeatureTypes(java.util.Collection)}.
     * @return A reference to this updated object so that method calls can be
     *         chained together.
     */
    public DocumentReaderConfig withFeatureTypes(java.util.Collection<String> featureTypes) {
        setFeatureTypes(featureTypes);
        return this;
    }

    /**
     * Returns a string representation of this object; useful for testing and
     * debugging. Only fields that are not <code>null</code> are included.
     *
     * @return A string representation of this object.
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        if (getDocumentReadAction() != null) {
            sb.append("DocumentReadAction: " + getDocumentReadAction() + ",");
        }
        if (getDocumentReadMode() != null) {
            sb.append("DocumentReadMode: " + getDocumentReadMode() + ",");
        }
        if (getFeatureTypes() != null) {
            sb.append("FeatureTypes: " + getFeatureTypes());
        }
        sb.append("}");
        return sb.toString();
    }

    /**
     * Computes a hash code from the read action, read mode and feature types,
     * treating a <code>null</code> field as 0.
     *
     * @return The hash code of this object.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int hashCode = 1;

        hashCode = prime * hashCode + hashOf(getDocumentReadAction());
        hashCode = prime * hashCode + hashOf(getDocumentReadMode());
        hashCode = prime * hashCode + hashOf(getFeatureTypes());
        return hashCode;
    }

    /**
     * Compares this object with another for equality. Two instances are equal
     * when their read action, read mode and feature types are all equal (or
     * both <code>null</code>).
     *
     * @param obj The object to compare with.
     * @return <code>true</code> if <code>obj</code> is a
     *         <code>DocumentReaderConfig</code> with equal field values.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof DocumentReaderConfig)) {
            // Also covers obj == null.
            return false;
        }

        DocumentReaderConfig other = (DocumentReaderConfig) obj;
        return sameValue(other.getDocumentReadAction(), this.getDocumentReadAction())
                && sameValue(other.getDocumentReadMode(), this.getDocumentReadMode())
                && sameValue(other.getFeatureTypes(), this.getFeatureTypes());
    }

    /** Returns the hash code of {@code value}, or 0 when it is null. */
    private static int hashOf(Object value) {
        return (value == null) ? 0 : value.hashCode();
    }

    /** Null-safe equality: true when both are null or {@code theirs.equals(ours)}. */
    private static boolean sameValue(Object theirs, Object ours) {
        if (theirs == null || ours == null) {
            return theirs == ours;
        }
        return theirs.equals(ours);
    }
}