/** * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0. */ #pragma once #include #include #include #include #include #include namespace Aws { namespace Utils { namespace Json { class JsonValue; class JsonView; } // namespace Json } // namespace Utils namespace Comprehend { namespace Model { /** *

Provides configuration parameters to override the default actions for extracting text from PDF documents and image files.

By default, Amazon Comprehend performs the following actions to extract text from files, based on the input file type:

  • Word files - Amazon Comprehend parser extracts the text.

  • Digital PDF files - Amazon Comprehend parser extracts the text.

  • Image files and scanned PDF files - Amazon Comprehend uses the Amazon Textract DetectDocumentText API to extract the text.

DocumentReaderConfig does not apply to plain text files or Word files.

For image files and PDF documents, you can override these default actions using the fields listed below. For more information, see Setting text extraction options in the Comprehend Developer Guide.

See Also:

AWS API Reference

*/
  class DocumentReaderConfig
  {
  public:
    /** Default constructor. Declared here; the definition lives outside this header. */
    AWS_COMPREHEND_API DocumentReaderConfig();

    /** Constructs a config from a JSON view. Declared here; defined outside this header. */
    AWS_COMPREHEND_API DocumentReaderConfig(Aws::Utils::Json::JsonView jsonValue);

    /** Assigns from a JSON view and returns this object. Declared here; defined outside this header. */
    AWS_COMPREHEND_API DocumentReaderConfig& operator=(Aws::Utils::Json::JsonView jsonValue);

    /**
     * Produces an Aws::Utils::Json::JsonValue for this object.
     * NOTE(review): presumably the serialized form of the three fields below;
     * the definition is not visible in this header.
     */
    AWS_COMPREHEND_API Aws::Utils::Json::JsonValue Jsonize() const;


    /**
     * Returns the Amazon Textract API operation that Amazon Comprehend uses to
     * extract text from PDF files and image files. One of the following values:
     *  - TEXTRACT_DETECT_DOCUMENT_TEXT - The Amazon Comprehend service uses the
     *    DetectDocumentText API operation.
     *  - TEXTRACT_ANALYZE_DOCUMENT - The Amazon Comprehend service uses the
     *    AnalyzeDocument API operation.
     */
    inline const DocumentReadAction& GetDocumentReadAction() const { return m_documentReadAction; }

    /**
     * Reports whether the document read action has been assigned. The flag
     * starts out false and is raised by SetDocumentReadAction() and
     * WithDocumentReadAction().
     */
    inline bool DocumentReadActionHasBeenSet() const { return m_documentReadActionHasBeenSet; }

    /**
     * Stores a copy of the Textract API operation to use (see
     * GetDocumentReadAction() for the accepted values) and marks the field as set.
     */
    inline void SetDocumentReadAction(const DocumentReadAction& value)
    {
      m_documentReadActionHasBeenSet = true;
      m_documentReadAction = value;
    }

    /**
     * Rvalue overload of SetDocumentReadAction(): moves the value into place
     * and marks the field as set.
     */
    inline void SetDocumentReadAction(DocumentReadAction&& value)
    {
      m_documentReadActionHasBeenSet = true;
      m_documentReadAction = std::move(value);
    }

    /**
     * Chaining form of SetDocumentReadAction(const DocumentReadAction&);
     * returns this object so calls can be strung together.
     */
    inline DocumentReaderConfig& WithDocumentReadAction(const DocumentReadAction& value)
    {
      SetDocumentReadAction(value);
      return *this;
    }

    /**
     * Chaining form of SetDocumentReadAction(DocumentReadAction&&);
     * returns this object so calls can be strung together.
     */
    inline DocumentReaderConfig& WithDocumentReadAction(DocumentReadAction&& value)
    {
      SetDocumentReadAction(std::move(value));
      return *this;
    }


    /**
     * Returns the mode that determines the text extraction actions for PDF
     * files. One of the following values:
     *  - SERVICE_DEFAULT - use the Amazon Comprehend service defaults for PDF
     *    files.
     *  - FORCE_DOCUMENT_READ_ACTION - Amazon Comprehend uses the Textract API
     *    specified by DocumentReadAction for all PDF files, including digital
     *    PDF files.
     */
    inline const DocumentReadMode& GetDocumentReadMode() const { return m_documentReadMode; }

    /**
     * Reports whether the document read mode has been assigned. The flag
     * starts out false and is raised by SetDocumentReadMode() and
     * WithDocumentReadMode().
     */
    inline bool DocumentReadModeHasBeenSet() const { return m_documentReadModeHasBeenSet; }

    /**
     * Stores a copy of the PDF text extraction mode (see GetDocumentReadMode()
     * for the accepted values) and marks the field as set.
     */
    inline void SetDocumentReadMode(const DocumentReadMode& value)
    {
      m_documentReadModeHasBeenSet = true;
      m_documentReadMode = value;
    }

    /**
     * Rvalue overload of SetDocumentReadMode(): moves the value into place and
     * marks the field as set.
     */
    inline void SetDocumentReadMode(DocumentReadMode&& value)
    {
      m_documentReadModeHasBeenSet = true;
      m_documentReadMode = std::move(value);
    }

    /**
     * Chaining form of SetDocumentReadMode(const DocumentReadMode&);
     * returns this object so calls can be strung together.
     */
    inline DocumentReaderConfig& WithDocumentReadMode(const DocumentReadMode& value)
    {
      SetDocumentReadMode(value);
      return *this;
    }

    /**
     * Chaining form of SetDocumentReadMode(DocumentReadMode&&);
     * returns this object so calls can be strung together.
     */
    inline DocumentReaderConfig& WithDocumentReadMode(DocumentReadMode&& value)
    {
      SetDocumentReadMode(std::move(value));
      return *this;
    }


    /**
     * Returns the list of Amazon Textract feature types to apply. If you chose
     * TEXTRACT_ANALYZE_DOCUMENT as the read action, you must specify one or
     * both of the following values:
     *  - TABLES - Returns information about any tables that are detected in
     *    the input document.
     *  - FORMS - Returns information and the data from any forms that are
     *    detected in the input document.
     */
    inline const Aws::Vector<DocumentReadFeatureTypes>& GetFeatureTypes() const { return m_featureTypes; }

    /**
     * Reports whether the feature type list has been assigned or appended to.
     * The flag starts out false and is raised by SetFeatureTypes(),
     * WithFeatureTypes() and AddFeatureTypes().
     */
    inline bool FeatureTypesHasBeenSet() const { return m_featureTypesHasBeenSet; }

    /**
     * Replaces the feature type list with a copy of the given vector (see
     * GetFeatureTypes() for the accepted values) and marks the field as set.
     */
    inline void SetFeatureTypes(const Aws::Vector<DocumentReadFeatureTypes>& value)
    {
      m_featureTypesHasBeenSet = true;
      m_featureTypes = value;
    }

    /**
     * Rvalue overload of SetFeatureTypes(): moves the given vector into place
     * and marks the field as set.
     */
    inline void SetFeatureTypes(Aws::Vector<DocumentReadFeatureTypes>&& value)
    {
      m_featureTypesHasBeenSet = true;
      m_featureTypes = std::move(value);
    }

    /**
     * Chaining form of SetFeatureTypes(const Aws::Vector<DocumentReadFeatureTypes>&);
     * returns this object so calls can be strung together.
     */
    inline DocumentReaderConfig& WithFeatureTypes(const Aws::Vector<DocumentReadFeatureTypes>& value)
    {
      SetFeatureTypes(value);
      return *this;
    }

    /**
     * Chaining form of SetFeatureTypes(Aws::Vector<DocumentReadFeatureTypes>&&);
     * returns this object so calls can be strung together.
     */
    inline DocumentReaderConfig& WithFeatureTypes(Aws::Vector<DocumentReadFeatureTypes>&& value)
    {
      SetFeatureTypes(std::move(value));
      return *this;
    }

    /**
     * Appends one feature type (TABLES or FORMS, see GetFeatureTypes()) to the
     * end of the list, marks the field as set, and returns this object.
     * No de-duplication is performed here.
     */
    inline DocumentReaderConfig& AddFeatureTypes(const DocumentReadFeatureTypes& value)
    {
      m_featureTypesHasBeenSet = true;
      m_featureTypes.push_back(value);
      return *this;
    }

    /**
     * Rvalue overload of AddFeatureTypes(): appends the value by move, marks
     * the field as set, and returns this object.
     */
    inline DocumentReaderConfig& AddFeatureTypes(DocumentReadFeatureTypes&& value)
    {
      m_featureTypesHasBeenSet = true;
      m_featureTypes.push_back(std::move(value));
      return *this;
    }

  private:

    // Each field is paired with a flag recording whether a setter in this
    // header has assigned it; the flags default to false.
    DocumentReadAction m_documentReadAction;
    bool m_documentReadActionHasBeenSet = false;

    DocumentReadMode m_documentReadMode;
    bool m_documentReadModeHasBeenSet = false;

    // Element type matches what AddFeatureTypes() pushes into the list.
    Aws::Vector<DocumentReadFeatureTypes> m_featureTypes;
    bool m_featureTypesHasBeenSet = false;
  };

} // namespace Model
} // namespace Comprehend
} // namespace Aws