package awscomprehend


// Provides configuration parameters to override the default actions for extracting text from PDF documents and image files.
//
// By default, Amazon Comprehend performs the following actions to extract text from files, based on the input file type:
//
// - *Word files* - Amazon Comprehend parser extracts the text.
// - *Digital PDF files* - Amazon Comprehend parser extracts the text.
// - *Image files and scanned PDF files* - Amazon Comprehend uses the Amazon Textract `DetectDocumentText` API to extract the text.
//
// `DocumentReaderConfig` does not apply to plain text files or Word files.
//
// For image files and PDF documents, you can override these default actions using the fields listed below. For more information, see [Setting text extraction options](https://docs.aws.amazon.com/comprehend/latest/dg/idp-set-textract-options.html) in the Comprehend Developer Guide.
//
// Example:
//   // The code below shows an example of how to instantiate this type.
//   // The values are placeholders you should change.
//   import "github.com/aws/aws-cdk-go/awscdk"
//
//   documentReaderConfigProperty := &DocumentReaderConfigProperty{
//   	DocumentReadAction: jsii.String("documentReadAction"),
//
//   	// the properties below are optional
//   	DocumentReadMode: jsii.String("documentReadMode"),
//   	FeatureTypes: []*string{
//   		jsii.String("featureTypes"),
//   	},
//   }
//
// See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-comprehend-documentclassifier-documentreaderconfig.html
//
type CfnDocumentClassifier_DocumentReaderConfigProperty struct {
	// This field defines the Amazon Textract API operation that Amazon Comprehend uses to extract text from PDF files and image files.
	//
	// Enter one of the following values:
	//
	// - `TEXTRACT_DETECT_DOCUMENT_TEXT` - The Amazon Comprehend service uses the `DetectDocumentText` API operation.
	// - `TEXTRACT_ANALYZE_DOCUMENT` - The Amazon Comprehend service uses the `AnalyzeDocument` API operation.
	// See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-comprehend-documentclassifier-documentreaderconfig.html#cfn-comprehend-documentclassifier-documentreaderconfig-documentreadaction
	//
	DocumentReadAction *string `field:"required" json:"documentReadAction" yaml:"documentReadAction"`
	// Determines the text extraction actions for PDF files. Enter one of the following values:.
	//
	// - `SERVICE_DEFAULT` - use the Amazon Comprehend service defaults for PDF files.
	// - `FORCE_DOCUMENT_READ_ACTION` - Amazon Comprehend uses the Textract API specified by DocumentReadAction for all PDF files, including digital PDF files.
	// See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-comprehend-documentclassifier-documentreaderconfig.html#cfn-comprehend-documentclassifier-documentreaderconfig-documentreadmode
	//
	DocumentReadMode *string `field:"optional" json:"documentReadMode" yaml:"documentReadMode"`
	// Specifies the type of Amazon Textract features to apply.
	//
	// If you chose `TEXTRACT_ANALYZE_DOCUMENT` as the read action, you must specify one or both of the following values:
	//
	// - `TABLES` - Returns information about any tables that are detected in the input document.
	// - `FORMS` - Returns information and the data from any forms that are detected in the input document.
	// See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-comprehend-documentclassifier-documentreaderconfig.html#cfn-comprehend-documentclassifier-documentreaderconfig-featuretypes
	//
	FeatureTypes *[]*string `field:"optional" json:"featureTypes" yaml:"featureTypes"`
}