/** * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0. */ #pragma once #include #include #include #include #include #include #include #include #include namespace Aws { namespace Utils { namespace Json { class JsonValue; class JsonView; } // namespace Json } // namespace Utils namespace Comprehend { namespace Model { /** *

The input properties for training a document classifier.

For more * information on how the input file is formatted, see Preparing * training data in the Comprehend Developer Guide.

See Also:

* AWS * API Reference

*/ class DocumentClassifierInputDataConfig { public: AWS_COMPREHEND_API DocumentClassifierInputDataConfig(); AWS_COMPREHEND_API DocumentClassifierInputDataConfig(Aws::Utils::Json::JsonView jsonValue); AWS_COMPREHEND_API DocumentClassifierInputDataConfig& operator=(Aws::Utils::Json::JsonView jsonValue); AWS_COMPREHEND_API Aws::Utils::Json::JsonValue Jsonize() const; /** *

The format of your training data:

  • * COMPREHEND_CSV: A two-column CSV file, where labels are provided in * the first column, and documents are provided in the second. If you use this * value, you must provide the S3Uri parameter in your request.

    *
  • AUGMENTED_MANIFEST: A labeled dataset that is * produced by Amazon SageMaker Ground Truth. This file is in JSON lines format. * Each line is a complete JSON object that contains a training document and its * associated labels.

    If you use this value, you must provide the * AugmentedManifests parameter in your request.

If * you don't specify a value, Amazon Comprehend uses COMPREHEND_CSV as * the default.

*/ inline const DocumentClassifierDataFormat& GetDataFormat() const{ return m_dataFormat; } /** *

The format of your training data:

  • * COMPREHEND_CSV: A two-column CSV file, where labels are provided in * the first column, and documents are provided in the second. If you use this * value, you must provide the S3Uri parameter in your request.

    *
  • AUGMENTED_MANIFEST: A labeled dataset that is * produced by Amazon SageMaker Ground Truth. This file is in JSON lines format. * Each line is a complete JSON object that contains a training document and its * associated labels.

    If you use this value, you must provide the * AugmentedManifests parameter in your request.

If * you don't specify a value, Amazon Comprehend uses COMPREHEND_CSV as * the default.

*/ inline bool DataFormatHasBeenSet() const { return m_dataFormatHasBeenSet; } /** *

The format of your training data:

  • * COMPREHEND_CSV: A two-column CSV file, where labels are provided in * the first column, and documents are provided in the second. If you use this * value, you must provide the S3Uri parameter in your request.

    *
  • AUGMENTED_MANIFEST: A labeled dataset that is * produced by Amazon SageMaker Ground Truth. This file is in JSON lines format. * Each line is a complete JSON object that contains a training document and its * associated labels.

    If you use this value, you must provide the * AugmentedManifests parameter in your request.

If * you don't specify a value, Amazon Comprehend uses COMPREHEND_CSV as * the default.

*/ inline void SetDataFormat(const DocumentClassifierDataFormat& value) { m_dataFormatHasBeenSet = true; m_dataFormat = value; } /** *

The format of your training data:

  • * COMPREHEND_CSV: A two-column CSV file, where labels are provided in * the first column, and documents are provided in the second. If you use this * value, you must provide the S3Uri parameter in your request.

    *
  • AUGMENTED_MANIFEST: A labeled dataset that is * produced by Amazon SageMaker Ground Truth. This file is in JSON lines format. * Each line is a complete JSON object that contains a training document and its * associated labels.

    If you use this value, you must provide the * AugmentedManifests parameter in your request.

If * you don't specify a value, Amazon Comprehend uses COMPREHEND_CSV as * the default.

*/ inline void SetDataFormat(DocumentClassifierDataFormat&& value) { m_dataFormatHasBeenSet = true; m_dataFormat = std::move(value); } /** *

The format of your training data:

  • * COMPREHEND_CSV: A two-column CSV file, where labels are provided in * the first column, and documents are provided in the second. If you use this * value, you must provide the S3Uri parameter in your request.

    *
  • AUGMENTED_MANIFEST: A labeled dataset that is * produced by Amazon SageMaker Ground Truth. This file is in JSON lines format. * Each line is a complete JSON object that contains a training document and its * associated labels.

    If you use this value, you must provide the * AugmentedManifests parameter in your request.

If * you don't specify a value, Amazon Comprehend uses COMPREHEND_CSV as * the default.

*/ inline DocumentClassifierInputDataConfig& WithDataFormat(const DocumentClassifierDataFormat& value) { SetDataFormat(value); return *this;} /** *

The format of your training data:

  • * COMPREHEND_CSV: A two-column CSV file, where labels are provided in * the first column, and documents are provided in the second. If you use this * value, you must provide the S3Uri parameter in your request.

    *
  • AUGMENTED_MANIFEST: A labeled dataset that is * produced by Amazon SageMaker Ground Truth. This file is in JSON lines format. * Each line is a complete JSON object that contains a training document and its * associated labels.

    If you use this value, you must provide the * AugmentedManifests parameter in your request.

If * you don't specify a value, Amazon Comprehend uses COMPREHEND_CSV as * the default.

*/ inline DocumentClassifierInputDataConfig& WithDataFormat(DocumentClassifierDataFormat&& value) { SetDataFormat(std::move(value)); return *this;} /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline const Aws::String& GetS3Uri() const{ return m_s3Uri; } /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline bool S3UriHasBeenSet() const { return m_s3UriHasBeenSet; } /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline void SetS3Uri(const Aws::String& value) { m_s3UriHasBeenSet = true; m_s3Uri = value; } /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline void SetS3Uri(Aws::String&& value) { m_s3UriHasBeenSet = true; m_s3Uri = std::move(value); } /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline void SetS3Uri(const char* value) { m_s3UriHasBeenSet = true; m_s3Uri.assign(value); } /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline DocumentClassifierInputDataConfig& WithS3Uri(const Aws::String& value) { SetS3Uri(value); return *this;} /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline DocumentClassifierInputDataConfig& WithS3Uri(Aws::String&& value) { SetS3Uri(std::move(value)); return *this;} /** *

The Amazon S3 URI for the input data. The S3 bucket must be in the same * Region as the API endpoint that you are calling. The URI can point to a single * input file or it can provide the prefix for a collection of input files.

*

For example, if you use the URI S3://bucketName/prefix, if the * prefix is a single file, Amazon Comprehend uses that file as input. If more than * one file begins with the prefix, Amazon Comprehend uses all of them as * input.

This parameter is required if you set DataFormat to * COMPREHEND_CSV.

*/ inline DocumentClassifierInputDataConfig& WithS3Uri(const char* value) { SetS3Uri(value); return *this;} /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline const Aws::String& GetTestS3Uri() const{ return m_testS3Uri; } /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline bool TestS3UriHasBeenSet() const { return m_testS3UriHasBeenSet; } /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline void SetTestS3Uri(const Aws::String& value) { m_testS3UriHasBeenSet = true; m_testS3Uri = value; } /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline void SetTestS3Uri(Aws::String&& value) { m_testS3UriHasBeenSet = true; m_testS3Uri = std::move(value); } /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline void SetTestS3Uri(const char* value) { m_testS3UriHasBeenSet = true; m_testS3Uri.assign(value); } /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline DocumentClassifierInputDataConfig& WithTestS3Uri(const Aws::String& value) { SetTestS3Uri(value); return *this;} /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline DocumentClassifierInputDataConfig& WithTestS3Uri(Aws::String&& value) { SetTestS3Uri(std::move(value)); return *this;} /** *

This specifies the Amazon S3 location where the test annotations for an * entity recognizer are located. The URI must be in the same Amazon Web Services * Region as the API endpoint that you are calling.

*/ inline DocumentClassifierInputDataConfig& WithTestS3Uri(const char* value) { SetTestS3Uri(value); return *this;} /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline const Aws::String& GetLabelDelimiter() const{ return m_labelDelimiter; } /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline bool LabelDelimiterHasBeenSet() const { return m_labelDelimiterHasBeenSet; } /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline void SetLabelDelimiter(const Aws::String& value) { m_labelDelimiterHasBeenSet = true; m_labelDelimiter = value; } /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline void SetLabelDelimiter(Aws::String&& value) { m_labelDelimiterHasBeenSet = true; m_labelDelimiter = std::move(value); } /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline void SetLabelDelimiter(const char* value) { m_labelDelimiterHasBeenSet = true; m_labelDelimiter.assign(value); } /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline DocumentClassifierInputDataConfig& WithLabelDelimiter(const Aws::String& value) { SetLabelDelimiter(value); return *this;} /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline DocumentClassifierInputDataConfig& WithLabelDelimiter(Aws::String&& value) { SetLabelDelimiter(std::move(value)); return *this;} /** *

Indicates the delimiter used to separate each label for training a * multi-label classifier. The default delimiter between labels is a pipe (|). You * can use a different character as a delimiter (if it's an allowed character) by * specifying it under Delimiter for labels. If the training documents use a * delimiter other than the default or the delimiter you specify, the labels on * that line will be combined to make a single unique label, such as * LABELLABELLABEL.

*/ inline DocumentClassifierInputDataConfig& WithLabelDelimiter(const char* value) { SetLabelDelimiter(value); return *this;} /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline const Aws::Vector& GetAugmentedManifests() const{ return m_augmentedManifests; } /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline bool AugmentedManifestsHasBeenSet() const { return m_augmentedManifestsHasBeenSet; } /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline void SetAugmentedManifests(const Aws::Vector& value) { m_augmentedManifestsHasBeenSet = true; m_augmentedManifests = value; } /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline void SetAugmentedManifests(Aws::Vector&& value) { m_augmentedManifestsHasBeenSet = true; m_augmentedManifests = std::move(value); } /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline DocumentClassifierInputDataConfig& WithAugmentedManifests(const Aws::Vector& value) { SetAugmentedManifests(value); return *this;} /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline DocumentClassifierInputDataConfig& WithAugmentedManifests(Aws::Vector&& value) { SetAugmentedManifests(std::move(value)); return *this;} /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline DocumentClassifierInputDataConfig& AddAugmentedManifests(const AugmentedManifestsListItem& value) { m_augmentedManifestsHasBeenSet = true; m_augmentedManifests.push_back(value); return *this; } /** *

A list of augmented manifest files that provide training data for your custom * model. An augmented manifest file is a labeled dataset that is produced by * Amazon SageMaker Ground Truth.

This parameter is required if you set * DataFormat to AUGMENTED_MANIFEST.

*/ inline DocumentClassifierInputDataConfig& AddAugmentedManifests(AugmentedManifestsListItem&& value) { m_augmentedManifestsHasBeenSet = true; m_augmentedManifests.push_back(std::move(value)); return *this; } /** *

The type of input documents for training the model. Provide plain-text * documents to create a plain-text model, and provide semi-structured documents to * create a native model.

*/ inline const DocumentClassifierDocumentTypeFormat& GetDocumentType() const{ return m_documentType; } /** *

The type of input documents for training the model. Provide plain-text * documents to create a plain-text model, and provide semi-structured documents to * create a native model.

*/ inline bool DocumentTypeHasBeenSet() const { return m_documentTypeHasBeenSet; } /** *

The type of input documents for training the model. Provide plain-text * documents to create a plain-text model, and provide semi-structured documents to * create a native model.

*/ inline void SetDocumentType(const DocumentClassifierDocumentTypeFormat& value) { m_documentTypeHasBeenSet = true; m_documentType = value; } /** *

The type of input documents for training the model. Provide plain-text * documents to create a plain-text model, and provide semi-structured documents to * create a native model.

*/ inline void SetDocumentType(DocumentClassifierDocumentTypeFormat&& value) { m_documentTypeHasBeenSet = true; m_documentType = std::move(value); } /** *

The type of input documents for training the model. Provide plain-text * documents to create a plain-text model, and provide semi-structured documents to * create a native model.

*/ inline DocumentClassifierInputDataConfig& WithDocumentType(const DocumentClassifierDocumentTypeFormat& value) { SetDocumentType(value); return *this;} /** *

The type of input documents for training the model. Provide plain-text * documents to create a plain-text model, and provide semi-structured documents to * create a native model.

*/ inline DocumentClassifierInputDataConfig& WithDocumentType(DocumentClassifierDocumentTypeFormat&& value) { SetDocumentType(std::move(value)); return *this;} /** *

The S3 location of the training documents. This parameter is required in a * request to create a native classifier model.

*/ inline const DocumentClassifierDocuments& GetDocuments() const{ return m_documents; } /** *

The S3 location of the training documents. This parameter is required in a * request to create a native classifier model.

*/ inline bool DocumentsHasBeenSet() const { return m_documentsHasBeenSet; } /** *

The S3 location of the training documents. This parameter is required in a * request to create a native classifier model.

*/ inline void SetDocuments(const DocumentClassifierDocuments& value) { m_documentsHasBeenSet = true; m_documents = value; } /** *

The S3 location of the training documents. This parameter is required in a * request to create a native classifier model.

*/ inline void SetDocuments(DocumentClassifierDocuments&& value) { m_documentsHasBeenSet = true; m_documents = std::move(value); } /** *

The S3 location of the training documents. This parameter is required in a * request to create a native classifier model.

*/ inline DocumentClassifierInputDataConfig& WithDocuments(const DocumentClassifierDocuments& value) { SetDocuments(value); return *this;} /** *

The S3 location of the training documents. This parameter is required in a * request to create a native classifier model.

*/ inline DocumentClassifierInputDataConfig& WithDocuments(DocumentClassifierDocuments&& value) { SetDocuments(std::move(value)); return *this;} inline const DocumentReaderConfig& GetDocumentReaderConfig() const{ return m_documentReaderConfig; } inline bool DocumentReaderConfigHasBeenSet() const { return m_documentReaderConfigHasBeenSet; } inline void SetDocumentReaderConfig(const DocumentReaderConfig& value) { m_documentReaderConfigHasBeenSet = true; m_documentReaderConfig = value; } inline void SetDocumentReaderConfig(DocumentReaderConfig&& value) { m_documentReaderConfigHasBeenSet = true; m_documentReaderConfig = std::move(value); } inline DocumentClassifierInputDataConfig& WithDocumentReaderConfig(const DocumentReaderConfig& value) { SetDocumentReaderConfig(value); return *this;} inline DocumentClassifierInputDataConfig& WithDocumentReaderConfig(DocumentReaderConfig&& value) { SetDocumentReaderConfig(std::move(value)); return *this;} private: DocumentClassifierDataFormat m_dataFormat; bool m_dataFormatHasBeenSet = false; Aws::String m_s3Uri; bool m_s3UriHasBeenSet = false; Aws::String m_testS3Uri; bool m_testS3UriHasBeenSet = false; Aws::String m_labelDelimiter; bool m_labelDelimiterHasBeenSet = false; Aws::Vector m_augmentedManifests; bool m_augmentedManifestsHasBeenSet = false; DocumentClassifierDocumentTypeFormat m_documentType; bool m_documentTypeHasBeenSet = false; DocumentClassifierDocuments m_documents; bool m_documentsHasBeenSet = false; DocumentReaderConfig m_documentReaderConfig; bool m_documentReaderConfigHasBeenSet = false; }; } // namespace Model } // namespace Comprehend } // namespace Aws