/* * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with * the License. A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ package com.amazonaws.services.comprehend.model; import java.io.Serializable; import javax.annotation.Generated; import com.amazonaws.protocol.StructuredPojo; import com.amazonaws.protocol.ProtocolMarshaller; /** *
* The input properties for training a document classifier.
* <p>
* For more information on how the input file is formatted, see "Preparing training data" in the
* Comprehend Developer Guide.
* </p>
* * @see AWS API Documentation */ @Generated("com.amazonaws:aws-java-sdk-code-generator") public class DocumentClassifierInputDataConfig implements Serializable, Cloneable, StructuredPojo { /** ** The format of your training data: *
*
 * <ul>
 * <li>{@code COMPREHEND_CSV}: A two-column CSV file, where labels are provided in the first column, and documents
 * are provided in the second. If you use this value, you must provide the {@code S3Uri} parameter in your
 * request.</li>
 * <li>{@code AUGMENTED_MANIFEST}: A labeled dataset that is produced by Amazon SageMaker Ground Truth. This file
 * is in JSON lines format. Each line is a complete JSON object that contains a training document and its
 * associated labels. If you use this value, you must provide the {@code AugmentedManifests} parameter in your
 * request.</li>
 * </ul>
 * <p>
 * If you don't specify a value, Amazon Comprehend uses {@code COMPREHEND_CSV} as the default.
 * </p>
 */
private String dataFormat;

/**
 * The Amazon S3 URI for the input data. The S3 bucket must be in the same Region as the API endpoint that you are
 * calling. The URI can point to a single input file or it can provide the prefix for a collection of input files.
 * <p>
 * For example, if you use the URI {@code S3://bucketName/prefix}, if the prefix is a single file, Amazon
 * Comprehend uses that file as input. If more than one file begins with the prefix, Amazon Comprehend uses all of
 * them as input.
 * </p>
 * <p>
 * This parameter is required if you set {@code DataFormat} to {@code COMPREHEND_CSV}.
 * </p>
 */
private String s3Uri;

/**
 * This specifies the Amazon S3 location where the test annotations for an entity recognizer are located. The URI
 * must be in the same Amazon Web Services Region as the API endpoint that you are calling.
 */
private String testS3Uri;

/**
 * Indicates the delimiter used to separate each label for training a multi-label classifier. The default delimiter
 * between labels is a pipe (|). You can use a different character as a delimiter (if it's an allowed character) by
 * specifying it under Delimiter for labels. If the training documents use a delimiter other than the default or the
 * delimiter you specify, the labels on that line will be combined to make a single unique label, such as
 * LABELLABELLABEL.
 */
private String labelDelimiter;

/**
 * A list of augmented manifest files that provide training data for your custom model. An augmented manifest file
 * is a labeled dataset that is produced by Amazon SageMaker Ground Truth.
 * <p>
 * This parameter is required if you set {@code DataFormat} to {@code AUGMENTED_MANIFEST}.
 * </p>
 */
private java.util.List<AugmentedManifestsListItem> augmentedManifests;

/**
 * The type of input documents for training the model. Provide plain-text documents to create a plain-text model,
 * and provide semi-structured documents to create a native model.
 */
private String documentType;

/**
 * The S3 location of the training documents. This parameter is required in a request to create a native classifier
 * model.
 */
private DocumentClassifierDocuments documents;

private DocumentReaderConfig documentReaderConfig;

/**
 * The format of your training data:
*
 * <ul>
 * <li>{@code COMPREHEND_CSV}: A two-column CSV file, where labels are provided in the first column, and documents
 * are provided in the second. If you use this value, you must provide the {@code S3Uri} parameter in your
 * request.</li>
 * <li>{@code AUGMENTED_MANIFEST}: A labeled dataset that is produced by Amazon SageMaker Ground Truth. This file
 * is in JSON lines format. Each line is a complete JSON object that contains a training document and its
 * associated labels. If you use this value, you must provide the {@code AugmentedManifests} parameter in your
 * request.</li>
 * </ul>
 * <p>
 * If you don't specify a value, Amazon Comprehend uses {@code COMPREHEND_CSV} as the default.
 * </p>
 *
 * @param dataFormat
 *        the name of the training data format; the value is stored exactly as given
 * @see DocumentClassifierDataFormat
 */
public void setDataFormat(String dataFormat) {
    // Plain assignment: the string is not checked against DocumentClassifierDataFormat here.
    this.dataFormat = dataFormat;
}
/**
 * Returns the format of your training data:
 * <ul>
 * <li>{@code COMPREHEND_CSV}: A two-column CSV file, where labels are provided in the first column, and documents
 * are provided in the second. If you use this value, you must provide the {@code S3Uri} parameter in your
 * request.</li>
 * <li>{@code AUGMENTED_MANIFEST}: A labeled dataset that is produced by Amazon SageMaker Ground Truth. This file
 * is in JSON lines format. Each line is a complete JSON object that contains a training document and its
 * associated labels. If you use this value, you must provide the {@code AugmentedManifests} parameter in your
 * request.</li>
 * </ul>
 * <p>
 * If you don't specify a value, Amazon Comprehend uses {@code COMPREHEND_CSV} as the default.
 * </p>
 *
 * @return the training data format name currently stored on this object
 * @see DocumentClassifierDataFormat
 */
public String getDataFormat() {
    return dataFormat;
}
/**
 * Fluent variant of {@link #setDataFormat(String)}: stores the format of your training data and returns this
 * object.
 * <ul>
 * <li>{@code COMPREHEND_CSV}: A two-column CSV file, where labels are provided in the first column, and documents
 * are provided in the second. If you use this value, you must provide the {@code S3Uri} parameter in your
 * request.</li>
 * <li>{@code AUGMENTED_MANIFEST}: A labeled dataset that is produced by Amazon SageMaker Ground Truth. This file
 * is in JSON lines format. Each line is a complete JSON object that contains a training document and its
 * associated labels. If you use this value, you must provide the {@code AugmentedManifests} parameter in your
 * request.</li>
 * </ul>
 * <p>
 * If you don't specify a value, Amazon Comprehend uses {@code COMPREHEND_CSV} as the default.
 * </p>
 *
 * @param dataFormat
 *        the name of the training data format; passed unchanged to {@link #setDataFormat(String)}
 * @return Returns a reference to this object so that method calls can be chained together.
 * @see DocumentClassifierDataFormat
 */
public DocumentClassifierInputDataConfig withDataFormat(String dataFormat) {
    // Go through the setter rather than the field so any override of setDataFormat is honoured.
    this.setDataFormat(dataFormat);
    return this;
}
/**
 * Stores the format of your training data, given as an enum constant, and returns this object.
 * <ul>
 * <li>{@code COMPREHEND_CSV}: A two-column CSV file, where labels are provided in the first column, and documents
 * are provided in the second. If you use this value, you must provide the {@code S3Uri} parameter in your
 * request.</li>
 * <li>{@code AUGMENTED_MANIFEST}: A labeled dataset that is produced by Amazon SageMaker Ground Truth. This file
 * is in JSON lines format. Each line is a complete JSON object that contains a training document and its
 * associated labels. If you use this value, you must provide the {@code AugmentedManifests} parameter in your
 * request.</li>
 * </ul>
 * <p>
 * If you don't specify a value, Amazon Comprehend uses {@code COMPREHEND_CSV} as the default.
 * </p>
 *
 * @param dataFormat
 *        the training data format; its {@code toString()} value is what gets stored. A {@code null} argument
 *        clears the stored value instead of throwing {@link NullPointerException}.
 * @return Returns a reference to this object so that method calls can be chained together.
 * @see DocumentClassifierDataFormat
 */
public DocumentClassifierInputDataConfig withDataFormat(DocumentClassifierDataFormat dataFormat) {
    // Null-safe: the String overload accepts null, so the enum overload should not fail on it either.
    this.dataFormat = (dataFormat == null) ? null : dataFormat.toString();
    return this;
}
/**
 * Sets the Amazon S3 URI for the input data. The S3 bucket must be in the same Region as the API endpoint that you
 * are calling. The URI can point to a single input file or it can provide the prefix for a collection of input
 * files.
 * <p>
 * For example, if you use the URI {@code S3://bucketName/prefix}, if the prefix is a single file, Amazon
 * Comprehend uses that file as input. If more than one file begins with the prefix, Amazon Comprehend uses all of
 * them as input.
 * </p>
 * <p>
 * This parameter is required if you set {@code DataFormat} to {@code COMPREHEND_CSV}.
 * </p>
 *
 * @param s3Uri
 *        the Amazon S3 URI for the input data; the value is stored exactly as given
 */
public void setS3Uri(String s3Uri) {
    // Plain assignment: no parsing or validation of the URI happens here.
    this.s3Uri = s3Uri;
}
/**
 * Returns the Amazon S3 URI for the input data. The S3 bucket must be in the same Region as the API endpoint that
 * you are calling. The URI can point to a single input file or it can provide the prefix for a collection of input
 * files.
 * <p>
 * For example, if you use the URI {@code S3://bucketName/prefix}, if the prefix is a single file, Amazon
 * Comprehend uses that file as input. If more than one file begins with the prefix, Amazon Comprehend uses all of
 * them as input.
 * </p>
 * <p>
 * This parameter is required if you set {@code DataFormat} to {@code COMPREHEND_CSV}.
 * </p>
 *
 * @return the Amazon S3 URI currently stored on this object
 */
public String getS3Uri() {
    return s3Uri;
}
/**
 * Fluent variant of {@link #setS3Uri(String)}: stores the Amazon S3 URI for the input data and returns this
 * object. The S3 bucket must be in the same Region as the API endpoint that you are calling. The URI can point to
 * a single input file or it can provide the prefix for a collection of input files.
 * <p>
 * For example, if you use the URI {@code S3://bucketName/prefix}, if the prefix is a single file, Amazon
 * Comprehend uses that file as input. If more than one file begins with the prefix, Amazon Comprehend uses all of
 * them as input.
 * </p>
 * <p>
 * This parameter is required if you set {@code DataFormat} to {@code COMPREHEND_CSV}.
 * </p>
 *
 * @param s3Uri
 *        the Amazon S3 URI for the input data; passed unchanged to {@link #setS3Uri(String)}
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public DocumentClassifierInputDataConfig withS3Uri(String s3Uri) {
    // Go through the setter rather than the field so any override of setS3Uri is honoured.
    this.setS3Uri(s3Uri);
    return this;
}
/**
 * This specifies the Amazon S3 location where the test annotations for an entity recognizer are located. The URI
 * must be in the same Amazon Web Services Region as the API endpoint that you are calling.
 *
 * @param testS3Uri
 *        the Amazon S3 location of the test annotations; the value is stored exactly as given
 */
public void setTestS3Uri(String testS3Uri) {
    // Plain assignment: no parsing or validation of the URI happens here.
    this.testS3Uri = testS3Uri;
}

/**
 * This specifies the Amazon S3 location where the test annotations for an entity recognizer are located. The URI
 * must be in the same Amazon Web Services Region as the API endpoint that you are calling.
 *
*
 * @return This specifies the Amazon S3 location where the test annotations for an entity recognizer are located.
 *         The URI must be in the same Amazon Web Services Region as the API endpoint that you are calling.
 */
public String getTestS3Uri() {
    return testS3Uri;
}

/**
 * This specifies the Amazon S3 location where the test annotations for an entity recognizer are located. The URI
 * must be in the same Amazon Web Services Region as the API endpoint that you are calling.
 *
*
 * @param testS3Uri
 *        This specifies the Amazon S3 location where the test annotations for an entity recognizer are located.
 *        The URI must be in the same Amazon Web Services Region as the API endpoint that you are calling.
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public DocumentClassifierInputDataConfig withTestS3Uri(String testS3Uri) {
    // Go through the setter rather than the field so any override of setTestS3Uri is honoured.
    this.setTestS3Uri(testS3Uri);
    return this;
}

/**
 * Indicates the delimiter used to separate each label for training a multi-label classifier. The default delimiter
 * between labels is a pipe (|). You can use a different character as a delimiter (if it's an allowed character) by
 * specifying it under Delimiter for labels. If the training documents use a delimiter other than the default or the
 * delimiter you specify, the labels on that line will be combined to make a single unique label, such as
 * LABELLABELLABEL.
 *
*
 * @param labelDelimiter
 *        Indicates the delimiter used to separate each label for training a multi-label classifier. The default
 *        delimiter between labels is a pipe (|). You can use a different character as a delimiter (if it's an
 *        allowed character) by specifying it under Delimiter for labels. If the training documents use a delimiter
 *        other than the default or the delimiter you specify, the labels on that line will be combined to make a
 *        single unique label, such as LABELLABELLABEL.
 */
public void setLabelDelimiter(String labelDelimiter) {
    // Plain assignment: the delimiter is not checked against the allowed characters here.
    this.labelDelimiter = labelDelimiter;
}

/**
 * Indicates the delimiter used to separate each label for training a multi-label classifier. The default delimiter
 * between labels is a pipe (|). You can use a different character as a delimiter (if it's an allowed character) by
 * specifying it under Delimiter for labels. If the training documents use a delimiter other than the default or the
 * delimiter you specify, the labels on that line will be combined to make a single unique label, such as
 * LABELLABELLABEL.
 *
*
 * @return Indicates the delimiter used to separate each label for training a multi-label classifier. The default
 *         delimiter between labels is a pipe (|). You can use a different character as a delimiter (if it's an
 *         allowed character) by specifying it under Delimiter for labels. If the training documents use a delimiter
 *         other than the default or the delimiter you specify, the labels on that line will be combined to make a
 *         single unique label, such as LABELLABELLABEL.
 */
public String getLabelDelimiter() {
    return labelDelimiter;
}

/**
 * Indicates the delimiter used to separate each label for training a multi-label classifier. The default delimiter
 * between labels is a pipe (|). You can use a different character as a delimiter (if it's an allowed character) by
 * specifying it under Delimiter for labels. If the training documents use a delimiter other than the default or the
 * delimiter you specify, the labels on that line will be combined to make a single unique label, such as
 * LABELLABELLABEL.
 *
* * @param labelDelimiter * Indicates the delimiter used to separate each label for training a multi-label classifier. The default * delimiter between labels is a pipe (|). You can use a different character as a delimiter (if it's an * allowed character) by specifying it under Delimiter for labels. If the training documents use a delimiter * other than the default or the delimiter you specify, the labels on that line will be combined to make a * single unique label, such as LABELLABELLABEL. * @return Returns a reference to this object so that method calls can be chained together. */ public DocumentClassifierInputDataConfig withLabelDelimiter(String labelDelimiter) { setLabelDelimiter(labelDelimiter); return this; } /** ** A list of augmented manifest files that provide training data for your custom model. An augmented manifest file * is a labeled dataset that is produced by Amazon SageMaker Ground Truth. *
*
* This parameter is required if you set DataFormat
to AUGMENTED_MANIFEST
.
*
* This parameter is required if you set
* A list of augmented manifest files that provide training data for your custom model. An augmented manifest file
* is a labeled dataset that is produced by Amazon SageMaker Ground Truth.
*
* This parameter is required if you set DataFormat
to AUGMENTED_MANIFEST
.
*/
public java.util.ListDataFormat
to AUGMENTED_MANIFEST
.
*
* This parameter is required if you set
* A list of augmented manifest files that provide training data for your custom model. An augmented manifest file
* is a labeled dataset that is produced by Amazon SageMaker Ground Truth.
*
* This parameter is required if you set
* NOTE: This method appends the values to the existing list (if any). Use
* {@link #setAugmentedManifests(java.util.Collection)} or {@link #withAugmentedManifests(java.util.Collection)} if
* you want to override the existing values.
* DataFormat
to AUGMENTED_MANIFEST
.
*/
public void setAugmentedManifests(java.util.CollectionDataFormat
to AUGMENTED_MANIFEST
.
*
* This parameter is required if you set
* A list of augmented manifest files that provide training data for your custom model. An augmented manifest file
* is a labeled dataset that is produced by Amazon SageMaker Ground Truth.
*
* This parameter is required if you set DataFormat
to AUGMENTED_MANIFEST
.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public DocumentClassifierInputDataConfig withAugmentedManifests(AugmentedManifestsListItem... augmentedManifests) {
if (this.augmentedManifests == null) {
setAugmentedManifests(new java.util.ArrayListDataFormat
to AUGMENTED_MANIFEST
.
*
* This parameter is required if you set
* The type of input documents for training the model. Provide plain-text documents to create a plain-text model,
* and provide semi-structured documents to create a native model.
*
* The type of input documents for training the model. Provide plain-text documents to create a plain-text model,
* and provide semi-structured documents to create a native model.
*
* The type of input documents for training the model. Provide plain-text documents to create a plain-text model,
* and provide semi-structured documents to create a native model.
*
* The type of input documents for training the model. Provide plain-text documents to create a plain-text model,
* and provide semi-structured documents to create a native model.
*
* The S3 location of the training documents. This parameter is required in a request to create a native classifier
* model.
*
* The S3 location of the training documents. This parameter is required in a request to create a native classifier
* model.
*
* The S3 location of the training documents. This parameter is required in a request to create a native classifier
* model.
* DataFormat
to AUGMENTED_MANIFEST
.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public DocumentClassifierInputDataConfig withAugmentedManifests(java.util.Collection