/* * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with * the License. A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ package com.amazonaws.services.sagemaker.model; import java.io.Serializable; import javax.annotation.Generated; import com.amazonaws.protocol.StructuredPojo; import com.amazonaws.protocol.ProtocolMarshaller; /** *

* Describes the S3 data source. *

*

* Your input bucket must be in the same Amazon Web Services region as your training job. *

* * @see AWS API * Documentation */ @Generated("com.amazonaws:aws-java-sdk-code-generator") public class S3DataSource implements Serializable, Cloneable, StructuredPojo { /** *

* If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all objects * that match the specified key name prefix for model training. *

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file * in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. *

*/ private String s3DataType; /** *

* Depending on the value specified for the S3DataType, identifies either a key name prefix or a * manifest. For example: *

* *

* Your input bucket must be located in same Amazon Web Services region as your training job. *

*/ private String s3Uri; /** *

* If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for model * training, specify FullyReplicated. *

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on * each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe * modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when * TrainingInputMode is set to File), this copies 1/n of the number of objects. *

*/ private String s3DataDistributionType; /** *

* A list of one or more attribute names to use that are found in a specified augmented manifest file. *

*/ private java.util.List attributeNames; /** *

* A list of names of instance groups that get data from the S3 data source. *

*/ private java.util.List instanceGroupNames; /** *

* If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all objects * that match the specified key name prefix for model training. *

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file * in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. *

* * @param s3DataType * If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all * objects that match the specified key name prefix for model training.

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest * file in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. * @see S3DataType */ public void setS3DataType(String s3DataType) { this.s3DataType = s3DataType; } /** *

* If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all objects * that match the specified key name prefix for model training. *

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file * in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. *

* * @return If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all * objects that match the specified key name prefix for model training.

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented * manifest file in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. * @see S3DataType */ public String getS3DataType() { return this.s3DataType; } /** *

* If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all objects * that match the specified key name prefix for model training. *

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file * in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. *

* * @param s3DataType * If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all * objects that match the specified key name prefix for model training.

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest * file in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. * @return Returns a reference to this object so that method calls can be chained together. * @see S3DataType */ public S3DataSource withS3DataType(String s3DataType) { setS3DataType(s3DataType); return this; } /** *

* If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all objects * that match the specified key name prefix for model training. *

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest file * in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. *

* * @param s3DataType * If you choose S3Prefix, S3Uri identifies a key name prefix. SageMaker uses all * objects that match the specified key name prefix for model training.

*

* If you choose ManifestFile, S3Uri identifies an object that is a manifest file * containing a list of object keys that you want SageMaker to use for model training. *

*

* If you choose AugmentedManifestFile, S3Uri identifies an object that is an augmented manifest * file in JSON lines format. This file contains the data you want to use for model training. * AugmentedManifestFile can only be used if the Channel's input mode is Pipe. * @return Returns a reference to this object so that method calls can be chained together. * @see S3DataType */ public S3DataSource withS3DataType(S3DataType s3DataType) { this.s3DataType = s3DataType.toString(); return this; } /** *

* Depending on the value specified for the S3DataType, identifies either a key name prefix or a * manifest. For example: *

* *

* Your input bucket must be located in same Amazon Web Services region as your training job. *

* * @param s3Uri * Depending on the value specified for the S3DataType, identifies either a key name prefix or a * manifest. For example:

* *

* Your input bucket must be located in same Amazon Web Services region as your training job. */ public void setS3Uri(String s3Uri) { this.s3Uri = s3Uri; } /** *

* Depending on the value specified for the S3DataType, identifies either a key name prefix or a * manifest. For example: *

* *

* Your input bucket must be located in same Amazon Web Services region as your training job. *

* * @return Depending on the value specified for the S3DataType, identifies either a key name prefix or * a manifest. For example:

* *

* Your input bucket must be located in same Amazon Web Services region as your training job. */ public String getS3Uri() { return this.s3Uri; } /** *

* Depending on the value specified for the S3DataType, identifies either a key name prefix or a * manifest. For example: *

* *

* Your input bucket must be located in same Amazon Web Services region as your training job. *

* * @param s3Uri * Depending on the value specified for the S3DataType, identifies either a key name prefix or a * manifest. For example:

* *

* Your input bucket must be located in same Amazon Web Services region as your training job. * @return Returns a reference to this object so that method calls can be chained together. */ public S3DataSource withS3Uri(String s3Uri) { setS3Uri(s3Uri); return this; } /** *

* If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for model * training, specify FullyReplicated. *

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on * each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe * modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when * TrainingInputMode is set to File), this copies 1/n of the number of objects. *

* * @param s3DataDistributionType * If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for * model training, specify FullyReplicated.

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a * training job, each instance gets approximately 1/n of the number of S3 objects. In this case, model * training on each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't * get any data and you will pay for nodes that aren't getting any training data. This applies in both File * and Pipe modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume * (when TrainingInputMode is set to File), this copies 1/n of the number of * objects. * @see S3DataDistribution */ public void setS3DataDistributionType(String s3DataDistributionType) { this.s3DataDistributionType = s3DataDistributionType; } /** *

* If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for model * training, specify FullyReplicated. *

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on * each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe * modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when * TrainingInputMode is set to File), this copies 1/n of the number of objects. *

* * @return If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for * model training, specify FullyReplicated.

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for * model training, specify ShardedByS3Key. If there are n ML compute instances launched * for a training job, each instance gets approximately 1/n of the number of S3 objects. In this * case, model training on each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes * won't get any data and you will pay for nodes that aren't getting any training data. This applies in both * File and Pipe modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume * (when TrainingInputMode is set to File), this copies 1/n of the number * of objects. * @see S3DataDistribution */ public String getS3DataDistributionType() { return this.s3DataDistributionType; } /** *

* If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for model * training, specify FullyReplicated. *

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on * each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe * modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when * TrainingInputMode is set to File), this copies 1/n of the number of objects. *

* * @param s3DataDistributionType * If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for * model training, specify FullyReplicated.

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a * training job, each instance gets approximately 1/n of the number of S3 objects. In this case, model * training on each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't * get any data and you will pay for nodes that aren't getting any training data. This applies in both File * and Pipe modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume * (when TrainingInputMode is set to File), this copies 1/n of the number of * objects. * @return Returns a reference to this object so that method calls can be chained together. * @see S3DataDistribution */ public S3DataSource withS3DataDistributionType(String s3DataDistributionType) { setS3DataDistributionType(s3DataDistributionType); return this; } /** *

* If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for model * training, specify FullyReplicated. *

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a training * job, each instance gets approximately 1/n of the number of S3 objects. In this case, model training on * each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't get * any data and you will pay for nodes that aren't getting any training data. This applies in both File and Pipe * modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume (when * TrainingInputMode is set to File), this copies 1/n of the number of objects. *

* * @param s3DataDistributionType * If you want SageMaker to replicate the entire dataset on each ML compute instance that is launched for * model training, specify FullyReplicated.

*

* If you want SageMaker to replicate a subset of data on each ML compute instance that is launched for model * training, specify ShardedByS3Key. If there are n ML compute instances launched for a * training job, each instance gets approximately 1/n of the number of S3 objects. In this case, model * training on each machine uses only the subset of training data. *

*

* Don't choose more ML compute instances for training than available S3 objects. If you do, some nodes won't * get any data and you will pay for nodes that aren't getting any training data. This applies in both File * and Pipe modes. Keep this in mind when developing algorithms. *

*

* In distributed training, where you use multiple ML compute EC2 instances, you might choose * ShardedByS3Key. If the algorithm requires copying training data to the ML storage volume * (when TrainingInputMode is set to File), this copies 1/n of the number of * objects. * @return Returns a reference to this object so that method calls can be chained together. * @see S3DataDistribution */ public S3DataSource withS3DataDistributionType(S3DataDistribution s3DataDistributionType) { this.s3DataDistributionType = s3DataDistributionType.toString(); return this; } /** *

* A list of one or more attribute names to use that are found in a specified augmented manifest file. *

* * @return A list of one or more attribute names to use that are found in a specified augmented manifest file. */ public java.util.List getAttributeNames() { return attributeNames; } /** *

* A list of one or more attribute names to use that are found in a specified augmented manifest file. *

* * @param attributeNames * A list of one or more attribute names to use that are found in a specified augmented manifest file. */ public void setAttributeNames(java.util.Collection attributeNames) { if (attributeNames == null) { this.attributeNames = null; return; } this.attributeNames = new java.util.ArrayList(attributeNames); } /** *

* A list of one or more attribute names to use that are found in a specified augmented manifest file. *

*

* NOTE: This method appends the values to the existing list (if any). Use * {@link #setAttributeNames(java.util.Collection)} or {@link #withAttributeNames(java.util.Collection)} if you want * to override the existing values. *

* * @param attributeNames * A list of one or more attribute names to use that are found in a specified augmented manifest file. * @return Returns a reference to this object so that method calls can be chained together. */ public S3DataSource withAttributeNames(String... attributeNames) { if (this.attributeNames == null) { setAttributeNames(new java.util.ArrayList(attributeNames.length)); } for (String ele : attributeNames) { this.attributeNames.add(ele); } return this; } /** *

* A list of one or more attribute names to use that are found in a specified augmented manifest file. *

* * @param attributeNames * A list of one or more attribute names to use that are found in a specified augmented manifest file. * @return Returns a reference to this object so that method calls can be chained together. */ public S3DataSource withAttributeNames(java.util.Collection attributeNames) { setAttributeNames(attributeNames); return this; } /** *

* A list of names of instance groups that get data from the S3 data source. *

* * @return A list of names of instance groups that get data from the S3 data source. */ public java.util.List getInstanceGroupNames() { return instanceGroupNames; } /** *

* A list of names of instance groups that get data from the S3 data source. *

* * @param instanceGroupNames * A list of names of instance groups that get data from the S3 data source. */ public void setInstanceGroupNames(java.util.Collection instanceGroupNames) { if (instanceGroupNames == null) { this.instanceGroupNames = null; return; } this.instanceGroupNames = new java.util.ArrayList(instanceGroupNames); } /** *

* A list of names of instance groups that get data from the S3 data source. *

*

* NOTE: This method appends the values to the existing list (if any). Use * {@link #setInstanceGroupNames(java.util.Collection)} or {@link #withInstanceGroupNames(java.util.Collection)} if * you want to override the existing values. *

* * @param instanceGroupNames * A list of names of instance groups that get data from the S3 data source. * @return Returns a reference to this object so that method calls can be chained together. */ public S3DataSource withInstanceGroupNames(String... instanceGroupNames) { if (this.instanceGroupNames == null) { setInstanceGroupNames(new java.util.ArrayList(instanceGroupNames.length)); } for (String ele : instanceGroupNames) { this.instanceGroupNames.add(ele); } return this; } /** *

* A list of names of instance groups that get data from the S3 data source. *

* * @param instanceGroupNames * A list of names of instance groups that get data from the S3 data source. * @return Returns a reference to this object so that method calls can be chained together. */ public S3DataSource withInstanceGroupNames(java.util.Collection instanceGroupNames) { setInstanceGroupNames(instanceGroupNames); return this; } /** * Returns a string representation of this object. This is useful for testing and debugging. Sensitive data will be * redacted from this string using a placeholder value. * * @return A string representation of this object. * * @see java.lang.Object#toString() */ @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("{"); if (getS3DataType() != null) sb.append("S3DataType: ").append(getS3DataType()).append(","); if (getS3Uri() != null) sb.append("S3Uri: ").append(getS3Uri()).append(","); if (getS3DataDistributionType() != null) sb.append("S3DataDistributionType: ").append(getS3DataDistributionType()).append(","); if (getAttributeNames() != null) sb.append("AttributeNames: ").append(getAttributeNames()).append(","); if (getInstanceGroupNames() != null) sb.append("InstanceGroupNames: ").append(getInstanceGroupNames()); sb.append("}"); return sb.toString(); } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (obj instanceof S3DataSource == false) return false; S3DataSource other = (S3DataSource) obj; if (other.getS3DataType() == null ^ this.getS3DataType() == null) return false; if (other.getS3DataType() != null && other.getS3DataType().equals(this.getS3DataType()) == false) return false; if (other.getS3Uri() == null ^ this.getS3Uri() == null) return false; if (other.getS3Uri() != null && other.getS3Uri().equals(this.getS3Uri()) == false) return false; if (other.getS3DataDistributionType() == null ^ this.getS3DataDistributionType() == null) return false; if (other.getS3DataDistributionType() != null && other.getS3DataDistributionType().equals(this.getS3DataDistributionType()) == false) return false; if (other.getAttributeNames() == null ^ this.getAttributeNames() == null) return false; if (other.getAttributeNames() != null && other.getAttributeNames().equals(this.getAttributeNames()) == false) return false; if (other.getInstanceGroupNames() == null ^ this.getInstanceGroupNames() == null) return false; if (other.getInstanceGroupNames() != null && other.getInstanceGroupNames().equals(this.getInstanceGroupNames()) == false) return false; return true; } @Override public int hashCode() { final int prime = 31; int hashCode = 1; hashCode = prime * hashCode + ((getS3DataType() == null) ? 0 : getS3DataType().hashCode()); hashCode = prime * hashCode + ((getS3Uri() == null) ? 0 : getS3Uri().hashCode()); hashCode = prime * hashCode + ((getS3DataDistributionType() == null) ? 0 : getS3DataDistributionType().hashCode()); hashCode = prime * hashCode + ((getAttributeNames() == null) ? 0 : getAttributeNames().hashCode()); hashCode = prime * hashCode + ((getInstanceGroupNames() == null) ? 0 : getInstanceGroupNames().hashCode()); return hashCode; } @Override public S3DataSource clone() { try { return (S3DataSource) super.clone(); } catch (CloneNotSupportedException e) { throw new IllegalStateException("Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!", e); } } @com.amazonaws.annotation.SdkInternalApi @Override public void marshall(ProtocolMarshaller protocolMarshaller) { com.amazonaws.services.sagemaker.model.transform.S3DataSourceMarshaller.getInstance().marshall(this, protocolMarshaller); } }