/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

/*
 * Do not modify this file. This file is generated from the machinelearning-2014-12-12.normal.json service model.
 */
using System;
using System.Collections.Generic;
using System.Xml.Serialization;
using System.Text;
using System.IO;
using System.Net;

using Amazon.Runtime;
using Amazon.Runtime.Internal;

namespace Amazon.MachineLearning.Model
{
    /// <summary>
    /// Describes the data specification of a <c>DataSource</c>.
    /// </summary>
    public partial class S3DataSpec
    {
        // Backing storage for the public properties; a null value means "not set".
        private string _dataLocationS3;
        private string _dataRearrangement;
        private string _dataSchema;
        private string _dataSchemaLocationS3;

        /// <summary>
        /// Gets and sets the property DataLocationS3.
        /// <para>
        /// The location of the data file(s) used by a <c>DataSource</c>. The URI specifies
        /// a data file or an Amazon Simple Storage Service (Amazon S3) directory or bucket
        /// containing data files.
        /// </para>
        /// </summary>
        [AWSProperty(Required = true, Max = 2048)]
        public string DataLocationS3
        {
            get
            {
                return _dataLocationS3;
            }
            set
            {
                _dataLocationS3 = value;
            }
        }

        /// <summary>
        /// Reports whether <see cref="DataLocationS3"/> holds a non-null value.
        /// </summary>
        /// <returns><c>true</c> when the backing field is not null; otherwise <c>false</c>.</returns>
        internal bool IsSetDataLocationS3()
        {
            return _dataLocationS3 != null;
        }

        // NOTE(review): the two non-sequential examples below write "randomSeed"="..." with an
        // '=' between key and value, which is not valid JSON. The text is kept verbatim from the
        // service model; confirm the expected syntax against the service documentation.
        /// <summary>
        /// Gets and sets the property DataRearrangement.
        /// <para>
        /// A JSON string that represents the splitting and rearrangement processing to be applied
        /// to a <c>DataSource</c>. If the <c>DataRearrangement</c> parameter is not provided,
        /// all of the input data is used to create the <c>Datasource</c>.
        /// </para>
        /// <para>
        /// There are multiple parameters that control what data is used to create a datasource:
        /// </para>
        /// <list type="bullet">
        /// <item>
        /// <term><c>percentBegin</c></term>
        /// <description>
        /// <para>
        /// Use <c>percentBegin</c> to indicate the beginning of the range of the data used
        /// to create the Datasource. If you do not include <c>percentBegin</c> and
        /// <c>percentEnd</c>, Amazon ML includes all of the data when creating the datasource.
        /// </para>
        /// </description>
        /// </item>
        /// <item>
        /// <term><c>percentEnd</c></term>
        /// <description>
        /// <para>
        /// Use <c>percentEnd</c> to indicate the end of the range of the data used to create
        /// the Datasource. If you do not include <c>percentBegin</c> and <c>percentEnd</c>,
        /// Amazon ML includes all of the data when creating the datasource.
        /// </para>
        /// </description>
        /// </item>
        /// <item>
        /// <term><c>complement</c></term>
        /// <description>
        /// <para>
        /// The <c>complement</c> parameter instructs Amazon ML to use the data that is
        /// not included in the range of <c>percentBegin</c> to <c>percentEnd</c>
        /// to create a datasource. The <c>complement</c> parameter is useful if you need
        /// to create complementary datasources for training and evaluation. To create a
        /// complementary datasource, use the same values for <c>percentBegin</c> and
        /// <c>percentEnd</c>, along with the <c>complement</c> parameter.
        /// </para>
        /// <para>
        /// For example, the following two datasources do not share any data, and can be used
        /// to train and evaluate a model. The first datasource has 25 percent of the data, and
        /// the second one has 75 percent of the data.
        /// </para>
        /// <para>
        /// Datasource for evaluation: <c>{"splitting":{"percentBegin":0, "percentEnd":25}}</c>
        /// </para>
        /// <para>
        /// Datasource for training: <c>{"splitting":{"percentBegin":0, "percentEnd":25, "complement":"true"}}</c>
        /// </para>
        /// </description>
        /// </item>
        /// <item>
        /// <term><c>strategy</c></term>
        /// <description>
        /// <para>
        /// To change how Amazon ML splits the data for a datasource, use the <c>strategy</c>
        /// parameter.
        /// </para>
        /// <para>
        /// The default value for the <c>strategy</c> parameter is <c>sequential</c>,
        /// meaning that Amazon ML takes all of the data records between the <c>percentBegin</c>
        /// and <c>percentEnd</c> parameters for the datasource, in the order that the records
        /// appear in the input data.
        /// </para>
        /// <para>
        /// The following two <c>DataRearrangement</c> lines are examples of sequentially
        /// ordered training and evaluation datasources:
        /// </para>
        /// <para>
        /// Datasource for evaluation: <c>{"splitting":{"percentBegin":70, "percentEnd":100,
        /// "strategy":"sequential"}}</c>
        /// </para>
        /// <para>
        /// Datasource for training: <c>{"splitting":{"percentBegin":70, "percentEnd":100,
        /// "strategy":"sequential", "complement":"true"}}</c>
        /// </para>
        /// <para>
        /// To randomly split the input data into the proportions indicated by the
        /// <c>percentBegin</c> and <c>percentEnd</c> parameters, set the <c>strategy</c>
        /// parameter to <c>random</c> and provide a string that is used as the seed value for
        /// the random data splitting (for example, you can use the S3 path to your data as the
        /// random seed string). If you choose the random split strategy, Amazon ML assigns each
        /// row of data a pseudo-random number between 0 and 100, and then selects the rows that
        /// have an assigned number between <c>percentBegin</c> and <c>percentEnd</c>.
        /// Pseudo-random numbers are assigned using both the input seed string value and the
        /// byte offset as a seed, so changing the data results in a different split. Any
        /// existing ordering is preserved. The random splitting strategy ensures that variables
        /// in the training and evaluation data are distributed similarly. It is useful in the
        /// cases where the input data may have an implicit sort order, which would otherwise
        /// result in training and evaluation datasources containing non-similar data records.
        /// </para>
        /// <para>
        /// The following two <c>DataRearrangement</c> lines are examples of non-sequentially
        /// ordered training and evaluation datasources:
        /// </para>
        /// <para>
        /// Datasource for evaluation: <c>{"splitting":{"percentBegin":70, "percentEnd":100,
        /// "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv"}}</c>
        /// </para>
        /// <para>
        /// Datasource for training: <c>{"splitting":{"percentBegin":70, "percentEnd":100,
        /// "strategy":"random", "randomSeed"="s3://my_s3_path/bucket/file.csv", "complement":"true"}}</c>
        /// </para>
        /// </description>
        /// </item>
        /// </list>
        /// </summary>
        public string DataRearrangement
        {
            get
            {
                return _dataRearrangement;
            }
            set
            {
                _dataRearrangement = value;
            }
        }

        /// <summary>
        /// Reports whether <see cref="DataRearrangement"/> holds a non-null value.
        /// </summary>
        /// <returns><c>true</c> when the backing field is not null; otherwise <c>false</c>.</returns>
        internal bool IsSetDataRearrangement()
        {
            return _dataRearrangement != null;
        }

        /// <summary>
        /// Gets and sets the property DataSchema.
        /// <para>
        /// A JSON string that represents the schema for an Amazon S3 <c>DataSource</c>.
        /// The <c>DataSchema</c> defines the structure of the observation data in the data
        /// file(s) referenced in the <c>DataSource</c>.
        /// </para>
        /// <para>
        /// You must provide either the <c>DataSchema</c> or the <c>DataSchemaLocationS3</c>.
        /// </para>
        /// <para>
        /// Define your <c>DataSchema</c> as a series of key-value pairs. <c>attributes</c>
        /// and <c>excludedVariableNames</c> have an array of key-value pairs for their
        /// value. Use the following format to define your <c>DataSchema</c>.
        /// </para>
        /// <code>
        /// { "version": "1.0",
        ///   "recordAnnotationFieldName": "F1",
        ///   "recordWeightFieldName": "F2",
        ///   "targetFieldName": "F3",
        ///   "dataFormat": "CSV",
        ///   "dataFileContainsHeader": true,
        ///   "attributes": [
        ///     { "fieldName": "F1", "fieldType": "TEXT" },
        ///     { "fieldName": "F2", "fieldType": "NUMERIC" },
        ///     { "fieldName": "F3", "fieldType": "CATEGORICAL" },
        ///     { "fieldName": "F4", "fieldType": "NUMERIC" },
        ///     { "fieldName": "F5", "fieldType": "CATEGORICAL" },
        ///     { "fieldName": "F6", "fieldType": "TEXT" },
        ///     { "fieldName": "F7", "fieldType": "WEIGHTED_INT_SEQUENCE" },
        ///     { "fieldName": "F8", "fieldType": "WEIGHTED_STRING_SEQUENCE" } ],
        ///   "excludedVariableNames": [ "F6" ] }
        /// </code>
        /// </summary>
        [AWSProperty(Max = 131071)]
        public string DataSchema
        {
            get
            {
                return _dataSchema;
            }
            set
            {
                _dataSchema = value;
            }
        }

        /// <summary>
        /// Reports whether <see cref="DataSchema"/> holds a non-null value.
        /// </summary>
        /// <returns><c>true</c> when the backing field is not null; otherwise <c>false</c>.</returns>
        internal bool IsSetDataSchema()
        {
            return _dataSchema != null;
        }

        /// <summary>
        /// Gets and sets the property DataSchemaLocationS3.
        /// <para>
        /// Describes the schema location in Amazon S3. You must provide either the
        /// <c>DataSchema</c> or the <c>DataSchemaLocationS3</c>.
        /// </para>
        /// </summary>
        [AWSProperty(Max = 2048)]
        public string DataSchemaLocationS3
        {
            get
            {
                return _dataSchemaLocationS3;
            }
            set
            {
                _dataSchemaLocationS3 = value;
            }
        }

        /// <summary>
        /// Reports whether <see cref="DataSchemaLocationS3"/> holds a non-null value.
        /// </summary>
        /// <returns><c>true</c> when the backing field is not null; otherwise <c>false</c>.</returns>
        internal bool IsSetDataSchemaLocationS3()
        {
            return _dataSchemaLocationS3 != null;
        }
    }
}