/*******************************************************************************
 *  Copyright 2012-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *  Licensed under the Apache License, Version 2.0 (the "License"). You may not use
 *  this file except in compliance with the License. A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 *  or in the "license" file accompanying this file.
 *  This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 *  CONDITIONS OF ANY KIND, either express or implied. See the License for the
 *  specific language governing permissions and limitations under the License.
 * *****************************************************************************
 *
 *  AWS Tools for Windows (TM) PowerShell (TM)
 *
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Management.Automation;
using System.Text;
using Amazon.PowerShell.Common;
using Amazon.Runtime;
using Amazon.MachineLearning;
using Amazon.MachineLearning.Model;

namespace Amazon.PowerShell.Cmdlets.ML
{
    /// <summary>
    /// Creates a DataSource object. A DataSource references data
    /// that can be used to perform CreateMLModel, CreateEvaluation,
    /// or CreateBatchPrediction operations.
    ///
    /// <para>
    /// CreateDataSourceFromS3 is an asynchronous operation. In response to
    /// CreateDataSourceFromS3, Amazon Machine Learning (Amazon ML) immediately
    /// returns and sets the DataSource status to PENDING. After
    /// the DataSource has been created and is ready for use, Amazon ML sets
    /// the Status parameter to COMPLETED. A DataSource
    /// in the COMPLETED or PENDING state can be used to perform
    /// only CreateMLModel, CreateEvaluation or CreateBatchPrediction
    /// operations.
    /// </para><para>
    /// If Amazon ML can't accept the input source, it sets the Status parameter
    /// to FAILED and includes an error message in the Message attribute
    /// of the GetDataSource operation response.
    /// </para><para>
    /// The observation data used in a DataSource should be ready to use; that
    /// is, it should have a consistent structure, and missing data values should be kept
    /// to a minimum. The observation data must reside in one or more .csv files in an Amazon
    /// Simple Storage Service (Amazon S3) location, along with a schema that describes the
    /// data items by name and type. The same schema must be used for all of the data files
    /// referenced by the DataSource.
    /// </para><para>
    /// After the DataSource has been created, it's ready to use in evaluations
    /// and batch predictions. If you plan to use the DataSource to train an
    /// MLModel, the DataSource also needs a recipe. A recipe describes
    /// how each input variable will be used in training an MLModel. Will the
    /// variable be included or excluded from training? Will the variable be manipulated;
    /// for example, will it be combined with another variable or will it be split apart into
    /// word combinations? The recipe provides answers to these questions.
    /// </para>
    /// </summary>
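    /// <example>
    /// <para>
    /// A minimal, illustrative invocation (a sketch, not generated documentation; the
    /// identifier, name, and s3://amzn-s3-demo-bucket locations below are placeholder
    /// values). Because the operation is asynchronous, the new DataSource starts in
    /// the PENDING state; Get-MLDataSource (the GetDataSource wrapper in this module)
    /// can be used to check its Status afterward:
    /// </para>
    /// <code>
    /// New-MLDataSourceFromS3 -DataSourceId "my-s3-ds-001" `
    ///     -DataSourceName "Sample S3 datasource" `
    ///     -DataSpec_DataLocationS3 "s3://amzn-s3-demo-bucket/observations.csv" `
    ///     -DataSpec_DataSchemaLocationS3 "s3://amzn-s3-demo-bucket/observations.csv.schema" `
    ///     -ComputeStatistic $true
    /// </code>
    /// </example>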
    [Cmdlet("New", "MLDataSourceFromS3", SupportsShouldProcess = true, ConfirmImpact = ConfirmImpact.Medium)]
    [OutputType("System.String")]
    [AWSCmdlet("Calls the Amazon Machine Learning CreateDataSourceFromS3 API operation.", Operation = new[] {"CreateDataSourceFromS3"}, SelectReturnType = typeof(Amazon.MachineLearning.Model.CreateDataSourceFromS3Response))]
    [AWSCmdletOutput("System.String or Amazon.MachineLearning.Model.CreateDataSourceFromS3Response",
        "This cmdlet returns a System.String object.",
        "The service call response (type Amazon.MachineLearning.Model.CreateDataSourceFromS3Response) can also be referenced from properties attached to the cmdlet entry in the $AWSHistory stack."
    )]
    public partial class NewMLDataSourceFromS3Cmdlet : AmazonMachineLearningClientCmdlet, IExecutor
    {
        
        #region Parameter ComputeStatistic
        /// <summary>
        /// <para>
        /// The compute statistics for a DataSource. The statistics are generated
        /// from the observation data referenced by a DataSource. Amazon ML uses
        /// the statistics internally during MLModel training. This parameter must
        /// be set to true if the DataSource needs to be used for MLModel
        /// training.
        /// </para>
        /// </summary>
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        [Alias("ComputeStatistics")]
        public System.Boolean? ComputeStatistic { get; set; }
        #endregion
        
        #region Parameter DataSpec_DataLocationS3
        /// <summary>
        /// <para>
        /// The location of the data file(s) used by a DataSource. The URI specifies
        /// a data file or an Amazon Simple Storage Service (Amazon S3) directory or bucket containing
        /// data files.
        /// </para>
        /// </summary>
        #if !MODULAR
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        #else
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true, Mandatory = true)]
        [System.Management.Automation.AllowEmptyString]
        [System.Management.Automation.AllowNull]
        #endif
        [Amazon.PowerShell.Common.AWSRequiredParameter]
        public System.String DataSpec_DataLocationS3 { get; set; }
        #endregion
        #region Parameter DataSpec_DataRearrangement
        /// <summary>
        /// <para>
        /// A JSON string that represents the splitting and rearrangement processing to be applied
        /// to a DataSource. If the DataRearrangement parameter is not
        /// provided, all of the input data is used to create the Datasource.
        /// </para><para>
        /// There are multiple parameters that control what data is used to create a datasource:
        /// </para><ul><li><para>
        /// percentBegin
        /// </para><para>
        /// Use percentBegin to indicate the beginning of the range of the data used
        /// to create the Datasource. If you do not include percentBegin and percentEnd,
        /// Amazon ML includes all of the data when creating the datasource.
        /// </para></li><li><para>
        /// percentEnd
        /// </para><para>
        /// Use percentEnd to indicate the end of the range of the data used to create
        /// the Datasource. If you do not include percentBegin and percentEnd,
        /// Amazon ML includes all of the data when creating the datasource.
        /// </para></li><li><para>
        /// complement
        /// </para><para>
        /// The complement parameter instructs Amazon ML to use the data that is
        /// not included in the range of percentBegin to percentEnd
        /// to create a datasource. The complement parameter is useful if you need
        /// to create complementary datasources for training and evaluation. To create a complementary
        /// datasource, use the same values for percentBegin and percentEnd,
        /// along with the complement parameter.
        /// </para><para>
        /// For example, the following two datasources do not share any data, and can be used
        /// to train and evaluate a model. The first datasource has 25 percent of the data, and
        /// the second one has 75 percent of the data.
        /// </para><para>
        /// Datasource for evaluation: {"splitting":{"percentBegin":0, "percentEnd":25}}
        /// </para><para>
        /// Datasource for training: {"splitting":{"percentBegin":0, "percentEnd":25, "complement":"true"}}
        /// </para></li><li><para>
        /// strategy
        /// </para><para>
        /// To change how Amazon ML splits the data for a datasource, use the strategy
        /// parameter.
        /// </para><para>
        /// The default value for the strategy parameter is sequential,
        /// meaning that Amazon ML takes all of the data records between the percentBegin
        /// and percentEnd parameters for the datasource, in the order that the records
        /// appear in the input data.
        /// </para><para>
        /// The following two DataRearrangement lines are examples of sequentially
        /// ordered training and evaluation datasources:
        /// </para><para>
        /// Datasource for evaluation: {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential"}}
        /// </para><para>
        /// Datasource for training: {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"sequential", "complement":"true"}}
        /// </para><para>
        /// To randomly split the input data into the proportions indicated by the percentBegin
        /// and percentEnd parameters, set the strategy parameter to random
        /// and provide a string that is used as the seed value for the random data splitting
        /// (for example, you can use the S3 path to your data as the random seed string). If
        /// you choose the random split strategy, Amazon ML assigns each row of data a pseudo-random
        /// number between 0 and 100, and then selects the rows that have an assigned number between
        /// percentBegin and percentEnd. Pseudo-random numbers are assigned
        /// using both the input seed string value and the byte offset as a seed, so changing
        /// the data results in a different split. Any existing ordering is preserved. The random
        /// splitting strategy ensures that variables in the training and evaluation data are
        /// distributed similarly. It is useful in the cases where the input data may have an
        /// implicit sort order, which would otherwise result in training and evaluation datasources
        /// containing non-similar data records.
        /// </para><para>
        /// The following two DataRearrangement lines are examples of non-sequentially
        /// ordered training and evaluation datasources (see also the usage sketch after this list):
        /// </para><para>
        /// Datasource for evaluation: {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv"}}
        /// </para><para>
        /// Datasource for training: {"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv", "complement":"true"}}
        /// </para></li></ul>
        /// </summary>
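        /// <example>
        /// <para>
        /// A minimal sketch (not generated documentation) of creating the complementary
        /// random-split training/evaluation pair described above; the datasource
        /// identifiers and the s3://amzn-s3-demo-bucket locations are placeholder values:
        /// </para>
        /// <code>
        /// $data  = "s3://amzn-s3-demo-bucket/observations.csv"
        /// # Use the data path as the random seed, as suggested above.
        /// $eval  = '{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed":"' + $data + '"}}'
        /// $train = '{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random", "randomSeed":"' + $data + '", "complement":"true"}}'
        /// New-MLDataSourceFromS3 -DataSourceId "ds-eval-001" -DataSpec_DataLocationS3 $data `
        ///     -DataSpec_DataSchemaLocationS3 "s3://amzn-s3-demo-bucket/observations.csv.schema" `
        ///     -DataSpec_DataRearrangement $eval
        /// # ComputeStatistic must be true for a datasource used in MLModel training.
        /// New-MLDataSourceFromS3 -DataSourceId "ds-train-001" -DataSpec_DataLocationS3 $data `
        ///     -DataSpec_DataSchemaLocationS3 "s3://amzn-s3-demo-bucket/observations.csv.schema" `
        ///     -DataSpec_DataRearrangement $train -ComputeStatistic $true
        /// </code>
        /// </example>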
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        public System.String DataSpec_DataRearrangement { get; set; }
        #endregion
        
        #region Parameter DataSpec_DataSchema
        /// <summary>
        /// <para>
        /// A JSON string that represents the schema for an Amazon S3 DataSource.
        /// The DataSchema defines the structure of the observation data in the data
        /// file(s) referenced in the DataSource.
        /// </para><para>
        /// You must provide either the DataSchema or the DataSchemaLocationS3.
        /// </para><para>
        /// Define your DataSchema as a series of key-value pairs. attributes
        /// and excludedVariableNames have an array of key-value pairs for their
        /// value. Use the following format to define your DataSchema.
        /// </para><para>
        /// { "version": "1.0",
        /// "recordAnnotationFieldName": "F1",
        /// "recordWeightFieldName": "F2",
        /// "targetFieldName": "F3",
        /// "dataFormat": "CSV",
        /// "dataFileContainsHeader": true,
        /// "attributes": [
        /// { "fieldName": "F1", "fieldType": "TEXT" },
        /// { "fieldName": "F2", "fieldType": "NUMERIC" },
        /// { "fieldName": "F3", "fieldType": "CATEGORICAL" },
        /// { "fieldName": "F4", "fieldType": "NUMERIC" },
        /// { "fieldName": "F5", "fieldType": "CATEGORICAL" },
        /// { "fieldName": "F6", "fieldType": "TEXT" },
        /// { "fieldName": "F7", "fieldType": "WEIGHTED_INT_SEQUENCE" },
        /// { "fieldName": "F8", "fieldType": "WEIGHTED_STRING_SEQUENCE" } ],
        /// "excludedVariableNames": [ "F6" ] }
        /// </para>
        /// </summary>
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        public System.String DataSpec_DataSchema { get; set; }
        #endregion
        
        #region Parameter DataSpec_DataSchemaLocationS3
        /// <summary>
        /// <para>
        /// Describes the schema location in Amazon S3. You must provide either the DataSchema
        /// or the DataSchemaLocationS3.
        /// </para>
        /// </summary>
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        public System.String DataSpec_DataSchemaLocationS3 { get; set; }
        #endregion
        
        #region Parameter DataSourceId
        /// <summary>
        /// <para>
        /// A user-supplied identifier that uniquely identifies the DataSource.
        /// </para>
        /// </summary>
        #if !MODULAR
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        #else
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true, Mandatory = true)]
        [System.Management.Automation.AllowEmptyString]
        [System.Management.Automation.AllowNull]
        #endif
        [Amazon.PowerShell.Common.AWSRequiredParameter]
        public System.String DataSourceId { get; set; }
        #endregion
        
        #region Parameter DataSourceName
        /// <summary>
        /// <para>
        /// A user-supplied name or description of the DataSource.
        /// </para>
        /// </summary>
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        public System.String DataSourceName { get; set; }
        #endregion
        
        #region Parameter Select
        /// <summary>
        /// Use the -Select parameter to control the cmdlet output. The default value is 'DataSourceId'.
        /// Specifying -Select '*' will result in the cmdlet returning the whole service response (Amazon.MachineLearning.Model.CreateDataSourceFromS3Response).
        /// Specifying the name of a property of type Amazon.MachineLearning.Model.CreateDataSourceFromS3Response will result in that property being returned.
        /// Specifying -Select '^ParameterName' will result in the cmdlet returning the selected cmdlet parameter value.
        /// </summary>
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        public string Select { get; set; } = "DataSourceId";
        #endregion
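        // Illustrative use of -Select (a sketch; the identifier and bucket below are
        // placeholders, not values from this module). Passing '*' returns the whole
        // CreateDataSourceFromS3Response instead of the default DataSourceId string:
        //
        //   New-MLDataSourceFromS3 -DataSourceId "my-s3-ds-001" `
        //       -DataSpec_DataLocationS3 "s3://amzn-s3-demo-bucket/observations.csv" `
        //       -DataSpec_DataSchemaLocationS3 "s3://amzn-s3-demo-bucket/observations.csv.schema" `
        //       -Select '*'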
        #region Parameter Force
        /// <summary>
        /// This parameter overrides confirmation prompts to force
        /// the cmdlet to continue its operation. This parameter should always
        /// be used with caution.
        /// </summary>
        [System.Management.Automation.Parameter(ValueFromPipelineByPropertyName = true)]
        public SwitchParameter Force { get; set; }
        #endregion
        
        protected override void ProcessRecord()
        {
            this._AWSSignerType = "v4";
            base.ProcessRecord();
            
            var resourceIdentifiersText = FormatParameterValuesForConfirmationMsg(nameof(this.DataSourceName), MyInvocation.BoundParameters);
            if (!ConfirmShouldProceed(this.Force.IsPresent, resourceIdentifiersText, "New-MLDataSourceFromS3 (CreateDataSourceFromS3)"))
            {
                return;
            }
            
            var context = new CmdletContext();
            
            // allow for manipulation of parameters prior to loading into context
            PreExecutionContextLoad(context);
            
            if (ParameterWasBound(nameof(this.Select)))
            {
                context.Select = CreateSelectDelegate(Select) ??
                    throw new System.ArgumentException("Invalid value for -Select parameter.", nameof(this.Select));
            }
            context.ComputeStatistic = this.ComputeStatistic;
            context.DataSourceId = this.DataSourceId;
            #if MODULAR
            if (this.DataSourceId == null && ParameterWasBound(nameof(this.DataSourceId)))
            {
                WriteWarning("You are passing $null as a value for parameter DataSourceId which is marked as required. In case you believe this parameter was incorrectly marked as required, report this by opening an issue at https://github.com/aws/aws-tools-for-powershell/issues.");
            }
            #endif
            context.DataSourceName = this.DataSourceName;
            context.DataSpec_DataLocationS3 = this.DataSpec_DataLocationS3;
            #if MODULAR
            if (this.DataSpec_DataLocationS3 == null && ParameterWasBound(nameof(this.DataSpec_DataLocationS3)))
            {
                WriteWarning("You are passing $null as a value for parameter DataSpec_DataLocationS3 which is marked as required. In case you believe this parameter was incorrectly marked as required, report this by opening an issue at https://github.com/aws/aws-tools-for-powershell/issues.");
            }
            #endif
            context.DataSpec_DataRearrangement = this.DataSpec_DataRearrangement;
            context.DataSpec_DataSchema = this.DataSpec_DataSchema;
            context.DataSpec_DataSchemaLocationS3 = this.DataSpec_DataSchemaLocationS3;
            
            // allow further manipulation of loaded context prior to processing
            PostExecutionContextLoad(context);
            
            var output = Execute(context) as CmdletOutput;
            ProcessOutput(output);
        }
        
        #region IExecutor Members
        
        public object Execute(ExecutorContext context)
        {
            var cmdletContext = context as CmdletContext;
            // create request
            var request = new Amazon.MachineLearning.Model.CreateDataSourceFromS3Request();
            
            if (cmdletContext.ComputeStatistic != null)
            {
                request.ComputeStatistics = cmdletContext.ComputeStatistic.Value;
            }
            if (cmdletContext.DataSourceId != null)
            {
                request.DataSourceId = cmdletContext.DataSourceId;
            }
            if (cmdletContext.DataSourceName != null)
            {
                request.DataSourceName = cmdletContext.DataSourceName;
            }
            
            // populate DataSpec
            var requestDataSpecIsNull = true;
            request.DataSpec = new Amazon.MachineLearning.Model.S3DataSpec();
            System.String requestDataSpec_dataSpec_DataLocationS3 = null;
            if (cmdletContext.DataSpec_DataLocationS3 != null)
            {
                requestDataSpec_dataSpec_DataLocationS3 = cmdletContext.DataSpec_DataLocationS3;
            }
            if (requestDataSpec_dataSpec_DataLocationS3 != null)
            {
                request.DataSpec.DataLocationS3 = requestDataSpec_dataSpec_DataLocationS3;
                requestDataSpecIsNull = false;
            }
            System.String requestDataSpec_dataSpec_DataRearrangement = null;
            if (cmdletContext.DataSpec_DataRearrangement != null)
            {
                requestDataSpec_dataSpec_DataRearrangement = cmdletContext.DataSpec_DataRearrangement;
            }
            if (requestDataSpec_dataSpec_DataRearrangement != null)
            {
                request.DataSpec.DataRearrangement = requestDataSpec_dataSpec_DataRearrangement;
                requestDataSpecIsNull = false;
            }
            System.String requestDataSpec_dataSpec_DataSchema = null;
            if (cmdletContext.DataSpec_DataSchema != null)
            {
                requestDataSpec_dataSpec_DataSchema = cmdletContext.DataSpec_DataSchema;
            }
            if (requestDataSpec_dataSpec_DataSchema != null)
            {
                request.DataSpec.DataSchema = requestDataSpec_dataSpec_DataSchema;
                requestDataSpecIsNull = false;
            }
            System.String requestDataSpec_dataSpec_DataSchemaLocationS3 = null;
            if (cmdletContext.DataSpec_DataSchemaLocationS3 != null)
            {
                requestDataSpec_dataSpec_DataSchemaLocationS3 = cmdletContext.DataSpec_DataSchemaLocationS3;
            }
            if (requestDataSpec_dataSpec_DataSchemaLocationS3 != null)
            {
                request.DataSpec.DataSchemaLocationS3 = requestDataSpec_dataSpec_DataSchemaLocationS3;
                requestDataSpecIsNull = false;
            }
            // determine if request.DataSpec should be set to null
            if (requestDataSpecIsNull)
            {
                request.DataSpec = null;
            }
            
            CmdletOutput output;
            
            // issue call
            var client = Client ?? CreateClient(_CurrentCredentials, _RegionEndpoint);
            try
            {
                var response = CallAWSServiceOperation(client, request);
                object pipelineOutput = null;
                pipelineOutput = cmdletContext.Select(response, this);
                output = new CmdletOutput
                {
                    PipelineOutput = pipelineOutput,
                    ServiceResponse = response
                };
            }
            catch (Exception e)
            {
                output = new CmdletOutput { ErrorResponse = e };
            }
            
            return output;
        }
        
        public ExecutorContext CreateContext()
        {
            return new CmdletContext();
        }
        
        #endregion
        
        #region AWS Service Operation Call
        
        private Amazon.MachineLearning.Model.CreateDataSourceFromS3Response CallAWSServiceOperation(IAmazonMachineLearning client, Amazon.MachineLearning.Model.CreateDataSourceFromS3Request request)
        {
            Utils.Common.WriteVerboseEndpointMessage(this, client.Config, "Amazon Machine Learning", "CreateDataSourceFromS3");
            try
            {
                #if DESKTOP
                return client.CreateDataSourceFromS3(request);
                #elif CORECLR
                return client.CreateDataSourceFromS3Async(request).GetAwaiter().GetResult();
                #else
                #error "Unknown build edition"
                #endif
            }
            catch (AmazonServiceException exc)
            {
                var webException = exc.InnerException as System.Net.WebException;
                if (webException != null)
                {
                    throw new Exception(Utils.Common.FormatNameResolutionFailureMessage(client.Config, webException.Message), webException);
                }
                throw;
            }
        }
        
        #endregion
        
        internal partial class CmdletContext : ExecutorContext
        {
            public System.Boolean? ComputeStatistic { get; set; }
            public System.String DataSourceId { get; set; }
            public System.String DataSourceName { get; set; }
            public System.String DataSpec_DataLocationS3 { get; set; }
            public System.String DataSpec_DataRearrangement { get; set; }
            public System.String DataSpec_DataSchema { get; set; }
            public System.String DataSpec_DataSchemaLocationS3 { get; set; }
            // Maps the service response (plus the cmdlet instance) to the value written
            // to the pipeline; by default this is the new DataSourceId.
            public System.Func<Amazon.MachineLearning.Model.CreateDataSourceFromS3Response, NewMLDataSourceFromS3Cmdlet, object> Select { get; set; } =
                (response, cmdlet) => response.DataSourceId;
        }
        
    }
}