---
# HyperparameterTuningJob custom resource (sagemaker.aws.amazon.com/v1).
# Tunes `num_round` for an XGBoost training image while holding every other
# hyperparameter fixed, with managed spot training enabled.
apiVersion: sagemaker.aws.amazon.com/v1
kind: HyperparameterTuningJob
metadata:
  name: spot-xgboost-mnist-hpo
spec:
  region: us-west-2

  # Search configuration: strategy, objective metric, job limits, and the
  # ranges of the hyperparameters being tuned.
  hyperParameterTuningJobConfig:
    strategy: Bayesian
    hyperParameterTuningJobObjective:
      type: Minimize
      metricName: validation:error
    resourceLimits:
      maxNumberOfTrainingJobs: 10
      maxParallelTrainingJobs: 5
    parameterRanges:
      # Range bounds are written as quoted strings, not YAML integers.
      integerParameterRanges:
        - name: num_round
          minValue: '10'
          maxValue: '20'
          scalingType: Linear
      # No continuous or categorical parameters are tuned; the lists are
      # explicitly empty rather than omitted.
      continuousParameterRanges: []
      categoricalParameterRanges: []

  # Template applied to each training job launched by the tuning job.
  trainingJobDefinition:
    # Hyperparameters held constant across all training jobs. Values are
    # quoted so number-like entries stay strings.
    staticHyperParameters:
      - name: base_score
        value: '0.5'
      - name: booster
        value: gbtree
      - name: csv_weights
        value: '0'
      - name: dsplit
        value: row
      - name: grow_policy
        value: depthwise
      - name: lambda_bias
        value: '0.0'
      - name: max_bin
        value: '256'
      - name: max_leaves
        value: '0'
      - name: normalize_type
        value: tree
      - name: objective
        value: reg:linear
      - name: one_drop
        value: '0'
      - name: prob_buffer_row
        value: '1.0'
      - name: process_type
        value: default
      - name: rate_drop
        value: '0.0'
      - name: refresh_leaf
        value: '1'
      - name: sample_type
        value: uniform
      - name: scale_pos_weight
        value: '1.0'
      - name: silent
        value: '0'
      - name: sketch_eps
        value: '0.03'
      - name: skip_drop
        value: '0.0'
      - name: tree_method
        value: auto
      - name: tweedie_variance_power
        value: '1.5'
      - name: updater
        value: grow_colmaker,prune

    algorithmSpecification:
      trainingImage: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1
      trainingInputMode: File

    # Placeholder account ID and role name; replace with a real execution role.
    roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole

    # Two CSV channels (train / validation) read from S3 prefixes.
    # `my-bucket` is a placeholder bucket name.
    inputDataConfig:
      - channelName: train
        dataSource:
          s3DataSource:
            s3DataType: S3Prefix
            s3Uri: s3://my-bucket/xgboost/train/
            s3DataDistributionType: FullyReplicated
        contentType: text/csv
        compressionType: None
        recordWrapperType: None
        inputMode: File
      - channelName: validation
        dataSource:
          s3DataSource:
            s3DataType: S3Prefix
            s3Uri: s3://my-bucket/xgboost/validation/
            s3DataDistributionType: FullyReplicated
        contentType: text/csv
        compressionType: None
        recordWrapperType: None
        inputMode: File

    outputDataConfig:
      s3OutputPath: s3://my-bucket/xgboost

    resourceConfig:
      instanceType: ml.m4.xlarge
      instanceCount: 1
      volumeSizeInGB: 25

    # With managed spot training, maxWaitTimeInSeconds must be greater than
    # or equal to maxRuntimeInSeconds (3600 >= 3599 here).
    stoppingCondition:
      maxRuntimeInSeconds: 3599
      maxWaitTimeInSeconds: 3600

    enableManagedSpotTraining: true
    enableNetworkIsolation: true
    enableInterContainerTrafficEncryption: false

    # S3 location for training checkpoints.
    # NOTE(review): presumably used to resume after spot interruptions;
    # confirm the training image actually writes checkpoints.
    checkpointConfig:
      s3Uri: s3://my-bucket/xgboost/checkpoints/