---
# Custom resource of kind TrainingJob in the sagemaker.aws.amazon.com/v1
# API group.
# NOTE(review): presumably reconciled by the SageMaker operator for
# Kubernetes; confirm the matching CRD is installed in the target cluster.
apiVersion: sagemaker.aws.amazon.com/v1
kind: TrainingJob
metadata:
  # Name of this TrainingJob object.
  name: 'spot-xgboost-mnist'
spec:
  # Hyperparameters are given as a list of name/value pairs. Every value is
  # quoted so that number-looking entries stay strings after YAML parsing.
  hyperParameters:
    - name: max_depth
      value: '5'
    - name: eta
      value: '0.2'
    - name: gamma
      value: '4'
    - name: min_child_weight
      value: '6'
    - name: silent
      value: '0'
    # The objective is multi-class (multi:softmax) and is paired with
    # num_class below, which is set to 10.
    - name: objective
      value: 'multi:softmax'
    - name: num_class
      value: '10'
    - name: num_round
      value: '10'
  # Region for the job; it matches the us-west-2 segment of the training
  # image's ECR hostname below.
  region: 'us-west-2'
  # NOTE(review): account id 123456789012 looks like a placeholder; replace
  # with the real execution role ARN before applying.
  roleArn: 'arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole'
  # Container image used for training and the mode in which input data is
  # delivered to it (File).
  algorithmSpecification:
    trainingInputMode: 'File'
    trainingImage: '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1'
  # Compute for the job: a single ml.m4.xlarge instance with a 5 GB volume.
  resourceConfig:
    instanceType: 'ml.m4.xlarge'
    instanceCount: 1
    volumeSizeInGB: 5
  # S3 location for job output.
  # NOTE(review): "my-bucket" looks like a placeholder; confirm the bucket.
  outputDataConfig:
    s3OutputPath: 's3://my-bucket/xgboost'
  # Managed spot training is switched on for this job.
  enableManagedSpotTraining: true
  # Time limits in seconds. The wait limit (3600) is one second above the
  # runtime limit (3599).
  # NOTE(review): the wait limit presumably must not be smaller than the
  # runtime limit when spot training is enabled; confirm against the
  # SageMaker API reference before changing either value.
  stoppingCondition:
    maxWaitTimeInSeconds: 3600
    maxRuntimeInSeconds: 3599
  # Input channels. Both channels read CSV objects under an S3 prefix, are
  # fully replicated, and have compressionType set to the string "None".
  # NOTE(review): "my-bucket" looks like a placeholder; confirm the bucket.
  inputDataConfig:
    # Channel "train".
    - channelName: 'train'
      contentType: 'text/csv'
      compressionType: 'None'
      dataSource:
        s3DataSource:
          s3Uri: 's3://my-bucket/xgboost/train/'
          s3DataType: 'S3Prefix'
          s3DataDistributionType: 'FullyReplicated'
    # Channel "validation".
    - channelName: 'validation'
      contentType: 'text/csv'
      compressionType: 'None'
      dataSource:
        s3DataSource:
          s3Uri: 's3://my-bucket/xgboost/validation/'
          s3DataType: 'S3Prefix'
          s3DataDistributionType: 'FullyReplicated'
  # S3 location for training checkpoints.
  # NOTE(review): presumably what lets an interrupted spot job resume;
  # confirm the bucket ("my-bucket" looks like a placeholder) is writable
  # by the execution role.
  checkpointConfig:
    s3Uri: 's3://my-bucket/xgboost/checkpoints/'