---
# Kubeflow PyTorchJob "pytorch-mnist": runs mnist-debug.py for 5 epochs on
# one Master replica and five Worker replicas. Both replica types use the
# same pod template; only the replica count differs.
apiVersion: "kubeflow.org/v1"
kind: PyTorchJob
metadata:
  name: pytorch-mnist
  # Uncomment to pin the job to the kubeflow namespace.
  # namespace: kubeflow
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            # Opt this pod out of Istio sidecar injection. The value must
            # stay a quoted string: annotation values are strings, and a
            # bare false would be parsed as a boolean.
            # NOTE(review): presumably disabled so the sidecar does not
            # interfere with the training pods - confirm.
            sidecar.istio.io/inject: "false"
        spec:
          containers:
            # NOTE(review): the Kubeflow training operator docs call for
            # the training container to be named "pytorch" - confirm
            # before renaming.
            - name: pytorch
              # <account_num> is a placeholder; substitute the AWS account
              # ID that owns the ECR repository before applying.
              image: <account_num>.dkr.ecr.us-west-2.amazonaws.com/pytorch-mnist:latest
              imagePullPolicy: Always
              command:
                - "python3"
                - "/opt/pytorch-mnist/mnist-debug.py"
                - "--epochs=5"
    Worker:
      replicas: 5
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            # Same Istio opt-out as the Master template; keep it quoted.
            sidecar.istio.io/inject: "false"
        spec:
          containers:
            # Same container definition as the Master template.
            - name: pytorch
              # <account_num> is a placeholder; substitute the AWS account
              # ID that owns the ECR repository before applying.
              image: <account_num>.dkr.ecr.us-west-2.amazonaws.com/pytorch-mnist:latest
              imagePullPolicy: Always
              command:
                - "python3"
                - "/opt/pytorch-mnist/mnist-debug.py"
                - "--epochs=5"