---
# PyTorchJob that runs /opt/pytorch-mnist/mnist-debug.py for 5 epochs on
# 1 Master replica and 5 Worker replicas. Both roles use the same image,
# command, pull policy, and restart policy.
apiVersion: kubeflow.org/v1
kind: PyTorchJob
metadata:
  name: pytorch-mnist
  # namespace: kubeflow
spec:
  pytorchReplicaSpecs:
    # Single Master replica.
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            # Quoted on purpose: unquoted, this would parse as a boolean
            # instead of a string.
            sidecar.istio.io/inject: 'false'
        spec:
          containers:
            - name: pytorch
              # <account_num> is a placeholder in the registry host;
              # presumably it must be substituted before applying.
              image: '<account_num>.dkr.ecr.us-west-2.amazonaws.com/pytorch-mnist:latest'
              imagePullPolicy: Always
              command:
                - python3
                - /opt/pytorch-mnist/mnist-debug.py
                - '--epochs=5'
    # Five Worker replicas with the same pod template as the Master.
    Worker:
      replicas: 5
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            # Quoted on purpose: unquoted, this would parse as a boolean
            # instead of a string.
            sidecar.istio.io/inject: 'false'
        spec:
          containers:
            - name: pytorch
              # <account_num> is a placeholder in the registry host;
              # presumably it must be substituted before applying.
              image: '<account_num>.dkr.ecr.us-west-2.amazonaws.com/pytorch-mnist:latest'
              imagePullPolicy: Always
              command:
                - python3
                - /opt/pytorch-mnist/mnist-debug.py
                - '--epochs=5'