apiVersion: "kubeflow.org/v1"
kind: "PyTorchJob"
metadata:
  name: "distributed-pytorch-job"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            sidecar.istio.io/inject: "false"
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0
              args: ["--backend", "gloo"]
              # Comment out the below resources to use the CPU.
              #resources:
                #limits:
                  #nvidia.com/gpu: 1
    Worker:
      replicas: 2
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            sidecar.istio.io/inject: "false"
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0
              args: ["--backend", "gloo"]
              # Comment out the below resources to use the CPU.
              #resources:
                #limits:
                  #nvidia.com/gpu: 1