--- apiVersion: kubeflow.org/v1beta1 kind: TFJob metadata: labels: ksonnet.io/component: tf-cnn-training name: tf-cnn-training namespace: kubeflow spec: tfReplicaSpecs: Ps: template: spec: containers: - args: - python - tf_cnn_benchmarks.py - --batch_size=32 - --model=resnet50 - --variable_update=parameter_server - --flush_stdout=true - --num_gpus=1 - --local_parameter_device=cpu - --device=cpu - --data_format=NHWC image: gcr.io/kubeflow/tf-benchmarks-gpu:v20171202-bdab599-dirty-284af3 name: tensorflow workingDir: /opt/tf-benchmarks/scripts/tf_cnn_benchmarks restartPolicy: OnFailure tfReplicaType: PS Worker: replicas: 1 template: spec: containers: - args: - python - tf_cnn_benchmarks.py - --batch_size=32 - --model=resnet50 - --variable_update=parameter_server - --flush_stdout=true - --num_gpus=2 - --local_parameter_device=gpu - --device=gpu - --data_format=NHWC image: gcr.io/kubeflow/tf-benchmarks-gpu:v20171202-bdab599-dirty-284af3 name: tensorflow resources: limits: nvidia.com/gpu: 2 workingDir: /opt/tf-benchmarks/scripts/tf_cnn_benchmarks restartPolicy: OnFailure