---
# Katib Experiment "tfjob-example": searches over --learning_rate and
# --batch_size for a TFJob-based MNIST training run, using the "random"
# algorithm and the "accuracy_1" metric as the objective.
apiVersion: "kubeflow.org/v1alpha3"
kind: Experiment
metadata:
  name: tfjob-example
spec:
  # Trial budget for the experiment.
  # NOTE(review): exact semantics of these counters are defined by the Katib
  # controller, not by this file — confirm against the Katib docs.
  parallelTrialCount: 3
  maxTrialCount: 12
  maxFailedTrialCount: 3

  # Objective: maximize accuracy_1, with a target value of 0.99.
  objective:
    type: maximize
    goal: 0.99
    objectiveMetricName: accuracy_1

  algorithm:
    algorithmName: random

  # Metrics are collected as TensorFlow events from the /train directory.
  # The trial command below writes its logs to /train/metrics, which lives
  # under this path.
  metricsCollectorSpec:
    source:
      fileSystemPath:
        path: /train
        kind: Directory
    collector:
      kind: TensorFlowEvent

  # Search space. Parameter names are the literal command-line flags; the
  # trial template renders each one as "<name>=<value>". The min/max bounds
  # are deliberately quoted strings.
  parameters:
    - name: --learning_rate
      parameterType: double
      feasibleSpace:
        min: "0.01"
        max: "0.05"
    - name: --batch_size
      parameterType: int
      feasibleSpace:
        min: "100"
        max: "200"

  # Go template rendered into one TFJob manifest per trial. Everything
  # inside the literal block scalar below is template text, so no YAML
  # comments are placed inside it.
  trialTemplate:
    goTemplate:
      rawTemplate: |-
        apiVersion: "kubeflow.org/v1"
        kind: TFJob
        metadata:
          name: {{.Trial}}
          namespace: {{.NameSpace}}
        spec:
          tfReplicaSpecs:
            Worker:
              replicas: 1
              restartPolicy: OnFailure
              template:
                spec:
                  containers:
                    - name: tensorflow
                      image: gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0
                      imagePullPolicy: Always
                      command:
                        - "python"
                        - "/var/tf_mnist/mnist_with_summaries.py"
                        - "--log_dir=/train/metrics"
                        {{- with .HyperParameters}}
                        {{- range .}}
                        - "{{.Name}}={{.Value}}"
                        {{- end}}
                        {{- end}}