---
# ClusterIP Service that exposes the instance's pods.
# The ${...} placeholders are filled in by a templating step before this file
# is parsed as YAML (the engine is not visible from this file).
kind: Service
apiVersion: v1
metadata:
  # String-valued placeholders are quoted so that a substituted value such as
  # "123" or "true" stays a string instead of being typed as int/bool.
  name: "${instance_name}"
  namespace: "${namespace}"
  labels:
    app: "${instance_name}"
spec:
  ports:
    - name: preds
      # Left unquoted on purpose: the Service API requires an integer here.
      port: ${service_port}
      # Refers to the container port named "pod-port" in the Deployment below.
      targetPort: pod-port
  type: ClusterIP
  # Must match the pod template labels of the Deployment below.
  selector:
    app: "${instance_name}"
---
# Single-replica Deployment running the model container on a GPU node.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: "${instance_name}"
  namespace: "${namespace}"
  labels:
    app: "${instance_name}"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: "${instance_name}"
  template:
    metadata:
      labels:
        app: "${instance_name}"
    spec:
      # Pin the pod to nodes of the requested instance type.
      nodeSelector:
        node.kubernetes.io/instance-type: "${instance_type}"
      containers:
        - name: main
          # NOTE(review): registry, image name and tag are concatenated with no
          # separators; presumably the variables carry their own "/" and ":"
          # (e.g. registry ends with "/", tag starts with ":") -- confirm.
          image: "${registry}${model_image_name}${model_image_tag}"
          imagePullPolicy: Always
          # Env var values must be strings, hence the quoting.
          env:
            - name: NUM_MODELS
              value: "${num_models}"
            - name: POSTPROCESS
              value: "${postprocess}"
            - name: QUIET
              value: "${quiet}"
          ports:
            # Named port targeted by the Service's targetPort.
            - name: pod-port
              containerPort: 8080
          resources:
            limits:
              nvidia.com/gpu: 1