# DaemonSet for the AWS virtual GPU device plugin, deployed into kube-system.
# Which nodes receive a pod is decided by the pod template's nodeSelector
# and tolerations further down.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: aws-virtual-gpu-device-plugin-daemonset
  namespace: kube-system
spec:
  # Must match the labels on the pod template so the DaemonSet manages the
  # pods it creates.
  selector:
    matchLabels:
      name: aws-virtual-gpu-device-plugin
  # When the pod template changes, old pods are replaced automatically.
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      # This annotation is deprecated. Kept here for backward compatibility;
      # the pod spec below also sets priorityClassName, which is the
      # non-deprecated way to mark the pod as critical.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
      # Must stay identical to spec.selector.matchLabels of this DaemonSet.
      labels:
        name: aws-virtual-gpu-device-plugin
    spec:
      # Treat this pod as a critical add-on by running it with the
      # system-node-critical priority class, so that it can be rescheduled
      # after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: system-node-critical
      # Containers in this pod share the host's IPC namespace.
      # NOTE(review): presumably required for the MPS container to be reachable
      # by GPU workloads on the node - confirm.
      hostIPC: true
      # Only nodes labelled k8s.amazonaws.com/accelerator=vgpu run this pod.
      nodeSelector:
        k8s.amazonaws.com/accelerator: vgpu
      tolerations:
      # Deprecated toleration, retained for backward compatibility.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      - operator: Exists
        key: CriticalAddonsOnly
      # Tolerate the NoSchedule taint keyed k8s.amazonaws.com/vgpu, whatever
      # its value.
      - operator: Exists
        key: k8s.amazonaws.com/vgpu
        effect: NoSchedule
      # When the MPS device plugin is deployed on a machine, first change the
      # GPU compute mode to EXCLUSIVE_PROCESS (done by the init container below).
      initContainers:
      # Runs to completion before any of the regular containers are started.
      - name: set-compute-mode
        image: nvidia/cuda:11.2.1-runtime
        # NOTE(review): SYS_ADMIN is presumably what allows nvidia-smi to
        # change the compute mode - confirm before narrowing it.
        securityContext:
          capabilities:
            add:
            - SYS_ADMIN
        # Executes: nvidia-smi -c EXCLUSIVE_PROCESS
        command:
        - nvidia-smi
        - "-c"
        - EXCLUSIVE_PROCESS
      containers:
      # Device plugin container. It mounts the host's kubelet device-plugins
      # directory and runs with privilege escalation disabled and every
      # capability dropped.
      - name: aws-virtual-gpu-device-plugin-ctr
        image: amazon/aws-virtual-gpu-device-plugin:v0.1.0
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
        volumeMounts:
        - mountPath: /var/lib/kubelet/device-plugins
          name: device-plugin
      # MPS container. It shares /tmp/nvidia-mps with the host.
      # NOTE(review): the image carries no tag, so it resolves to
      # nvidia/mps:latest - consider pinning a specific version.
      - name: mps
        image: nvidia/mps
        env:
        # Kept quoted: container env values must be strings, not integers.
        - name: CUDA_MPS_ACTIVE_THREAD_PERCENTAGE
          value: "10"
        volumeMounts:
        - mountPath: /tmp/nvidia-mps
          name: nvidia-mps
      # Host directories exposed to the containers above. No hostPath `type`
      # is set on either volume, so no existence/type check is done before
      # mounting.
      volumes:
      # Mounted by aws-virtual-gpu-device-plugin-ctr at the same path.
      - name: device-plugin
        hostPath:
          path: /var/lib/kubelet/device-plugins
      # Mounted by the mps container at the same path.
      - name: nvidia-mps
        hostPath:
          path: /tmp/nvidia-mps