#!/bin/bash
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command so a partially failed setup is noticed.
set -e

# Kubernetes namespace the KFP standalone deployment lives in.
NAMESPACE=${NAMESPACE:-kubeflow}

# Google service accounts (GSAs). Unless overridden, the names are derived
# from RESOURCE_PREFIX.
SYSTEM_GSA=${SYSTEM_GSA:-${RESOURCE_PREFIX}-kfp-system}
USER_GSA=${USER_GSA:-${RESOURCE_PREFIX}-kfp-user}

# Kubernetes service accounts (KSAs) that get bound to the system GSA and the
# user GSA respectively.
SYSTEM_KSA=(ml-pipeline-ui ml-pipeline-visualizationserver)
USER_KSA=(pipeline-runner kubeflow-pipelines-container-builder kubeflow-pipelines-viewer)

# When deploying manifests/kustomize/env/gcp (GCP managed storage), two more
# KSAs are bound to the system GSA:
# * kubeflow-pipelines-minio-gcs-gateway needs gcs permissions
# * kubeflow-pipelines-cloudsql-proxy needs cloudsql permissions
# Compare against "true" explicitly: an unquoted `[ -n $VAR ]` collapses to
# `[ -n ]` when VAR is empty, which is always true, and a plain non-empty check
# would also treat USE_GCP_MANAGED_STORAGE=false as enabled.
if [[ "${USE_GCP_MANAGED_STORAGE:-false}" == "true" ]]; then
  SYSTEM_KSA+=(kubeflow-pipelines-minio-gcs-gateway)
  SYSTEM_KSA+=(kubeflow-pipelines-cloudsql-proxy)
fi

# Print the introduction: what the script sets up, the prerequisites to check
# before running it, and the kinds of resources it creates or updates.
# The heredoc delimiter is unquoted, so $NAMESPACE is expanded to its current
# value while the escaped \$NAMESPACE is printed literally.
cat <<EOF

This script sets up Google service accounts, Kubernetes service accounts and workload identity bindings for a Kubeflow Pipelines (KFP) standalone deployment.
You can also choose to manually set these up based on documentation: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity.

Before you begin, please check the following list:
* Please first review introduction to workload identity: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity.
* KFP is already or will be deployed by standalone deployment: https://www.kubeflow.org/docs/pipelines/installation/standalone-deployment/
* gcloud is configured following steps: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#before_you_begin.
* kubectl talks to the cluster KFP is deployed to: https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl.
* The namespace you specified by NAMESPACE env var already exists on the cluster. You can create it by "kubectl create namespace \$NAMESPACE".

The following resources will be created or updated to create workload identity bindings between GSAs and KSAs:
* Google service accounts (GSAs)
* Service account IAM policy bindings on these GSAs
* Kubernetes service accounts with annotations in namespace "$NAMESPACE".

Note, this script is designed to be idempotent. If something went wrong, you can safely fix the error and rerun this script.

EOF

# Print how to invoke this script and which environment variables it reads.
# The heredoc delimiter is quoted, so the text below is written out verbatim
# (no variable or command substitution takes place).
usage() {
  cat <<'USAGE_TEXT'
Usage:
```
PROJECT_ID=<your-gcp-project-id> RESOURCE_PREFIX=<your-chosen-prefix> NAMESPACE=<your-k8s-namespace> ./gcp-workload-identity-setup.sh
```

PROJECT_ID: GCP project ID your cluster belongs to.
RESOURCE_PREFIX: Your preferred resource prefix for GCP resources this script creates.
NAMESPACE: Optional. Kubernetes namespace your Kubeflow Pipelines standalone deployment belongs to. (Defaults to kubeflow)
USE_GCP_MANAGED_STORAGE: Optional. Defaults to "false", specify "true" if you intend to use GCP managed storage (Google Cloud Storage and Cloud SQL) following instructions in:
https://github.com/kubeflow/pipelines/tree/master/manifests/kustomize/sample
USAGE_TEXT
}
# Both PROJECT_ID and RESOURCE_PREFIX are mandatory. For the first one that is
# empty, show the usage text followed by an error and stop with status 1.
for required_var in PROJECT_ID RESOURCE_PREFIX; do
  # ${!required_var} reads the variable whose name is stored in required_var.
  if [[ -z "${!required_var}" ]]; then
    usage
    echo
    echo "Error: ${required_var} env variable is empty!"
    exit 1
  fi
done
unset required_var

# Echo back the effective settings so the user can double-check them.
printf '%s\n' \
  "Env variables set:" \
  "* PROJECT_ID=${PROJECT_ID}" \
  "* RESOURCE_PREFIX=${RESOURCE_PREFIX}" \
  "* NAMESPACE=${NAMESPACE}" \
  "* USE_GCP_MANAGED_STORAGE=${USE_GCP_MANAGED_STORAGE:-false}" \
  ""

# Full e-mail addresses of the two Google service accounts.
SYSTEM_GSA_FULL="${SYSTEM_GSA}@${PROJECT_ID}.iam.gserviceaccount.com"
USER_GSA_FULL="${USER_GSA}@${PROJECT_ID}.iam.gserviceaccount.com"

# Spell out the concrete GSAs, the namespace and the KSA lists that the
# bindings will cover. Inside the heredoc the arrays expand to their elements
# separated by single spaces.
cat <<PLAN

The following resources will be created or updated to create workload identity bindings between GSAs and KSAs:
* Google service accounts (GSAs):
  * ${SYSTEM_GSA_FULL}
  * ${USER_GSA_FULL}
* Service account IAM policy bindings on these GSAs to grant "Workload Identity User" role.
* Kubernetes service accounts with annotations in namespace "${NAMESPACE}".
* ${SYSTEM_GSA_FULL} will be bound to these KSAs:
  ${SYSTEM_KSA[@]}.
* ${USER_GSA_FULL} will be bound to these KSAs:
  ${USER_KSA[@]}.

Note: if you prefer more granular workload identity bindings, you can modify this script to suit your needs.

PLAN

# Ask for confirmation before any cloud or cluster resource is touched.
# `read -n 1` returns after a single character, and only an explicit "y" or
# "Y" continues; Enter or any other key ends the script with status 0.
# The prompt therefore shows "no" as the default, (y/N), rather than (Y/n),
# which would wrongly suggest that pressing Enter continues.
read -p "Continue? (y/N) " -n 1 -r
echo # move to a fresh line after the single-character answer
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
  exit 0
fi

echo "Creating Google service accounts..."

# Create a Google service account unless the lookup already finds one.
# Arguments: $1 - service account name (the part before the "@")
# Outputs:   a status line when the account exists; otherwise whatever
#            `gcloud iam service-accounts create` prints
# Returns:   non-zero when the lookup or the creation fails
function create_gsa_if_not_present {
  local name=${1}
  local already_present
  # Assign separately from `local`: `local var=$(cmd)` always returns 0, so a
  # failing lookup (for example missing credentials) would look like "account
  # not found" and wrongly fall through to the create call.
  already_present=$(gcloud iam service-accounts list \
    --filter="name:${name}" --format='value(name)') || return
  if [ -n "$already_present" ]; then
    echo "Service account $name already exists"
  else
    gcloud iam service-accounts create "$name"
  fi
}
# Quote the names so each is passed as exactly one argument, with no word
# splitting or glob expansion.
create_gsa_if_not_present "$SYSTEM_GSA"
create_gsa_if_not_present "$USER_GSA"

# Create a Kubernetes service account in $NAMESPACE unless `kubectl get`
# already succeeds for it.
# Arguments: $1 - Kubernetes service account name
# Globals:   NAMESPACE (read)
# Outputs:   a status line telling whether the KSA existed or was created
function create_ksa_if_not_present {
  local name=${1}
  # Only stdout of the lookup is discarded; kubectl's error output stays
  # visible. All expansions are quoted so the name and namespace are each
  # passed as a single argument.
  if kubectl get serviceaccount "$name" -n "$NAMESPACE" >/dev/null; then
    echo "KSA $name already exists"
  else
    kubectl create serviceaccount "$name" -n "$NAMESPACE" --save-config
    echo "KSA $name created"
  fi
}

# Bind KSA to GSA through workload identity.
# Documentation: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity
#
# Steps: add the roles/iam.workloadIdentityUser binding for the KSA on the
# GSA, make sure the KSA exists, then annotate the KSA with the GSA e-mail.
# Arguments: $1 - GSA name (without the "@<project>.iam..." suffix)
#            $2 - KSA name
# Globals:   PROJECT_ID, NAMESPACE (read)
function bind_gsa_and_ksa {
  local gsa=${1}
  local ksa=${2}
  # Build the GSA e-mail once; it is used for both the IAM binding and the
  # KSA annotation.
  local gsa_email="${gsa}@${PROJECT_ID}.iam.gserviceaccount.com"

  gcloud iam service-accounts add-iam-policy-binding "$gsa_email" \
    --member="serviceAccount:$PROJECT_ID.svc.id.goog[$NAMESPACE/$ksa]" \
    --role="roles/iam.workloadIdentityUser" \
    > /dev/null # hide verbose output

  create_ksa_if_not_present "$ksa"
  # --overwrite lets the annotation be replaced if it is already present.
  kubectl annotate serviceaccount \
    --namespace "$NAMESPACE" \
    --overwrite \
    "$ksa" \
    "iam.gke.io/gcp-service-account=$gsa_email"
  echo "* Bound KSA $ksa to GSA $gsa"
}

# Bind every system KSA to the system GSA. The array expansion and the call
# arguments are quoted so each element is passed on as exactly one word.
echo "Binding each kfp system KSA to $SYSTEM_GSA"
for ksa in "${SYSTEM_KSA[@]}"; do
  bind_gsa_and_ksa "$SYSTEM_GSA" "$ksa"
done

# Bind every user KSA to the user GSA.
echo "Binding each kfp user KSA to $USER_GSA"
for ksa in "${USER_KSA[@]}"; do
  bind_gsa_and_ksa "$USER_GSA" "$ksa"
done

# Report success, then print follow-up commands for granting the GSAs the IAM
# roles KFP needs (this script itself does not grant them).
# The heredoc delimiter is unquoted: variables are expanded, and each "\\" is
# printed as a single backslash so the shown commands keep their line
# continuations.
echo
echo "All the workload identity bindings have succeeded!"
cat <<EOF


=============
Next steps:
* This script won't add IAM policies to grant these GSAs with permissions KFP needs, you need to do that by yourself.

### If **NOT** using GCP managed storage, you can:
* Give $SYSTEM_GSA_FULL "Storage Object Viewer" role to allow KFP UI load data in GCS in the same project:
gcloud projects add-iam-policy-binding $PROJECT_ID \\
  --member="serviceAccount:$SYSTEM_GSA_FULL" \\
  --role="roles/storage.objectViewer"

* Give $USER_GSA_FULL any permissions your pipelines, container builder and tensorboard need. For **QUICK** tryouts, you can give it Project Editor role for all permissions, but **WARNING** be aware this overgrants too much permission:
gcloud projects add-iam-policy-binding $PROJECT_ID \\
  --member="serviceAccount:$USER_GSA_FULL" \\
  --role="roles/editor"

### If using GCP managed storage, you **ALSO** need to give $SYSTEM_GSA_FULL these roles:
* "Storage Admin" role on specified GCS bucket to allow writing to specified GCS artifact bucket:
gsutil iam ch serviceAccount:$SYSTEM_GSA_FULL:roles/storage.admin gs://[BUCKET_NAME]

Or you can find other ways in https://cloud.google.com/storage/docs/access-control/using-iam-permissions#bucket-add.

* "Cloud SQL Client" role to allow connecting to Cloud SQL instances:
gcloud projects add-iam-policy-binding $PROJECT_ID \\
  --member="serviceAccount:$SYSTEM_GSA_FULL" \\
  --role="roles/cloudsql.client"
EOF