#!/bin/bash
# SPDX-FileCopyrightText: Copyright 2021 Amazon.com, Inc. or its affiliates.
# SPDX-License-Identifier: MIT-0
#
# Starts an EMR on EKS job run (aws emr-containers start-job-run) that executes
# blog/iceberg_scd_script.py from the project S3 bucket with the Iceberg Spark
# runtime jar and a Glue-backed Iceberg catalog configuration.
#
# Required environment variables (set them before running), for example:
#   export EMRCLUSTER_NAME=emr-on-eks-quickstart
#   export AWS_REGION=us-east-1
#
# Exports: ACCOUNTID, VIRTUAL_CLUSTER_ID, EMR_ROLE_ARN, S3BUCKET

# Abort on command failure, unset variables, and failures inside pipelines.
set -euo pipefail

# Fail early with a clear message instead of building a malformed ARN/bucket
# name out of empty strings.
: "${EMRCLUSTER_NAME:?EMRCLUSTER_NAME must be set, e.g. emr-on-eks-quickstart}"
: "${AWS_REGION:?AWS_REGION must be set, e.g. us-east-1}"

# Assignment is kept separate from 'export' so that a failing AWS CLI call is
# not masked by the (always successful) exit status of 'export'.
ACCOUNTID=$(aws sts get-caller-identity --query Account --output text)
export ACCOUNTID

VIRTUAL_CLUSTER_ID=$(aws emr-containers list-virtual-clusters \
  --query "virtualClusters[?name == '${EMRCLUSTER_NAME}' && state == 'RUNNING'].id" \
  --output text)
export VIRTUAL_CLUSTER_ID

# The query yields an empty string when nothing matches and a whitespace-
# separated list when several clusters match; neither is a usable single id.
if [[ -z "${VIRTUAL_CLUSTER_ID}" ]]; then
  printf 'error: no RUNNING virtual cluster named "%s" was found\n' \
    "${EMRCLUSTER_NAME}" >&2
  exit 1
fi
if [[ "${VIRTUAL_CLUSTER_ID}" =~ [[:space:]] ]]; then
  printf 'error: more than one RUNNING virtual cluster named "%s": %s\n' \
    "${EMRCLUSTER_NAME}" "${VIRTUAL_CLUSTER_ID}" >&2
  exit 1
fi

export EMR_ROLE_ARN="arn:aws:iam::${ACCOUNTID}:role/${EMRCLUSTER_NAME}-execution-role"
export S3BUCKET="${EMRCLUSTER_NAME}-${ACCOUNTID}-${AWS_REGION}"

# JSON payloads are built with here-documents so that ${S3BUCKET} is expanded
# inside a single shell word (no word-splitting or globbing of the value).
job_driver=$(cat <<EOF
{
  "sparkSubmitJobDriver": {
    "entryPoint": "s3://${S3BUCKET}/blog/iceberg_scd_script.py",
    "entryPointArguments": ["${S3BUCKET}"],
    "sparkSubmitParameters": "--jars local:///usr/share/aws/iceberg/lib/iceberg-spark3-runtime.jar --conf spark.executor.memory=2G --conf spark.executor.cores=2"
  }
}
EOF
)

configuration_overrides=$(cat <<EOF
{
  "applicationConfiguration": [
    {
      "classification": "spark-defaults",
      "properties": {
        "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
        "spark.sql.catalog.glue_catalog": "org.apache.iceberg.spark.SparkCatalog",
        "spark.sql.catalog.glue_catalog.catalog-impl": "org.apache.iceberg.aws.glue.GlueCatalog",
        "spark.sql.catalog.glue_catalog.warehouse": "s3://${S3BUCKET}/iceberg/",
        "spark.sql.catalog.glue_catalog.io-impl": "org.apache.iceberg.aws.s3.S3FileIO",
        "spark.sql.catalog.glue_catalog.lock-impl": "org.apache.iceberg.aws.glue.DynamoLockManager",
        "spark.sql.catalog.glue_catalog.lock.table": "myIcebergLockTable"
      }
    }
  ],
  "monitoringConfiguration": {
    "s3MonitoringConfiguration": {
      "logUri": "s3://${S3BUCKET}/elasticmapreduce/emr-containers"
    }
  }
}
EOF
)

aws emr-containers start-job-run \
  --virtual-cluster-id "${VIRTUAL_CLUSTER_ID}" \
  --name em68-iceberg \
  --execution-role-arn "${EMR_ROLE_ARN}" \
  --release-label emr-6.8.0-latest \
  --job-driver "${job_driver}" \
  --configuration-overrides "${configuration_overrides}"