---
# SparkApplication (spark-on-k8s-operator, v1beta2) that runs the TPC-DS SQL
# benchmark class com.amazonaws.eks.tpcds.BenchmarkSQL in cluster mode against
# the 1 GB TPC-DS dataset stored in S3 and writes the results back to S3.
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: tpcds-benchmark-sql-1g
  namespace: default
spec:
  type: Scala
  mode: cluster
  image: seedjeffwan/spark:v2.4.5-examples
  # Quoted so the version is always read as a string.
  sparkVersion: "2.4.5"
  mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL
  mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar
  # Arguments are passed to mainClass in order. The comment above each entry
  # names the parameter that entry fills, so keep exactly one value per
  # comment; an extra or missing entry shifts every parameter after it.
  arguments:
    # TPC-DS data location
    - "s3a://spark-k8s-data/TPCDS-TEST-1G"
    # Results location
    - "s3a://spark-k8s-data/TPCDS-TEST-1G-RESULT"
    # Path to kit in the docker image
    - "/opt/tpcds-kit/tools"
    # Data format
    - "parquet"
    # Scale factor (in GB)
    - "1"
    # Number of iterations
    - "1"
    # Optimize queries
    - "false"
    # Filter queries (comma-separated); will run all if empty, i.e. replace
    # this value with "" to run every query.
    - "q70-v2.4,q82-v2.4,q64-v2.4"
    # Logging set to WARN
    - "true"
  sparkConf:
    "spark.speculation": "false"
    "spark.network.timeout": "2400"
    # TPC-DS specific
    "spark.sql.broadcastTimeout": "7200"
    "spark.sql.crossJoin.enabled": "true"
    "spark.sql.parquet.mergeSchema": "false"
    "spark.sql.parquet.filterPushdown": "true"
    # S3 credential
    "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.InstanceProfileCredentialsProvider"
    # S3 specific config
    "spark.hadoop.fs.s3a.connection.timeout": "1200000"
    "spark.hadoop.fs.s3a.path.style.access": "true"
    "spark.hadoop.fs.s3a.connection.maximum": "200"
    "spark.hadoop.fs.s3a.fast.upload": "true"
    "spark.hadoop.fs.s3a.readahead.range": "256K"
    "spark.hadoop.fs.s3a.input.fadvise": "random"
    # S3A committer
    "spark.hadoop.mapreduce.outputcommitter.factory.scheme.s3a": "org.apache.hadoop.fs.s3a.commit.S3ACommitterFactory"
    # The two settings below are intentionally left disabled.
    # "spark.sql.sources.commitProtocolClass": "org.apache.spark.internal.io.cloud.PathOutputCommitProtocol"
    # "spark.sql.parquet.output.committer.class": "org.apache.hadoop.mapreduce.lib.output.BindingPathOutputCommitter"
    "spark.hadoop.fs.s3a.committer.name": "directory"
    "spark.hadoop.fs.s3a.committer.staging.conflict-mode": "append"
  driver:
    cores: 2
    coreLimit: "2048m"
    memory: "8000m"
    serviceAccount: spark
  executor:
    instances: 3
    cores: 1
    memory: "8000m"
    memoryOverhead: "2g"
  restartPolicy:
    type: Never