---
# SparkApplication (Spark Operator, v1beta2) that runs the PySpark script
# sample-spark-taxi-trip.py in cluster mode, reading from and writing to S3
# through the s3a:// filesystem connector.
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: repartition-job
  namespace: default
spec:
  # Scratch volume shared by name with the driver and executor volumeMounts
  # below; both mount it at /tmp.
  volumes:
    - name: ivy
      emptyDir: {}

  sparkConf:
    # Extra packages fetched at submit time: the S3A connector and spark-avro.
    spark.jars.packages: "org.apache.hadoop:hadoop-aws:3.2.0,org.apache.spark:spark-avro_2.12:3.0.1"
    # Point Ivy's cache and home at /tmp, which is backed by the `ivy`
    # emptyDir volume mounted in the driver.
    spark.driver.extraJavaOptions: "-Divy.cache.dir=/tmp -Divy.home=/tmp"
    spark.kubernetes.allocation.batch.size: "10"

  hadoopConf:
    fs.s3a.impl: org.apache.hadoop.fs.s3a.S3AFileSystem
    # S3A credentials are resolved via the AWS SDK instance-profile provider.
    fs.s3a.aws.credentials.provider: com.amazonaws.auth.InstanceProfileCredentialsProvider

  type: Python
  pythonVersion: "3"
  mode: cluster
  # Image is pinned by digest in addition to the 3.0.1 tag.
  image: "3bittechs/spark-py:3.0.1@sha256:dc1225d8429351426f05ba2dca85054368ec35f110601059c20d5587a029860f"
  imagePullPolicy: IfNotPresent

  mainApplicationFile: s3a://aws001-preprod-dev-eks-emr-on-eks-logs/aws001-preprod-test-eks-emr-data-team-a/emr-data-team-a/taxidata/scripts/sample-spark-taxi-trip.py
  # Positional arguments passed to the script, in order.
  # NOTE(review): judging by the paths these are the input prefix followed by
  # the output prefix - confirm against sample-spark-taxi-trip.py.
  arguments:
    - 's3a://aws001-preprod-dev-eks-emr-on-eks-logs/aws001-preprod-test-eks-emr-data-team-a/emr-data-team-a/taxidata/input/taxi-trip-data/'
    - 's3a://aws001-preprod-dev-eks-emr-on-eks-logs/aws001-preprod-test-eks-emr-data-team-a/emr-data-team-a/taxidata/output/sparkk8soperator/taxi-trip-data/'

  sparkVersion: "3.0.1"
  restartPolicy:
    type: Never

  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "2g"
    labels:
      # Quoted so the label value is unambiguously a string.
      version: "3.0.1"
    serviceAccount: spark
    securityContext:
      # NOTE(review): 65534 is conventionally the nobody/nogroup GID - confirm
      # it matches what the image expects for writing to the mounted volume.
      fsGroup: 65534
    volumeMounts:
      - name: ivy
        mountPath: /tmp

  executor:
    cores: 2
    instances: 2
    memory: "2g"
    labels:
      # Quoted so the label value is unambiguously a string.
      version: "3.0.1"
    volumeMounts:
      - name: ivy
        mountPath: /tmp