# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

from pyspark.sql import SparkSession
from pyspark.sql.functions import *
import argparse


def transform_data(spark, source_bucket, destination_bucket):
    movies = (spark.read.option("header", "true")
              .option("delimiter", ",")
              .option("inferSchema", "true")
              .csv(f"{source_bucket}"))

    movies.write.mode('overwrite').parquet(f"{destination_bucket}")


def parse_arguments():
    spark_job_parser = argparse.ArgumentParser(description='PySpark Job Arguments')
    spark_job_parser.add_argument('--source_bucket', action='store', type=str, required=True)
    spark_job_parser.add_argument('--destination_bucket', action='store', type=str, required=True)
    spark_job_parser.add_argument('--app_name', action='store', type=str, required=True)
    pyspark_args = spark_job_parser.parse_args()
    pyspark_args.app_name
    return pyspark_args


def main():
    pyspark_args = parse_arguments()

    spark = SparkSession.builder.appName(pyspark_args.app_name).getOrCreate()

    transform_data(spark,
                   source_bucket=pyspark_args.source_bucket,
                   destination_bucket=pyspark_args.destination_bucket
                   )


if __name__ == '__main__':
    main()