"""Count word occurrences in the ``review_body`` column of a Parquet dataset.

Usage:
    spark-submit <script> <input_parquet_path> <output_parquet_path>

The result is written as Parquet with two columns: ``words`` and ``count``.
"""
import sys
from typing import Sequence, Tuple


def parse_args(argv: Sequence[str]) -> Tuple[str, str]:
    """Return the ``(input_path, output_path)`` pair taken from *argv*.

    Args:
        argv: Full argument vector with the program name first (the shape
            of ``sys.argv``). Arguments after the second positional one are
            ignored.

    Returns:
        The input Parquet path and the output Parquet path.

    Raises:
        SystemExit: If fewer than two positional arguments were supplied;
            the exception carries a usage message.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else "word_count.py"
        raise SystemExit(
            f"usage: {prog} <input_parquet_path> <output_parquet_path>"
        )
    return argv[1], argv[2]


def main() -> None:
    """Read reviews, count each lower-cased word, and write the counts."""
    input_path, output_path = parse_args(sys.argv)

    # Imported here rather than at module top so that a usage error is
    # reported immediately, before Spark is loaded.
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName('Amazon reviews word count').getOrCreate()
    try:
        reviews = spark.read.parquet(input_path)

        # NOTE(review): the split pattern is a single literal space, so runs
        # of spaces produce empty-string "words", and tabs/newlines are not
        # treated as separators. Kept as-is to preserve the existing output.
        word_counts = (
            reviews
            .selectExpr("explode(split(lower(review_body), ' ')) as words")
            .groupBy("words")
            .count()
        )

        # "overwrite" replaces whatever already exists at the output path.
        word_counts.write.mode("overwrite").parquet(output_path)
    finally:
        # Release the session even when the read or write fails.
        spark.stop()


if __name__ == "__main__":
    main()