# -*- coding: utf-8 -*-
"""
author SparkByExamples.com

Demonstrates how Spark determines partition counts for DataFrames and RDDs
on a local master, and how repartition() / coalesce() change them.
"""
import pyspark
from pyspark.sql import SparkSession

# local[5] => default parallelism of 5, which drives the default
# partition counts printed below.
spark = SparkSession.builder.appName('SparkByExamples.com') \
        .master("local[5]").getOrCreate()

df = spark.range(0, 20)
print(df.rdd.getNumPartitions())

# Number of partitions used for shuffles in DataFrame operations
# (joins, aggregations); does not affect the RDD examples below.
spark.conf.set("spark.sql.shuffle.partitions", "500")

# BUG FIX: the original passed the tuple (0, 20), which creates a
# 2-element RDD [0, 20]; range(0, 20) yields the intended 20 elements.
rdd = spark.sparkContext.parallelize(range(0, 20))
# BUG FIX: added " : " separator so the count is not glued to the label.
print("From local[5] : " + str(rdd.getNumPartitions()))

# Explicitly request 6 partitions at creation time.
rdd1 = spark.sparkContext.parallelize(range(0, 25), 6)
print("parallelize : " + str(rdd1.getNumPartitions()))

"""rddFromFile = spark.sparkContext.textFile("src/main/resources/test.txt",10)
print("TextFile : "+str(rddFromFile.getNumPartitions()))
"""

# NOTE(review): saveAsTextFile fails if the target directory already
# exists — delete c:/tmp/* between runs.
rdd1.saveAsTextFile("c://tmp/partition2")

# repartition() performs a full shuffle; can increase or decrease
# the partition count.
rdd2 = rdd1.repartition(4)
print("Repartition size : " + str(rdd2.getNumPartitions()))
rdd2.saveAsTextFile("c://tmp/re-partition2")

# coalesce() avoids a full shuffle; can only decrease partitions.
rdd3 = rdd1.coalesce(4)
# BUG FIX: the original label said "Repartition size" here as well,
# making the coalesce output indistinguishable from the repartition one.
print("Coalesce size : " + str(rdd3.getNumPartitions()))
rdd3.saveAsTextFile("c:/tmp/coalesce2")