# -*- coding: utf-8 -*-
"""Create an RDD from a Python list and collect it to the driver.

author: SparkByExamples.com
"""
from pyspark.sql import SparkSession

# getOrCreate() returns the active session if one exists, otherwise builds one.
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

# (dept_name, dept_id) records used as the demo dataset.
dept = [
    ("Finance", 10),
    ("Marketing", 20),
    ("Sales", 30),
    ("IT", 40),
]

rdd = spark.sparkContext.parallelize(dept)
print(rdd)  # prints the RDD object description, not its elements

# collect() pulls every partition back to the driver — only safe for tiny
# demo data like this; never collect a large RDD.
data_coll = rdd.collect()
for row in data_coll:
    print(f"{row[0]},{row[1]}")

# --- Optional DataFrame variant of the same example (not executed) ---
# deptColumns = ["dept_name", "dept_id"]
# deptDF = spark.createDataFrame(data=dept, schema=deptColumns)
# deptDF.printSchema()
# deptDF.show(truncate=False)
#
# dataCollect = deptDF.collect()
# print(dataCollect)
#
# dataCollect2 = deptDF.select("dept_name").collect()
# print(dataCollect2)
#
# for row in dataCollect:
#     print(row['dept_name'] + "," + str(row['dept_id']))