# -*- coding: utf-8 -*-
"""
Convert PySpark DataFrames to pandas DataFrames.

Two examples are run in order:

1. A flat DataFrame built from tuples plus a list of column names.
2. A DataFrame with a nested ``name`` struct built from an explicit schema.

Each example prints the Spark schema, shows the Spark rows, converts the
DataFrame with ``toPandas()`` and prints the resulting pandas DataFrame.

Created on Sat Jun 13 21:08:30 2020

@author: NNK
"""

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Reuses an already-running session if there is one, otherwise starts a new one.
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

# ---------------------------------------------------------------------------
# Example 1: flat rows
# ---------------------------------------------------------------------------
# Empty strings stand in for missing middle names, last names, dob and gender.
data = [
    ("James", "", "Smith", "36636", "M", 60000),
    ("Michael", "Rose", "", "40288", "M", 70000),
    ("Robert", "", "Williams", "42114", "", 400000),
    ("Maria", "Anne", "Jones", "39192", "F", 500000),
    ("Jen", "Mary", "Brown", "", "F", 0),
]

# Only column names are supplied here, so no explicit column types are given.
columns = ["first_name", "middle_name", "last_name", "dob", "gender", "salary"]

pysparkDF = spark.createDataFrame(data=data, schema=columns)
pysparkDF.printSchema()
pysparkDF.show(truncate=False)

pandasDF = pysparkDF.toPandas()
print(pandasDF)

# ---------------------------------------------------------------------------
# Example 2: nested structure elements
# ---------------------------------------------------------------------------
# The first element of every row is itself a (first, middle, last) tuple that
# maps onto the nested ``name`` struct declared in ``schemaStruct`` below.
# Note that salary is a string in this data set, matching its StringType field.
dataStruct = [
    (("James", "", "Smith"), "36636", "M", "3000"),
    (("Michael", "Rose", ""), "40288", "M", "4000"),
    (("Robert", "", "Williams"), "42114", "M", "4000"),
    (("Maria", "Anne", "Jones"), "39192", "F", "4000"),
    (("Jen", "Mary", "Brown"), "", "F", "-1"),
]

schemaStruct = StructType([
    StructField('name', StructType([
        StructField('firstname', StringType(), True),
        StructField('middlename', StringType(), True),
        StructField('lastname', StringType(), True),
    ])),
    StructField('dob', StringType(), True),
    StructField('gender', StringType(), True),
    StructField('salary', StringType(), True),
])

df = spark.createDataFrame(data=dataStruct, schema=schemaStruct)
df.printSchema()
df.show(truncate=False)

pandasDF2 = df.toPandas()
print(pandasDF2)