{
  "metadata": {
    "version": 1,
    "disable_limits": false
  },
  "nodes": [
    {
      "node_id": "e405a68e-7b86-438c-9e98-9d2b1963e167",
      "type": "SOURCE",
      "operator": "sagemaker.s3_source_0.1",
      "parameters": {
        "dataset_definition": {
          "__typename": "S3CreateDatasetDefinitionOutput",
          "datasetSourceType": "S3",
          "name": "signup_attempts.csv",
          "description": null,
          "s3ExecutionContext": {
            "__typename": "S3ExecutionContext",
            "s3Uri": "s3://${bucket}/${prefix}/data/raw/signup_attempts.csv",
            "s3ContentType": "csv",
            "s3HasHeader": true
          }
        }
      },
      "inputs": [],
      "outputs": [
        {
          "name": "default",
          "sampling": {
            "sampling_method": "sample_by_limit",
            "limit_rows": 50000
          }
        }
      ]
    },
    {
      "node_id": "91372460-ddbc-4dd7-be64-db0a12c30dad",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.infer_and_cast_type_0.1",
      "parameters": {},
      "trained_parameters": {
        "schema": {
          "ip_address": "string",
          "email_address": "string",
          "user_agent": "string",
          "customer_city": "string",
          "customer_state": "string",
          "customer_postal": "long",
          "customer_name": "string",
          "customer_address": "string",
          "phone_number": "string",
          "timestamp": "long"
        }
      },
      "inputs": [
        {
          "name": "default",
          "node_id": "e405a68e-7b86-438c-9e98-9d2b1963e167",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "dbe6a4eb-f697-4e8e-b7ef-9faef77e1020",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.custom_pandas_0.1",
      "parameters": {
        "code": "# Table is available as variable `df`\nimport pandas as pd\ndf['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms').dt.strftime('%m/%d/%y %H:%M:%S')"
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "91372460-ddbc-4dd7-be64-db0a12c30dad",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "572880dd-87da-4b63-888d-0f24cc4eb158",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.manage_columns_0.1",
      "parameters": {
        "operator": "Rename column",
        "rename_column_parameters": {
          "input_column": "timestamp",
          "new_name": "EVENT_TIMESTAMP"
        },
        "drop_column_parameters": {}
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "dbe6a4eb-f697-4e8e-b7ef-9faef77e1020",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "47d9c8e0-9478-416f-9ef8-604b4ca7eada",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "email_address",
          "fill_value": "synth_missing@email.com"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "572880dd-87da-4b63-888d-0f24cc4eb158",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "7f2cefd2-5538-4f09-9f1e-a1d16fbe8b39",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "user_agent",
          "fill_value": "Missing"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "47d9c8e0-9478-416f-9ef8-604b4ca7eada",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "5481d4b8-03fc-48c9-8051-bdc5333f2098",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "customer_city",
          "fill_value": "Missing"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "7f2cefd2-5538-4f09-9f1e-a1d16fbe8b39",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "bab72342-c6c1-48df-a259-751c0a7df470",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "customer_state",
          "fill_value": "Missing"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "5481d4b8-03fc-48c9-8051-bdc5333f2098",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "a6ddb7dd-2817-4030-b960-b072b1cd68f9",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "customer_postal",
          "fill_value": "0"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "bab72342-c6c1-48df-a259-751c0a7df470",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "e076b7cc-d5f2-4996-852e-59d601b19a9d",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "customer_name",
          "fill_value": "Missing"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "a6ddb7dd-2817-4030-b960-b072b1cd68f9",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "02f86fd3-c64b-4dbb-9736-3e9e0ba1c0ce",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "customer_address",
          "fill_value": "Missing"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "e076b7cc-d5f2-4996-852e-59d601b19a9d",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "13a61851-f189-4b0d-8aeb-64dd40965e60",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "phone_number",
          "fill_value": "Missing"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "02f86fd3-c64b-4dbb-9736-3e9e0ba1c0ce",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "ec09720f-c544-4a30-b0c1-9e4730889f6b",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "ip_address",
          "fill_value": "0.0.0.0"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "13a61851-f189-4b0d-8aeb-64dd40965e60",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "b70a84b8-6570-487f-9e3c-b72f68886a28",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.custom_pandas_0.1",
      "parameters": {
        "code": "# Table is available as variable `df`\nimport pandas as pd\n# Read the ingestion time from an explicit UTC clock: a bare 'now' is timezone-naive and\n# resolves to UTC or to host local time depending on the pandas version.\ndf['EventTime'] = pd.Timestamp.now(tz='UTC').timestamp()"
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "ec09720f-c544-4a30-b0c1-9e4730889f6b",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    }
  ]
}