{
  "metadata": {
    "version": 1,
    "disable_limits": false
  },
  "nodes": [
    {
      "node_id": "8ceb5424-4b13-4ca8-a498-5bc084d15b17",
      "type": "SOURCE",
      "operator": "sagemaker.s3_source_0.1",
      "parameters": {
        "dataset_definition": {
          "__typename": "S3CreateDatasetDefinitionOutput",
          "datasetSourceType": "S3",
          "name": "signup_outcomes.csv",
          "description": null,
          "s3ExecutionContext": {
            "__typename": "S3ExecutionContext",
            "s3Uri": "s3://${bucket}/${prefix}/data/raw/signup_outcomes.csv",
            "s3ContentType": "csv",
            "s3HasHeader": true
          }
        }
      },
      "inputs": [],
      "outputs": [
        {
          "name": "default",
          "sampling": {
            "sampling_method": "sample_by_limit",
            "limit_rows": 50000
          }
        }
      ]
    },
    {
      "node_id": "c679c92d-ae35-440c-9dc2-649164010bd2",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.infer_and_cast_type_0.1",
      "parameters": {},
      "trained_parameters": {
        "schema": {
          "ip_address": "string",
          "email_address": "string",
          "EVENT_LABEL": "string"
        }
      },
      "inputs": [
        {
          "name": "default",
          "node_id": "8ceb5424-4b13-4ca8-a498-5bc084d15b17",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "7c154ee7-e3dd-4fae-a3ab-29c60900de5b",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "ip_address",
          "fill_value": "0.0.0.0"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "c679c92d-ae35-440c-9dc2-649164010bd2",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "9268ca24-7772-4b79-bf42-e11ec0bb7c90",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.handle_missing_0.1",
      "parameters": {
        "operator": "Fill missing",
        "fill_missing_parameters": {
          "input_column": "email_address",
          "fill_value": "synth_missing@email.com"
        },
        "impute_parameters": {
          "column_type": "Numeric",
          "numeric_parameters": {
            "strategy": "Approximate Median"
          }
        }
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "7c154ee7-e3dd-4fae-a3ab-29c60900de5b",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    },
    {
      "node_id": "d8c363ff-9f7c-4b44-a7dc-44004df88c29",
      "type": "TRANSFORM",
      "operator": "sagemaker.spark.custom_pandas_0.1",
      "parameters": {
        "code": "# Table is available as variable `df`\nimport pandas as pd\n# Stamp every row with the current time as epoch seconds. The clock is pinned to UTC\n# because a naive 'now' is UTC or local wall time depending on the pandas version.\ndf['EventTime'] = pd.Timestamp.now(tz='UTC').timestamp()"
      },
      "inputs": [
        {
          "name": "df",
          "node_id": "9268ca24-7772-4b79-bf42-e11ec0bb7c90",
          "output_name": "default"
        }
      ],
      "outputs": [
        {
          "name": "default"
        }
      ]
    }
  ]
}