{ "metadata": { "version": 1, "disable_limits": false }, "nodes": [ { "node_id": "e586ff4d-cc6d-4132-a208-c82cb0699f7c", "type": "SOURCE", "operator": "sagemaker.s3_source_0.1", "parameters": { "dataset_definition": { "__typename": "S3CreateDatasetDefinitionOutput", "datasetSourceType": "S3", "name": "partb", "description": null, "s3ExecutionContext": { "__typename": "S3ExecutionContext", "s3Uri": "s3://sagemaker-us-east-1-403678423963/fraud-detect-demo/data/raw/medicareB.csv", "s3ContentType": "csv", "s3HasHeader": true, "s3FieldDelimiter": ",", "s3DirIncludesNested": false, "s3AddsFilenameColumn": false } } }, "inputs": [], "outputs": [ { "name": "default", "sampling": { "sampling_method": "sample_by_limit", "limit_rows": 50000 } } ] }, { "node_id": "cc78bd0d-a5fa-4516-9e16-b0de92c34fae", "type": "TRANSFORM", "operator": "sagemaker.spark.infer_and_cast_type_0.1", "parameters": {}, "trained_parameters": { "schema": { "Rndrng_NPI": "long", "Rndrng_Prvdr_Last_Org_Name": "string", "Rndrng_Prvdr_First_Name": "string", "Rndrng_Prvdr_MI": "string", "Rndrng_Prvdr_Crdntls": "string", "Rndrng_Prvdr_Gndr": "string", "Rndrng_Prvdr_Ent_Cd": "string", "Rndrng_Prvdr_St1": "string", "Rndrng_Prvdr_St2": "string", "Rndrng_Prvdr_City": "string", "Rndrng_Prvdr_State_Abrvtn": "string", "Rndrng_Prvdr_State_FIPS": "long", "Rndrng_Prvdr_Zip5": "string", "Rndrng_Prvdr_RUCA": "long", "Rndrng_Prvdr_RUCA_Desc": "string", "Rndrng_Prvdr_Cntry": "string", "Rndrng_Prvdr_Type": "string", "Rndrng_Prvdr_Mdcr_Prtcptg_Ind": "string", "HCPCS_Cd": "long", "HCPCS_Desc": "string", "HCPCS_Drug_Ind": "string", "Place_Of_Srvc": "string", "Tot_Benes": "long", "Tot_Srvcs": "long", "Tot_Bene_Day_Srvcs": "long", "Avg_Sbmtd_Chrg": "float", "Avg_Mdcr_Alowd_Amt": "float", "Avg_Mdcr_Pymt_Amt": "float", "Avg_Mdcr_Stdzd_Amt": "float" } }, "inputs": [ { "name": "default", "node_id": "e586ff4d-cc6d-4132-a208-c82cb0699f7c", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "f0fe8038-e645-47f3-a3f1-762f9ffd89e7", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Rndrng_NPI", "new_name": "NPI" }, "drop_column_parameters": { "column_to_drop": [] } }, "inputs": [ { "name": "df", "node_id": "cc78bd0d-a5fa-4516-9e16-b0de92c34fae", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "43b05f6b-db0c-4a87-820d-ac1bfcd978ac", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Rndrng_Prvdr_Type", "new_name": "provider_type" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "f0fe8038-e645-47f3-a3f1-762f9ffd89e7", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "dd7998bd-15f3-4f95-8c1a-346463b2a317", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "HCPCS_Cd", "new_name": "hcpcs_code" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "43b05f6b-db0c-4a87-820d-ac1bfcd978ac", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "053cc6c2-36c5-47f6-9bc0-ba5f3b6add5b", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Drop missing", "drop_missing_parameters": { "input_column": [ "NPI", "hcpcs_code" ] }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "dd7998bd-15f3-4f95-8c1a-346463b2a317", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "962dd2b6-8193-4346-8719-32e4a28d2b3b", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Drop column", "drop_column_parameters": { "column_to_drop": [ "Rndrng_Prvdr_Last_Org_Name", "Rndrng_Prvdr_First_Name", "Rndrng_Prvdr_MI", "Rndrng_Prvdr_Crdntls", "Rndrng_Prvdr_Ent_Cd", "Rndrng_Prvdr_St1", "Rndrng_Prvdr_St2", "Rndrng_Prvdr_City", "Rndrng_Prvdr_State_Abrvtn", "Rndrng_Prvdr_State_FIPS", "Rndrng_Prvdr_RUCA", "Rndrng_Prvdr_RUCA_Desc", "Rndrng_Prvdr_Cntry", "Rndrng_Prvdr_Mdcr_Prtcptg_Ind", "HCPCS_Desc", "Place_Of_Srvc", "HCPCS_Drug_Ind" ] } }, "inputs": [ { "name": "df", "node_id": "053cc6c2-36c5-47f6-9bc0-ba5f3b6add5b", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "cb4bad5a-960e-4d03-8e7a-ff2a85f2b985", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Rndrng_Prvdr_Gndr", "new_name": "nppes_provider_gender" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "962dd2b6-8193-4346-8719-32e4a28d2b3b", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "50cc14b9-9aa1-4156-b468-c989f300bdc7", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_Srvcs", "new_name": "line_svc_cnt" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "cb4bad5a-960e-4d03-8e7a-ff2a85f2b985", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "5f955852-5b4e-485c-b8a7-fa8b42f354d9", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_Bene_Day_Srvcs", "new_name": "bene_day_srvc_cnt" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "50cc14b9-9aa1-4156-b468-c989f300bdc7", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "4c008b37-066d-4709-b78b-545cdca291a8", "type": "SOURCE", "operator": "sagemaker.s3_source_0.1", "parameters": { "dataset_definition": { "__typename": "S3CreateDatasetDefinitionOutput", "datasetSourceType": "S3", "name": "partd", "description": null, "s3ExecutionContext": { "__typename": "S3ExecutionContext", "s3Uri": "s3://sagemaker-us-east-1-403678423963/fraud-detect-demo/data/raw/medicareD.csv", "s3ContentType": "csv", "s3HasHeader": true, "s3FieldDelimiter": ",", "s3DirIncludesNested": false, "s3AddsFilenameColumn": false } } }, "inputs": [], "outputs": [ { "name": "default", "sampling": { "sampling_method": "sample_by_limit", "limit_rows": 50000 } } ] }, { "node_id": "c2a66799-d19b-4405-a0fd-676faca065ae", "type": "TRANSFORM", "operator": "sagemaker.spark.infer_and_cast_type_0.1", "parameters": {}, "trained_parameters": { "schema": { "Prscrbr_NPI": "long", "Prscrbr_Last_Org_Name": "string", "Prscrbr_First_Name": "string", "Prscrbr_City": "string", "Prscrbr_State_Abrvtn": "string", "Prscrbr_State_FIPS": "long", "Prscrbr_Type": "string", "Prscrbr_Type_Src": "string", "Brnd_Name": "string", "Gnrc_Name": "string", "Tot_Clms": "long", "Tot_30day_Fills": "long", "Tot_Day_Suply": "long", "Tot_Drug_Cst": "float", "Tot_Benes": "string", "GE65_Sprsn_Flag": "string", "GE65_Tot_Clms": "string", "GE65_Tot_30day_Fills": "string", "GE65_Tot_Drug_Cst": "string", "GE65_Tot_Day_Suply": "string", "GE65_Bene_Sprsn_Flag": "string", "GE65_Tot_Benes": "string" } }, "inputs": [ { "name": "default", "node_id": "4c008b37-066d-4709-b78b-545cdca291a8", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "bbe4cb92-330c-48ce-9bbf-7960a821f9b2", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_Benes", "new_name": "bene_unique_cnt" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "5f955852-5b4e-485c-b8a7-fa8b42f354d9", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "f38244a5-c921-4f95-96ac-1a9b120d25dc", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Avg_Sbmtd_Chrg", "new_name": "average_submitted_chrg_amt" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "bbe4cb92-330c-48ce-9bbf-7960a821f9b2", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "9853fd51-30dc-4c78-9817-2a1bedb241d8", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Avg_Mdcr_Pymt_Amt", "new_name": "average_medicare_payment_amt" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "f38244a5-c921-4f95-96ac-1a9b120d25dc", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "e9458eb9-f12c-4705-a450-a14a96329c5f", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_sql_0.1", "parameters": { "sql": "/* Table is available as variable `df`*/\nSELECT NPI,\nprovider_type,\nnppes_provider_gender,\nmax(average_medicare_payment_amt) average_medicare_payment_amt_max,\nmin(average_medicare_payment_amt) average_medicare_payment_amt_min,\nstd(average_medicare_payment_amt) average_medicare_payment_amt_standard_deviation,\npercentile_approx(average_medicare_payment_amt,.5) average_medicare_payment_amt_median,\nsum(average_medicare_payment_amt) average_medicare_payment_amt_sum,\navg(average_medicare_payment_amt) average_medicare_payment_amt_mean,\n\nmax(average_submitted_chrg_amt) average_submitted_chrg_amt_max,\nmin(average_submitted_chrg_amt) average_submitted_chrg_amt_min,\nstd(average_submitted_chrg_amt) average_submitted_chrg_amt_standard_deviation,\npercentile_approx(average_submitted_chrg_amt,.5) average_submitted_chrg_amt_median,\nsum(average_submitted_chrg_amt) average_submitted_chrg_amt_sum,\navg(average_submitted_chrg_amt) average_submitted_chrg_amt_mean,\n\nmax(bene_day_srvc_cnt) bene_day_srvc_cnt_max,\nmin(bene_day_srvc_cnt) bene_day_srvc_cnt_min,\nstd(bene_day_srvc_cnt) bene_day_srvc_cnt_standard_deviation,\npercentile_approx(bene_day_srvc_cnt,.5) bene_day_srvc_cnt_median,\nsum(bene_day_srvc_cnt) bene_day_srvc_cnt_sum,\navg(bene_day_srvc_cnt) bene_day_srvc_cnt_mean,\n\nmax(bene_unique_cnt) bene_unique_cnt_max,\nmin(bene_unique_cnt) bene_unique_cnt_min,\nstd(bene_unique_cnt) bene_unique_cnt_standard_deviation,\npercentile_approx(bene_unique_cnt,.5) bene_unique_cnt_median,\nsum(bene_unique_cnt) bene_unique_cnt_sum,\navg(bene_unique_cnt) bene_unique_cnt_mean,\n\nmax(line_svc_cnt) line_svc_cnt_max,\nmin(line_svc_cnt) line_svc_cnt_min,\nstd(line_svc_cnt) line_svc_cnt_standard_deviation,\npercentile_approx(line_svc_cnt,.5) line_svc_cnt_median,\nsum(line_svc_cnt) line_svc_cnt_sum,\navg(line_svc_cnt) line_svc_cnt_mean\n\nfrom df\ngroup by NPI,\nprovider_type,\nnppes_provider_gender" }, "inputs": [ { "name": "df", "node_id": "9853fd51-30dc-4c78-9817-2a1bedb241d8", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "6591ada5-18cf-449c-aba8-14cafd9045d3", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Fill missing", "fill_missing_parameters": { "input_column": [ "average_submitted_chrg_amt_standard_deviation", "average_medicare_payment_amt_standard_deviation", "bene_day_srvc_cnt_standard_deviation", "bene_unique_cnt_standard_deviation", "line_svc_cnt_standard_deviation" ], "fill_value": "0" }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "e9458eb9-f12c-4705-a450-a14a96329c5f", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "0dd8e090-a08b-4ac6-9018-b4a3ece9939d", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Drop missing", "drop_missing_parameters": { "input_column": [ "Prscrbr_NPI" ] }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "c2a66799-d19b-4405-a0fd-676faca065ae", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "ab55fb0e-fa65-416e-995c-0c564c307050", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Drop column", "drop_column_parameters": { "column_to_drop": [ "Prscrbr_Last_Org_Name", "Prscrbr_First_Name", "Prscrbr_City", "Prscrbr_State_Abrvtn", "Prscrbr_State_FIPS", "Prscrbr_Type_Src", "Brnd_Name", "Gnrc_Name", "GE65_Sprsn_Flag", "GE65_Tot_Clms", "GE65_Tot_30day_Fills", "GE65_Tot_Drug_Cst", "GE65_Tot_Day_Suply", "GE65_Bene_Sprsn_Flag", "GE65_Tot_Benes" ] } }, "inputs": [ { "name": "df", "node_id": "0dd8e090-a08b-4ac6-9018-b4a3ece9939d", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "d490ad76-eba9-4dc7-afd6-a02f91bc546c", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Fill missing", "fill_missing_parameters": { "input_column": [ "Tot_Benes" ], "fill_value": "5" }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "ab55fb0e-fa65-416e-995c-0c564c307050", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "da95ce68-ed88-435a-88df-711f93ac7ca4", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Prscrbr_NPI", "new_name": "NPI" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "d490ad76-eba9-4dc7-afd6-a02f91bc546c", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "ec4da336-af45-46df-aac7-03964df855cd", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Prscrbr_Type", "new_name": "specialty_description" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "da95ce68-ed88-435a-88df-711f93ac7ca4", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "81417b28-873c-415c-9fbd-0b3020915a8c", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_Clms", "new_name": "total_claim_count" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "ec4da336-af45-46df-aac7-03964df855cd", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "6cc7cb2c-9e0a-4309-ae86-023174750023", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_30day_Fills", "new_name": "Total_30_day_fill_count" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "81417b28-873c-415c-9fbd-0b3020915a8c", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "44223d00-0076-44b3-a40d-25cb8f371ab1", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_Day_Suply", "new_name": "Total_day_supply" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "6cc7cb2c-9e0a-4309-ae86-023174750023", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "f9ade4bf-22a3-432d-96e2-998efb359793", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_Drug_Cst", "new_name": "total_drug_cost" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "44223d00-0076-44b3-a40d-25cb8f371ab1", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "dcd99e44-dfcc-404a-b09b-f12c625a5108", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Tot_Benes", "new_name": "bene_count" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "f9ade4bf-22a3-432d-96e2-998efb359793", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "cf532cf5-fa6d-4765-8914-7ec5782d6398", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_sql_0.1", "parameters": { "sql": "/* Table is available as variable `df`*/\nSELECT NPI,\nspecialty_description,\nmax(total_drug_cost) total_drug_cost_max,\nmin(total_drug_cost) total_drug_cost_min,\nstd(total_drug_cost) total_drug_cost_standard_deviation,\npercentile_approx(total_drug_cost,.5) total_drug_cost_median,\nsum(total_drug_cost) total_drug_cost_sum,\navg(total_drug_cost) total_drug_cost_mean,\n\nmax(Total_day_supply) Total_day_supply_max,\nmin(Total_day_supply) Total_day_supply_min,\nstd(Total_day_supply) Total_day_supply_standard_deviation,\npercentile_approx(Total_day_supply,.5) Total_day_supply_median,\nsum(Total_day_supply) Total_day_supply_sum,\navg(Total_day_supply) Total_day_supply_mean,\n\nmax(Total_30_day_fill_count) Total_30_day_fill_count_max,\nmin(Total_30_day_fill_count) Total_30_day_fill_count_min,\nstd(Total_30_day_fill_count) Total_30_day_fill_count_standard_deviation,\npercentile_approx(Total_30_day_fill_count,.5) Total_30_day_fill_count_median,\nsum(Total_30_day_fill_count) Total_30_day_fill_count_sum,\navg(Total_30_day_fill_count) Total_30_day_fill_count_mean,\n\nmax(total_claim_count) total_claim_count_max,\nmin(total_claim_count) total_claim_count_min,\nstd(total_claim_count) total_claim_count_standard_deviation,\npercentile_approx(total_claim_count,.5) total_claim_count_median,\nsum(total_claim_count) total_claim_count_sum,\navg(total_claim_count) total_claim_count_mean,\n\nmax(bene_count) bene_count_max,\nmin(bene_count) bene_count_min,\nstd(bene_count) bene_count_standard_deviation,\npercentile_approx(bene_count,.5) bene_count_median,\nsum(bene_count) bene_count_sum,\navg(bene_count) bene_count_mean\n\nfrom df\ngroup by NPI,\nspecialty_description" }, "inputs": [ { "name": "df", "node_id": "dcd99e44-dfcc-404a-b09b-f12c625a5108", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "0ab5d4f5-568e-4951-bb6e-47b6c8e2bd5d", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Fill missing", "fill_missing_parameters": { "fill_value": "0", "input_column": [ "Total_day_supply_standard_deviation", "total_drug_cost_standard_deviation", "Total_30_day_fill_count_standard_deviation", "total_claim_count_standard_deviation", "bene_count_standard_deviation" ] }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "cf532cf5-fa6d-4765-8914-7ec5782d6398", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "e12d8751-6981-4277-a310-e0a62142fe01", "type": "SOURCE", "operator": "sagemaker.s3_source_0.1", "parameters": { "dataset_definition": { "__typename": "S3CreateDatasetDefinitionOutput", "datasetSourceType": "S3", "name": "dmepos", "description": null, "s3ExecutionContext": { "__typename": "S3ExecutionContext", "s3Uri": "s3://sagemaker-us-east-1-403678423963/fraud-detect-demo/data/raw/dmepos.csv", "s3ContentType": "csv", "s3HasHeader": true, "s3FieldDelimiter": ",", "s3DirIncludesNested": false, "s3AddsFilenameColumn": false } } }, "inputs": [], "outputs": [ { "name": "default", "sampling": { "sampling_method": "sample_by_limit", "limit_rows": 50000 } } ] }, { "node_id": "248cfa7d-a9c1-463a-82a3-5800ccfcd6ad", "type": "TRANSFORM", "operator": "sagemaker.spark.infer_and_cast_type_0.1", "parameters": {}, "trained_parameters": {}, "inputs": [ { "name": "df", "node_id": "e12d8751-6981-4277-a310-e0a62142fe01", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "f68430dd-a124-4bc8-b2bc-bc45534c389a", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Drop missing", "drop_missing_parameters": { "input_column": [ "Rfrg_NPI" ] }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "248cfa7d-a9c1-463a-82a3-5800ccfcd6ad", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "442f88c9-84bf-4029-aacc-be0908779f4b", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Drop column", "drop_column_parameters": { "column_to_drop": [ "Rfrg_Prvdr_Last_Name", "Rfrg_Prvdr_First_Name", "Rfrg_Prvdr_MI", "Rfrg_Crdntls", "Rfrg_Ent_Cd", "Rfrg_Prvdr_St1", "Rfrg_Prvdr_St2", "Rfrg_Prvdr_City", "Rfrg_Prvdr_State_Abrvtn", "Rfrg_Prvdr_State_FIPS", "Rfrg_Prvdr_RUCA", "Rfrg_Prvdr_RUCA_Desc", "Rfrg_Prvdr_Type_Flag", "BETOS_Lvl", "BETOS_Cd", "BETOS_Desc", "HCPCS_Desc", "Suplr_Rentl_Ind", "Avg_Suplr_Mdcr_Alowd_Amt", "HCPCS_CD" ] } }, "inputs": [ { "name": "df", "node_id": "f68430dd-a124-4bc8-b2bc-bc45534c389a", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "40a27fd5-0f90-4d69-af4c-4ef66cab43bd", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Fill missing", "fill_missing_parameters": { "input_column": [ "Tot_Suplr_Benes" ], "fill_value": "5" }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "442f88c9-84bf-4029-aacc-be0908779f4b", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "078b690d-00a6-41b8-ace7-dc137a946ff6", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_sql_0.1", "parameters": { "sql": "/* Table is available as variable `df`*/\nSELECT Rfrg_NPI,\nRfrg_Prvdr_Gndr,\nRfrg_Prvdr_Type,\nRfrg_Prvdr_Zip5,\n\nmax(Avg_Suplr_Mdcr_Pymt_Amt) Avg_Suplr_Mdcr_Pymt_Amt_max,\nmin(Avg_Suplr_Mdcr_Pymt_Amt) Avg_Suplr_Mdcr_Pymt_Amt_min,\nstd(Avg_Suplr_Mdcr_Pymt_Amt) Avg_Suplr_Mdcr_Pymt_Amt_standard_deviation,\npercentile_approx(Avg_Suplr_Mdcr_Pymt_Amt,.5) Avg_Suplr_Mdcr_Pymt_Amt_median,\nsum(Avg_Suplr_Mdcr_Pymt_Amt) Avg_Suplr_Mdcr_Pymt_Amt_sum,\navg(Avg_Suplr_Mdcr_Pymt_Amt) Avg_Suplr_Mdcr_Pymt_Amt_mean,\n\nmax(Avg_Suplr_Sbmtd_Chrg) Avg_Suplr_Sbmtd_Chrg_max,\nmin(Avg_Suplr_Sbmtd_Chrg) Avg_Suplr_Sbmtd_Chrg_min,\nstd(Avg_Suplr_Sbmtd_Chrg) Avg_Suplr_Sbmtd_Chrg_standard_deviation,\npercentile_approx(Avg_Suplr_Sbmtd_Chrg,.5) Avg_Suplr_Sbmtd_Chrg_median,\nsum(Avg_Suplr_Sbmtd_Chrg) Avg_Suplr_Sbmtd_Chrg_sum,\navg(Avg_Suplr_Sbmtd_Chrg) Avg_Suplr_Sbmtd_Chrg_mean,\n\nmax(Tot_Suplr_Srvcs) Tot_Suplr_Srvcs_max,\nmin(Tot_Suplr_Srvcs) Tot_Suplr_Srvcs_min,\nstd(Tot_Suplr_Srvcs) Tot_Suplr_Srvcs_standard_deviation,\npercentile_approx(Tot_Suplr_Srvcs,.5) Tot_Suplr_Srvcs_median,\nsum(Tot_Suplr_Srvcs) Tot_Suplr_Srvcs_sum,\navg(Tot_Suplr_Srvcs) Tot_Suplr_Srvcs_mean,\n\nmax(Tot_Suplr_Clms) Tot_Suplr_Clms_max,\nmin(Tot_Suplr_Clms) Tot_Suplr_Clms_min,\nstd(Tot_Suplr_Clms) Tot_Suplr_Clms_standard_deviation,\npercentile_approx(Tot_Suplr_Clms,.5) Tot_Suplr_Clms_median,\nsum(Tot_Suplr_Clms) Tot_Suplr_Clms_sum,\navg(Tot_Suplr_Clms) Tot_Suplr_Clms_mean,\n\nmax(Tot_Suplr_Benes) Tot_Suplr_Benes_max,\nmin(Tot_Suplr_Benes) Tot_Suplr_Benes_min,\nstd(Tot_Suplr_Benes) Tot_Suplr_Benes_standard_deviation,\npercentile_approx(Tot_Suplr_Benes,.5) Tot_Suplr_Benes_median,\nsum(Tot_Suplr_Benes) Tot_Suplr_Benes_sum,\navg(Tot_Suplr_Benes) Tot_Suplr_Benes_mean,\n\nmax(Tot_Suplrs) Tot_Suplrs_max,\nmin(Tot_Suplrs) Tot_Suplrs_min,\nstd(Tot_Suplrs) Tot_Suplrs_standard_deviation,\npercentile_approx(Tot_Suplrs,.5) Tot_Suplrs_median,\nsum(Tot_Suplrs) Tot_Suplrs_sum,\navg(Tot_Suplrs) Tot_Suplrs_mean\n\nfrom df\ngroup by Rfrg_NPI,\nRfrg_Prvdr_Gndr,\nRfrg_Prvdr_Type,\nRfrg_Prvdr_Zip5" }, "inputs": [ { "name": "df", "node_id": "40a27fd5-0f90-4d69-af4c-4ef66cab43bd", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "28316b60-410e-4cc2-b2ff-1a3e737ebdcb", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Rfrg_NPI", "new_name": "referring_npi" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "078b690d-00a6-41b8-ace7-dc137a946ff6", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "50b17426-a52b-4a67-bce9-69f660ac276b", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "Rfrg_Prvdr_Gndr", "new_name": "referring_provider_gender" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "28316b60-410e-4cc2-b2ff-1a3e737ebdcb", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "dbc645b5-c1ab-4ad4-a908-1c66e47095bf", "type": "TRANSFORM", "operator": "sagemaker.spark.handle_missing_0.1", "parameters": { "operator": "Fill missing", "fill_missing_parameters": { "input_column": [ "Avg_Suplr_Mdcr_Pymt_Amt_standard_deviation", "Avg_Suplr_Sbmtd_Chrg_standard_deviation", "Tot_Suplr_Srvcs_standard_deviation", "Tot_Suplr_Clms_standard_deviation", "Tot_Suplr_Benes_standard_deviation", "Tot_Suplrs_standard_deviation" ], "fill_value": "0" }, "impute_parameters": { "column_type": "Numeric", "numeric_parameters": { "strategy": "Approximate Median" } } }, "inputs": [ { "name": "df", "node_id": "50b17426-a52b-4a67-bce9-69f660ac276b", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "cd987419-a28f-4d59-b93e-06c0bd5427a4", "type": "SOURCE", "operator": "sagemaker.s3_source_0.1", "parameters": { "dataset_definition": { "__typename": "S3CreateDatasetDefinitionOutput", "datasetSourceType": "S3", "name": "leie", "description": null, "s3ExecutionContext": { "__typename": "S3ExecutionContext", "s3Uri": "s3://sagemaker-us-east-1-403678423963/fraud-detect-demo/data/raw/leie.csv", "s3ContentType": "csv", "s3HasHeader": true, "s3FieldDelimiter": ",", "s3DirIncludesNested": false, "s3AddsFilenameColumn": false } } }, "inputs": [], "outputs": [ { "name": "default", "sampling": { "sampling_method": "sample_by_limit", "limit_rows": 50000 } } ] }, { "node_id": "1923568e-2d96-4717-a8b7-179f54fe0112", "type": "TRANSFORM", "operator": "sagemaker.spark.infer_and_cast_type_0.1", "parameters": {}, "trained_parameters": { "schema": { "LASTNAME": "string", "FIRSTNAME": "string", "MIDNAME": "string", "BUSNAME": "string", "GENERAL": "string", "SPECIALTY": "string", "UPIN": "string", "NPI": "long", "DOB": "string", "ADDRESS": "string", "CITY": "string", "STATE": "string", "ZIP": "long", "EXCLTYPE": "string", "EXCLDATE": "long", "REINDATE": "long", "WAIVERDATE": "long", "WVRSTATE": "string" } }, "inputs": [ { "name": "default", "node_id": "cd987419-a28f-4d59-b93e-06c0bd5427a4", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "4ea1444a-6055-4d24-a3c9-c00670c900af", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Drop column", "drop_column_parameters": { "column_to_drop": [ "LASTNAME", "MIDNAME", "FIRSTNAME", "BUSNAME", "GENERAL", "SPECIALTY", "UPIN", "DOB", "ADDRESS", "CITY", "STATE", "ZIP", "EXCLDATE", "REINDATE", "WAIVERDATE", "WVRSTATE" ] } }, "inputs": [ { "name": "df", "node_id": "1923568e-2d96-4717-a8b7-179f54fe0112", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "e79bd67a-0190-4df9-9a1b-109662ddf782", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_sql_0.1", "parameters": { "sql": "/* Table is available as variable `df`*/\nselect NPI,\nEXCLTYPE\nfrom df\nwhere NPI <> 0" }, "inputs": [ { "name": "df", "node_id": "4ea1444a-6055-4d24-a3c9-c00670c900af", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "990fe296-9e89-4ff6-9be9-0d3813a693c7", "type": "TRANSFORM", "operator": "sagemaker.spark.join_multi_keys_0.1", "parameters": { "join_keys": [ { "left": "NPI", "right": "NPI" }, { "left": "provider_type", "right": "specialty_description" } ], "join_type": "inner" }, "inputs": [ { "name": "df", "node_id": "6591ada5-18cf-449c-aba8-14cafd9045d3", "output_name": "default" }, { "name": "df", "node_id": "0ab5d4f5-568e-4951-bb6e-47b6c8e2bd5d", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "be0d5492-ba6f-4f30-846e-683b76e668b2", "type": "TRANSFORM", "operator": "sagemaker.spark.join_multi_keys_0.1", "parameters": { "join_keys": [ { "left": "NPI", "right": "referring_npi" }, { "left": "provider_type", "right": "Rfrg_Prvdr_Type" } ], "join_type": "inner" }, "inputs": [ { "name": "df", "node_id": "990fe296-9e89-4ff6-9be9-0d3813a693c7", "output_name": "default" }, { "name": "df", "node_id": "dbc645b5-c1ab-4ad4-a908-1c66e47095bf", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "aef80118-7036-4689-9f48-6ed7e7672aa9", "type": "TRANSFORM", "operator": "sagemaker.spark.join_tables_0.1", "parameters": { "left_column": "NPI", "right_column": "NPI", "join_type": "leftouter" }, "inputs": [ { "name": "df", "node_id": "be0d5492-ba6f-4f30-846e-683b76e668b2", "output_name": "default" }, { "name": "df", "node_id": "e79bd67a-0190-4df9-9a1b-109662ddf782", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "1737a5e9-da61-4f1a-9a1a-48ed6c3fdfb1", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_formula_0.1", "parameters": { "output_column": "fraudulent_provider", "formula": "case when EXCLTYPE in ('1128a1','1128a2','1128a3','1128b4','1128b7') then 1 else 0 end" }, "inputs": [ { "name": "df", "node_id": "aef80118-7036-4689-9f48-6ed7e7672aa9", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "0a187dea-5b89-4b26-92fb-786617909330", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Drop column", "drop_column_parameters": { "column_to_drop": [ "Rfrg_Prvdr_Zip5", "NPI_1", "EXCLTYPE", "nppes_provider_gender", "provider_type" ] } }, "inputs": [ { "name": "df", "node_id": "1737a5e9-da61-4f1a-9a1a-48ed6c3fdfb1", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "39c4ddf2-1d88-4c63-ba48-506128b45b16", "type": "TRANSFORM", "operator": "sagemaker.spark.encode_categorical_0.1", "parameters": { "operator": "One-hot encode", "one_hot_encode_parameters": { "invalid_handling_strategy": "Skip", "drop_last": false, "output_style": "Columns", "input_column": [ "referring_provider_gender" ], "input_already_ordinal_encoded": false }, "ordinal_encode_parameters": { "invalid_handling_strategy": "Replace with NaN" } }, "trained_parameters": { "one_hot_encode_parameters": { "_hash": -6254551675464078000, "string_indexer_model": "P)h>@6aWAK2mmp7N>q03+xF`K004IZ000vJ003=ebYWy+bYU-WVRCdWFfcGMFm;blPXaL%#P5ELo|mS`g31*y9Ec_wubW5vc4_E8Ut1!Q{q8<6F-qcfGV`06lh6)w7E2LnkdQ#{Q04@*m)^-9p;SWoQpkp39SnXTZK6RZ8d0prT)-!>oG#jSuI9H7?Nqgmxa8lEvWGHvFiBJ<_?POB4^*aLs_+M`CP*!5*qB&%cWDWziF{@6aWAK2mmp7N>txRD1{CH000aC000;O003=ebYWy+bYU+paA9(EEif=JFfcA-a$`#_N@ieSU}E@o`)wHjP)h>@6aWAK2mmp7N>rLLt9yR|003tK002k;003lRbYU-WVRCdWFfcGMFfC$YV>M+tWj8H0FkxgZG&wReEjck{V=ZGeIAvitHf1?9I5;h1FfcGKb8ca9aCt6pVRCVGWps^FO-sW-5S_$W3--EU7jh^IEf(4(F*UZuix&|=DdIs8kxjF)(M?*jTScV5&%fo?R;`NQWte$yzTPmy!}9l9l@06@GAV>hbcSX+Jvz%tS4!WczTSAIl6#6HHYKOFnkQUy{A#^W{X7+PK$R|MBTd^h&4f%89bCQ8g69irZGFQp^FQnO3SOzW_y@a~l@+%J9!(0(vqGKCq@)A0ftddR7AD}a6ic8@KO~r0^ZE50@X1PGCok)(i_`-a_$;XJ#IsB$bwBMVX*Wznn#R3en(U{&D2kFOjQT;ZyBmgn+Ku9*-y)u84&F3ZADS0?15ir?1QY-O00;mvcuG_m59q-X0000G0000@0001FVRT_HE^uLTbS*G2FfcGJVq#-8WjSRxEjBP=WGyr~GBYhXF=S&cV>CErVK_ErIW#ypEn_e+FfMa$VQ_GHE^uLTadl;ME@N_IOD;-gU|?WkVA$r~aF-);6#!660Rj{Q6aWAK2mmp7N>q03+xF`K004IZ000vJ0000000000005+c00000ZDn*}WMOn+FK}UUbS*G2FfcGsO928D0~7!N00;mvcuG{?M<|630000C0000O00000000000001_fgAw<0BvP-VPs)+VJ|LlVRCdWFfcGMFfLM+tWj8H0FkxgZG&wReEjck{V=ZGeIAvitHf1?9I5;h1FfcGKb8ca9aCt6pVRCVGWpplMa$`_S1qJ{B000C41ORve006=R00000", "one_hot_encoder_model": "P)h>@6aWAK2mmp7N>oet)0@)(003zL000vJ003=ebYWy+bYU-WVRCdWFfcGMFm;YSPXjRwhIf93=WCRd}zZ%2QmdJf+52&zwP^#UM<%BQZG8V^q+~#lwu<($A%@E6jIG`IHN(jBc`J869k2W;dMI8ONii@-3VFVsP*lB;ZzZxkaGva!jVPx2Ih7T@6aWAK2mmp7N>p=?GO7*$000aC000;O003=ebYWy+bYU+paA9(EEif=JFfcA-a$`#_N@ieSU}Bhk`MWm&P)h>@6aWAK2mmp7N>slJW_Er7001xo002k;003lRbYU-WVRCdWFfcGMFfC>|VKg!{W;88gI5#ydG&nXmEjeUlGc7bTG-hREH8f#4VlXXZFfcGKb8ca9aCt6pVRCVGWpsT~O-m~=6is8TE_C4{Lk4D10v3fa%)F`Nw7Afnh=`~=c|6iiqQlF~)Ff4^7W^6hK!1q0wu&N{z~z3NbMLt!XNTu8L4=V`NH91p(h6u3BZTJGLt9@WI2#8uX9+G?jAjAB{>jmr9SQUQpB>)FQV@)GF{D0qg@~W|b!oBR{0HHrHH?1c>DCdik9+wx}z*obZT8h;IKt*jvrPxgAT#O?}Tx=|nD^pf8O_<|ZS_`ph~r9y%5>RY;v%*1vjWl_+PWZ)*Tz&iG^sW5fi~i`6dY*`flHG>{+Gl@mo}Psdubz7pEXX~-&w#6SZS2Fs9rXoAMm>E9`$#|yH&Bh|}QH_4?^gDg||p32fR&C?|9#aZ`Hl0>ST4)Wf%9|Z7%uMLFG;IF&^P)h>@6aWAK2mmp7N>ma&S=AB%000mG002z@003lRbYU+paA9(EEif=JFfc7L8kIbtv^V=yo~Mo6`UQ0BHdL022TJ00000000000HlEc0001OWprU=VRT_HaA9(EEif=JFfdR{0Rj{Q6aWAK2mmp7N>p=?GO7*$000aC000;O0000000000005+c1OWg5ZDn*}WMOn+FD`Ila&#>)FfcGME@N_IP)h*<6ay3h000O8F?dQ;zY1n{egOagFaiJoNB{r;0000000000q=85Q003lRbYU-WVRCdWFfcGMFfC>|VKg!{W;88gI5#ydG&nXmEjeUlGc7bTG-hREH8f#4VlXXZFfcGKb8ca9aCt6pVRCVGWpq$W0Rj{Q6aWAK2mmp7N>ma&S=AB%000mG002z@0000000000005+cECT=lWMOn+FD`Ila&#>)FfcGMEoM1kG%_@1G%aE{H#IFZI5s#fIb>urEi^MUW@TeFG+{YnFfC&+FfcB2Zeeh6c`k5aa&dKKbS`6ZV^B*41^@s600aO80C)ia0I3520000" } }, "inputs": [ { "name": "df", "node_id": "0a187dea-5b89-4b26-92fb-786617909330", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "3f6a832f-55ba-4a01-869b-45b0320cd10c", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "NPI_0", "new_name": "referring_npi" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "39c4ddf2-1d88-4c63-ba48-506128b45b16", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "11ef0eb5-a1ec-49ca-8632-2ef4589829b2", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Move column", "move_column_parameters": { "move_type": "Move to start", "move_to_start_parameters": { "column_to_move": "fraudulent_provider" } }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "3f6a832f-55ba-4a01-869b-45b0320cd10c", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "64a0bcee-83fe-4497-a8fa-d9e57e115715", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Drop column", "drop_column_parameters": { "column_to_drop": [ "referring_npi", "Tot_Suplrs_max", "Tot_Suplrs_min", "Tot_Suplrs_median", "Tot_Suplr_Benes_max", "Tot_Suplr_Benes_min", "Tot_Suplr_Benes_median", "Tot_Suplr_Clms_max", "Tot_Suplr_Clms_min", "Tot_Suplr_Clms_median", "Tot_Suplr_Srvcs_max", "Tot_Suplr_Srvcs_min", "Tot_Suplr_Srvcs_median", "Avg_Suplr_Sbmtd_Chrg_max", "Avg_Suplr_Sbmtd_Chrg_min", "Avg_Suplr_Sbmtd_Chrg_median", "Avg_Suplr_Mdcr_Pymt_Amt_max", "Avg_Suplr_Mdcr_Pymt_Amt_min", "Avg_Suplr_Mdcr_Pymt_Amt_median", "bene_unique_cnt_median", "bene_unique_cnt_min", "bene_unique_cnt_max", "line_svc_cnt_median", "line_svc_cnt_min", "line_svc_cnt_max", "bene_day_srvc_cnt_max", "bene_day_srvc_cnt_min", "bene_day_srvc_cnt_median", "average_submitted_chrg_amt_max", "average_submitted_chrg_amt_min", "average_submitted_chrg_amt_median", "average_medicare_payment_amt_median", "average_medicare_payment_amt_min", "average_medicare_payment_amt_max", "total_claim_count_max", "total_claim_count_min", "total_claim_count_median", "Total_30_day_fill_count_max", "Total_30_day_fill_count_min", "Total_30_day_fill_count_median", "Total_day_supply_median", "Total_day_supply_max", "Total_day_supply_min", "total_drug_cost_max", "total_drug_cost_min", "total_drug_cost_median", "bene_count_median", "bene_count_min", "referring_provider_gender_M" ] } }, "inputs": [ { "name": "df", "node_id": "11ef0eb5-a1ec-49ca-8632-2ef4589829b2", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "deb701f9-9bb7-4283-8073-b9b0ef43f5b2", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Rename column", "rename_column_parameters": { "input_column": "referring_provider_gender_F", "new_name": "female" }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "64a0bcee-83fe-4497-a8fa-d9e57e115715", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "d9691a71-12d6-46a1-98c2-9473a22622a4", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": { "operator": "Move column", "move_column_parameters": { "move_type": "Move after", "move_after_parameters": { "column_to_move": "female", "target_column": "fraudulent_provider" }, "move_to_start_parameters": {} }, "drop_column_parameters": {} }, "inputs": [ { "name": "df", "node_id": "deb701f9-9bb7-4283-8073-b9b0ef43f5b2", "output_name": "default" } ], "outputs": [ { "name": "default" } ] }, { "node_id": "c77587a5-7e2a-48d9-81e2-852ea26c0e20", "type": "DESTINATION", "operator": "sagemaker.spark.s3_destination_0.1", "name": "S3: sample", "parameters": { "output_config": { "compression": "none", "output_path": "s3://sagemaker-us-east-1-403678423963/fraud-detect-demo/data/", "output_content_type": "CSV", "delimiter": "," } }, "inputs": [ { "name": "default", "node_id": "d9691a71-12d6-46a1-98c2-9473a22622a4", "output_name": "default" } ], "outputs": [ { "name": "default" } ] } ] }