{"metadata": {"version": 1, "disable_limits": false}, "nodes": [{"node_id": "5c9d4f98-dd8a-4b74-89ad-735638aa7fe6", "type": "SOURCE", "operator": "sagemaker.s3_source_0.1", "parameters": {"dataset_definition": {"__typename": "S3CreateDatasetDefinitionOutput", "datasetSourceType": "S3", "name": "tracks.csv", "description": null, "s3ExecutionContext": {"__typename": "S3ExecutionContext", "s3Uri": "s3://sagemaker-us-east-2-xxxxxx/music-recommendation/tracks.csv", "s3ContentType": "csv", "s3HasHeader": true}}}, "inputs": [], "outputs": [{"name": "default", "sampling": {"sampling_method": "sample_by_limit", "limit_rows": 50000}}]}, {"node_id": "d6154703-6fe5-4501-bf0a-568d2ef32974", "type": "TRANSFORM", "operator": "sagemaker.spark.infer_and_cast_type_0.1", "parameters": {}, "trained_parameters": {"schema": {"trackId": "string", "length": "float", "energy": "float", "acousticness": "float", "valence": "float", "speechiness": "float", "instrumentalness": "float", "liveness": "float", "tempo": "float", "genre": "string"}}, "inputs": [{"name": "default", "node_id": "5c9d4f98-dd8a-4b74-89ad-735638aa7fe6", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "44bd21fc-7685-4a3c-acfe-42740edd99cb", "type": "TRANSFORM", "operator": "sagemaker.spark.encode_categorical_0.1", "parameters": {"operator": "One-hot encode", "one_hot_encode_parameters": {"invalid_handling_strategy": "Keep", "drop_last": false, "output_style": "Columns", "input_column": "genre"}, "ordinal_encode_parameters": {"invalid_handling_strategy": "Replace with NaN"}}, "trained_parameters": {"one_hot_encode_parameters": {"_hash": -1767228197713558300, "string_indexer_model": "P)h>@6aWAK2mr#H-BPFL`-k5E003hF000vJ003=ebYWy+bYU-WVRCdWFfcGMFny29PQx$|MfZG$*Qw=}G-|qqO&3)wf!IbvJV~wk#n?uaD*w(9BqWevXC2>rX67n-3n_`M2sBEF(4SDI2y~Xt$^ju4Liv!%_;Keoz9H>{Mk^|jjl-0|MX_BrtGaF1O0An!TZy^(uPyHUE7-YsMvNRWK9+nJERzvEA)`?8H_PZ^*Yk6#gvJ4i&BuSFNEC<_Y2NBU8tL&8k$(olmx8{(++*s04f*?quKUV~X}b9UP)h>@6aWAK2mr#H-BJ$B-DM5{000aC000;O003=ebYWy+bYU+paA9(EEif=JFfcA-a$`#_N@ieSU}9+h{d_q9P)h>@6aWAK2mr&I-BKPFeOA^1007DY002k;003lRbYU-WVRCdWFfcGMFfBD?GhsI|G&U_XG&V9VG%_?aEnzcaH7z(eW->BlIbk_vIW#R}FfcGKb8ca9aCt6pVRCVGWps^F(Q4Z;6qVK330)pGh=L7%2!@mvJTIP8F9~~T$HpjQgs_Lf7)7?_)I^qDNp49?{$XFRhdq`3$bMlD`n?z*=;#e~zAEUVslG3Yib=k#6>qT2m}a{t`08p$_39EsYd$3NIVSp}Pq=I;a9Wh7vm##Xs`@1*vE&8I;(C0&4nd2)Zi^E0#i@wTNRAN%htPaLaYU#%;v6p#L3)IT&v38x{9Znmt#SjXPNtSXV7aDQ@VSRetPFT-FWv>=A}g4w2j0LN0Q2B4g*d04D$Xk+JziXTIgvQQ5}V(&Yez0i;!GSNc@;~?b<&K`L^zWV*AByZP2KUK_>KP89NxlPJMaF3t#@T?uHd*}5@$@jttfRS>IG!=50I$_N0e*;*SWQzcCA()SHL{k8ho7d-Zqh*@A(7I@9jrbMw8wk9Vh89NJyGSqfweXOGjZCCSeec{n7Ag5Dd~`7$xHa%eGbGPgLwv^%wsHP)h>@6aWAK2mr&I-BQQ!#hMZT000mG002z@003lRbYU+paA9(EEif=JFfc7OWHVtmF*G(UG&D9cEi^JTG%aB>Vl^!|IA$_3WI16uWjQo0V=yo)FfcGME@N_IP)h*<6ay3h000O8!<*ew9u|F8)&T$j$^rlYNB{r;0000000000q=8fc003lRbYU-WVRCdWFfcGMFfBD?GhsI|G&U_XG&V9VG%_?aEnzcaH7z(eW->BlIbk_vIW#R}FfcGKb8ca9aCt6pVRCVGWpq$W0Rj{Q6aWAK2mr&I-BQQ!#hMZT000mG002z@0000000000005+ckOKe!WMOn+FD`Ila&#>)FfcGMEj45_VK*@}HZ3$XHZm@6aWAK2mr&I-BQq6*~ioX003zL000vJ003=ebYWy+bYU-WVRCdWFfcGMFm;YSPXjRwhIf93=WCRs?NxUxLTV){m{=oo@wF%NCC6c)>VGE`fdrz9hu40e$0rya<_rTwJ}68$9!ced{HnZD6QL9)^%!Uy}T``_J{Q5qh)0<2`=~JoO{m|x;+0L6GRy4gam`rBCUWnKOuxR)2JQc2EDK)em4F7_+K8dKmd3rM0=|Vj@5!U+d++q_IU^(T@p_c!cQDKL|V39GvM)I&SLUyp&Gl!U_7+STHxANH%2Fnc3K}kf}oI+_K@#f>okimENrZhz;Y7DU1{SyqUM_E!h<@2BJ}ymryJ9=$am3?xcQh+>-j38DSG^>cN{zBi1QY-O00;oXo840IUmJT80000G0000@0001FVRT_HE^uLTbS*G2FfcGJH)UpIG&f~7Eiz;>G%YkXHe@YfGBsi?H90giH90wBV>4x9En_e+FfMa$VQ_GHE^uLTadl;ME@N_IOD;-gU|?WkxaGgQ{MVl6c}G&D6iIb&lpWnwL3FfcGKb8ca9aCt6pVRCVGWpplMa$`_S1qJ{B000C41ORve006E700000"}}, "inputs": [{"name": "df", "node_id": "d6154703-6fe5-4501-bf0a-568d2ef32974", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "9a4e227d-222b-4067-a81a-605faf001547", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_pyspark_0.1", "parameters": {"code": "# Table is available as variable `df`\nfrom pyspark.sql import functions as F\ndf = df.withColumn('EventTime', F.unix_timestamp(F.current_timestamp()))"}, "inputs": [{"name": "df", "node_id": "44bd21fc-7685-4a3c-acfe-42740edd99cb", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "d65f643a-9fb9-4b66-bf0d-2b32ce881dc5", "type": "TRANSFORM", "operator": "sagemaker.spark.cast_single_data_type_0.1", "parameters": {"column": "EventTime", "original_data_type": "Long", "data_type": "float"}, "inputs": [{"name": "df", "node_id": "9a4e227d-222b-4067-a81a-605faf001547", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "19ad8e80-2002-4ee9-9753-fe9a384b1166", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_formula_0.1", "parameters": {"output_column": "danceability", "formula": "0.3*valence + 0.1*liveness + 0.1*energy"}, "inputs": [{"name": "df", "node_id": "d65f643a-9fb9-4b66-bf0d-2b32ce881dc5", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "814830ce-ab4d-4193-90bb-8b95fb375da0", "type": "SOURCE", "operator": "sagemaker.s3_source_0.1", "parameters": {"dataset_definition": {"__typename": "S3CreateDatasetDefinitionOutput", "datasetSourceType": "S3", "name": "ratings.csv", "description": null, "s3ExecutionContext": {"__typename": "S3ExecutionContext", "s3Uri": "s3://sagemaker-us-east-2-xxxxxx/music-recommendation/ratings.csv", "s3ContentType": "csv", "s3HasHeader": true}}}, "inputs": [], "outputs": [{"name": "default", "sampling": {"sampling_method": "sample_by_limit", "limit_rows": 50000}}]}, {"node_id": "9c0d5850-2331-4f2c-b099-f6e0f8d42848", "type": "TRANSFORM", "operator": "sagemaker.spark.infer_and_cast_type_0.1", "parameters": {}, "trained_parameters": {"schema": {"ratingEventId": "string", "ts": "long", "userId": "long", "trackId": "string", "sessionId": "long", "itemInSession": "long", "Rating": "float"}}, "inputs": [{"name": "default", "node_id": "814830ce-ab4d-4193-90bb-8b95fb375da0", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "a88a70f8-2933-4cef-ba8f-8617ae2d1748", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_pyspark_0.1", "parameters": {"code": "# Table is available as variable `df`\nfrom pyspark.sql import functions as F\ndf = df.withColumn('EventTime', F.unix_timestamp(F.current_timestamp()))"}, "inputs": [{"name": "df", "node_id": "9c0d5850-2331-4f2c-b099-f6e0f8d42848", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "9a283380-91ca-478e-be99-6ba3bf57c680", "type": "TRANSFORM", "operator": "sagemaker.spark.cast_single_data_type_0.1", "parameters": {"column": "EventTime", "original_data_type": "Long", "data_type": "float"}, "inputs": [{"name": "df", "node_id": "a88a70f8-2933-4cef-ba8f-8617ae2d1748", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "d90b4efc-1a47-4971-85f9-c65aedbff3a2", "type": "TRANSFORM", "operator": "sagemaker.spark.join_tables_0.1", "parameters": {"left_column": "trackId", "right_column": "trackId", "join_type": "inner"}, "inputs": [{"name": "df", "node_id": "19ad8e80-2002-4ee9-9753-fe9a384b1166", "output_name": "default"}, {"name": "df", "node_id": "9a283380-91ca-478e-be99-6ba3bf57c680", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "fe586afa-70ed-4f81-b1a7-2635dacd48c4", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": {"operator": "Drop column", "drop_column_parameters": {"column_to_drop": "trackId_1"}}, "inputs": [{"name": "df", "node_id": "d90b4efc-1a47-4971-85f9-c65aedbff3a2", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "16e87932-15a9-4c7f-b880-c7a767df5b40", "type": "TRANSFORM", "operator": "sagemaker.spark.manage_columns_0.1", "parameters": {"operator": "Rename column", "rename_column_parameters": {"input_column": "trackId_0", "new_name": "trackId"}, "drop_column_parameters": {}}, "inputs": [{"name": "df", "node_id": "fe586afa-70ed-4f81-b1a7-2635dacd48c4", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "f35a528b-19a2-4fcd-911e-03a146ebac09", "type": "TRANSFORM", "operator": "sagemaker.spark.custom_pyspark_0.1", "parameters": {"code": "# Table is available as variable `df`\nimport pyspark.sql.functions as F\n# numeric features\nnum_feat_cols = ['energy', 'acousticness', 'valence', 'speechiness', 'instrumentalness', 'liveness', 'tempo', 'danceability', 'genre_Latin', 'genre_Folk', 'genre_Blues', 'genre_Rap', 'genre_Reggae', 'genre_Jazz', 'genre_RnB', 'genre_Country', 'genre_Electronic', 'genre_Pop_Rock']\n# calculate averages for 5-star ratings\nagg_obj = [F.avg(feat_col).alias(feat_col+\"_5star\") for feat_col in num_feat_cols]\ndf = df.filter(F.col('Rating')==5).groupBy('userId').agg(*agg_obj)\ndf = df.withColumn('EventTime', F.unix_timestamp(F.current_timestamp()))"}, "inputs": [{"name": "df", "node_id": "16e87932-15a9-4c7f-b880-c7a767df5b40", "output_name": "default"}], "outputs": [{"name": "default"}]}, {"node_id": "7a6dad19-2c80-43e3-b03d-ec23c3842ae9", "type": "TRANSFORM", "operator": "sagemaker.spark.cast_single_data_type_0.1", "parameters": {"column": "EventTime", "original_data_type": "Long", "data_type": "float"}, "inputs": [{"name": "df", "node_id": "f35a528b-19a2-4fcd-911e-03a146ebac09", "output_name": "default"}], "outputs": [{"name": "default"}]}]}