{"metadata":{"title":"demo_notebook_provisioned_formatted","kernelspec":{"display_name":"Redshift","language":"postgresql","name":"Redshift"},"language_info":{"file_extension":".sql","name":"Redshift"},"version":1},"nbformat":4,"nbformat_minor":0,"cells":[{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Create a new database named copy_data.\r\n","create database copy_data"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Create a new table for copying training data from S3\r\n","CREATE TABLE bank_details_training(\r\n"," age numeric,\r\n"," jobtype char (25),\r\n"," marital char (25),\r\n"," education char (25),\r\n"," default_col char (25),\r\n"," housing char (25),\r\n"," loan char (25),\r\n"," contact char (25),\r\n"," month char (25),\r\n"," day_of_week char (25),\r\n"," duration numeric,\r\n"," campaign numeric,\r\n"," pdays numeric,\r\n"," previous numeric,\r\n"," poutcome char (25),\r\n"," emp_var_rate numeric,\r\n"," cons_price_idx numeric, \r\n"," cons_conf_idx numeric, \r\n"," euribor3m numeric,\r\n"," nr_employed numeric,\r\n"," y char(1) ) ;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Create a new table for copying inference data from S3\r\n","CREATE TABLE bank_details_inference(\r\n"," age numeric,\r\n"," jobtype char (25),\r\n"," marital char (25),\r\n"," education char (25),\r\n"," default_col char (25),\r\n"," housing char (25),\r\n"," loan char (25),\r\n"," contact char (25),\r\n"," month char (25),\r\n"," day_of_week char (25),\r\n"," duration numeric,\r\n"," campaign numeric,\r\n"," pdays numeric,\r\n"," previous numeric,\r\n"," poutcome char (25),\r\n"," emp_var_rate numeric,\r\n"," cons_price_idx numeric, \r\n"," cons_conf_idx numeric, \r\n"," euribor3m numeric,\r\n"," nr_employed numeric,\r\n"," y char(1) ) ;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Update the placeholder for S3 bucket to point to the snapshot export location. eg: s3://bucket_name/snapshot_export/db_name/training_table_name/\r\n","-- Update the placeholder for IAM role to use the role that has access to S3 bucket. Make sure to attach it to the Redshift cluster\r\n","-- Run a COPY command to copy training data from S3 to bank_details_training\r\n","copy bank_details_training\r\n","from ''\r\n","FORMAT AS PARQUET\r\n","iam_role 'arn:aws:iam::123456789012:role/iam_role'"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Update the placeholder for S3 bucket to point to the snapshot export location. eg: s3://bucket_name/snapshot_export/db_name/inference_table_name/\r\n","-- Update the placeholder for IAM role to use the role that has access to S3 bucket. Make sure to attach it to the Redshift cluster\r\n","-- Run a COPY command to copy training data from S3 to bank_details_inference\r\n","copy bank_details_inference\r\n","from ''\r\n","FORMAT AS PARQUET\r\n","iam_role 'arn:aws:iam::123456789012:role/iam_role'"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Validation of training table load\r\n","select * from bank_details_training"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Creating a new training table to add new column for row number. This will be used for including only a subset of records from the original training table for training the ML model\r\n","create table bank_details_training_rnum as (select row_number() over (partition by 1) as rnum, t.* from bank_details_training t)"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Validation of records in the new training table. It should give 4000\r\n","select count(*) from bank_details_training_rnum where rnum > 119 order by 1"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Create ML model. Provide only the bucket name (without any prefix such as s3://)\r\n","CREATE MODEL td_subscription_demo\r\n","FROM \r\n","(SELECT age\r\n",",jobtype\r\n",",marital\r\n",",education\r\n",",default_col\r\n",",housing\r\n",",loan\r\n",",contact\r\n",",month\r\n",",day_of_week\r\n",",duration\r\n",",campaign\r\n",",pdays\r\n",",previous\r\n",",poutcome\r\n",",emp_var_rate\r\n",",cons_price_idx\r\n",",cons_conf_idx\r\n",",euribor3m\r\n",",nr_employed\r\n",",y\r\n","FROM bank_details_training_rnum\r\n","where rnum > 119\r\n",")\r\n","TARGET y\r\n","FUNCTION predict_td_subscription_demo\r\n","iam_role 'arn:aws:iam::123456789012:role/iam_role'\r\n","SETTINGS (\r\n"," S3_BUCKET 'bucket_name'\r\n",");"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Check model status. Wait until the status changes to READY. It takes approx. 1 hour for this sample dataset.\r\n","show model td_subscription_demo"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["-- Check the model accuracy by comparing the output of ML model's prediction and the actual data. \r\n","\r\n","WITH infer_data\r\n"," AS (\r\n"," SELECT y as actual, predict_td_subscription_demo(age,jobtype,marital,education,default_col,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed) AS predicted,\r\n"," CASE WHEN actual = predicted THEN 1::INT\r\n"," ELSE 0::INT END AS correct\r\n"," FROM bank_details_inference\r\n"," ),\r\n"," aggr_data AS (\r\n"," SELECT SUM(correct) as num_correct, COUNT(*) as total FROM infer_data\r\n"," )\r\n","SELECT cast(num_correct as decimal(10,2)) AS Correct_Entries, cast(total as decimal(10,2)) AS Total_Entries, (Correct_Entries/Total_Entries) AS Accuracy FROM aggr_data;\r\n","\r\n","--Predict how many will subscribe for term deposit vs not subscribe\r\n","\r\n","WITH term_data AS ( SELECT predict_td_subscription_demo( age,jobtype,marital,education,default_col,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed) AS predicted\r\n","FROM bank_details_inference )\r\n","SELECT\r\n","CASE WHEN predicted = 'y' THEN 'Yes-will-do-a-term-deposit'\r\n"," WHEN predicted = 'n' THEN 'No-term-deposit'\r\n"," ELSE 'Neither' END as deposit_prediction,\r\n","COUNT(1) AS count\r\n","from term_data GROUP BY 1;"],"cell_type":"code","execution_count":0,"outputs":[]}]}