#!/bin/bash
#********************************************************
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
#********************************************************
# Blog Post Project.....: IoT Analytical System
#********************************************************
# Script Purpose:
# ---------------
#
# Set up the solution environment by running the following
# configuration tasks, in this order:
#
# (1) Deploy the AWS SAM-based solution
# (2) Upload the CSV file with customer electricity meter
#     data to S3 bucket
#     "iot-analytic-bucket--"
# (3) Upload the CSV files with IoT electricity metering
#     data to S3 bucket
#     "iot-analytic-bucket--"
# (4) Create database "iotanalyticsdb" on
#     AWS Glue Data Catalog
# (5) Create external tables "customer_meter_sensor" and
#     "iot_electricity_metering" on AWS Glue Data Catalog
# (6) Update the partitions on external table
#     "iot_electricity_metering"
#********************************************************
# Execution:
# -----------
#
# > ./deploy-solution.sh
#********************************************************

# Set relative directory paths, to be used throughout the script.
export DATASET_PATH=../../datasets
export SAM_PATH=../../sam-iot-analytics

# Deploy the AWS SAM-based solution, using the AWS SAM CLI
# --------------------------------------------------------------
echo "################################################################"
echo " Building and deploying AWS SAM application sam-iot-analytics..."
echo "################################################################"

sam build --use-container \
    --template ${SAM_PATH}/template.yaml \
    --build-dir ${SAM_PATH}/.aws-sam/build/

sam deploy --stack-name sam-iot-analytics \
    --template ${SAM_PATH}/.aws-sam/build/template.yaml \
    --guided --capabilities CAPABILITY_NAMED_IAM

# --------------------------------------------------------------
# Upload CSV file with customer electricity meter data to
# S3 bucket "iot-analytic-bucket--"
# --------------------------------------------------------------
echo "##############################################################################"
echo " Uploading CSV file with customer electricity meter data to Data Lake S3 bucket..."
echo "##############################################################################"

# Get destination bucket name
BUCKET_NAME=`aws s3api list-buckets --query "Buckets[].Name" --output=yaml | grep iot-analytic-bucket | cut -d " " -f 2`

# Copy CSV file with customer data to S3 bucket "iot-analytic-bucket--"
aws s3 cp ${DATASET_PATH}/customer_meter_sensor/customer_meter_sensor.csv \
    s3://${BUCKET_NAME}/customer_meter_sensor/customer_meter_sensor.csv

# --------------------------------------------------------------
# Upload CSV files with IoT electricity metering data to
# S3 bucket "iot-analytic-bucket--"
# --------------------------------------------------------------
echo "#############################################################################"
echo " Uploading CSV files with IoT electricity metering data to Data Lake S3 bucket..."
echo "#############################################################################"

# Get list of date-named sub-directories from the dataset main directory
DIR_LIST=`ls ${DATASET_PATH}/iot_electricity_metering`
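
# The loop below assumes (based on its cut calls on "-") that each sub-directory
# is named after a date in YYYY-MM-DD form, for example:
#
#   ${DATASET_PATH}/iot_electricity_metering/2022-01-01/*.csv
#
# Each date sub-directory is mapped to a Hive-style S3 prefix
# (year=YYYY/month=MM/day=DD/), which is what lets the MSCK REPAIR TABLE
# statement at the end of this script discover the partitions automatically.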
# Run through DIR_LIST in a loop
for DATE_DIR in ${DIR_LIST}
do
    # Extract YEAR, MONTH and DAY components from each date sub-directory.
    # These date components will be used to create the S3 partitions.
    YEAR=`echo ${DATE_DIR} | cut -d "-" -f 1`
    MONTH=`echo ${DATE_DIR} | cut -d "-" -f 2`
    DAY=`echo ${DATE_DIR} | cut -d "-" -f 3`

    # Copy CSV files from each local dataset date sub-directory to the
    # matching partition on S3 bucket "iot-analytic-bucket--".
    # The --exclude "*" filter is required so that --include "*.csv"
    # copies only the CSV files.
    aws s3 cp ${DATASET_PATH}/iot_electricity_metering/${DATE_DIR} \
        s3://${BUCKET_NAME}/iot_electricity_metering/year=${YEAR}/month=${MONTH}/day=${DAY}/ \
        --recursive --exclude "*" --include "*.csv"
done

# --------------------------------------------------------------
# Create an Athena Workgroup configuration to run Athena queries
# in this deploy script
# --------------------------------------------------------------
echo "#####################################################################"
echo " Creating an Athena Workgroup configuration to run Athena queries..."
echo "#####################################################################"

# Get Athena output bucket name
ATHENA_OUTPUT_BUCKET=`aws s3api list-buckets --query "Buckets[].Name" --output=yaml | grep iot-athena-results | cut -d " " -f 2`

# Create Athena workgroup
aws athena create-work-group \
    --name DDL_Group \
    --configuration ResultConfiguration={OutputLocation="s3://${ATHENA_OUTPUT_BUCKET}/ddl_output/"}

# --------------------------------------------------------------
# Create database "iotanalyticsdb" on AWS Glue Data Catalog
# --------------------------------------------------------------
echo "##############################################################"
echo ' Creating database "iotanalyticsdb" on AWS Glue Data Catalog...'
echo "##############################################################"

aws athena start-query-execution \
    --query-string "create database if not exists iotanalyticsdb" \
    --work-group "DDL_Group"

# --------------------------------------------------------------
# Create external tables "customer_meter_sensor" and
# "iot_electricity_metering" on AWS Glue Data Catalog
# --------------------------------------------------------------

# Create external table "customer_meter_sensor"
echo "##################################################"
echo ' Creating external table "customer_meter_sensor"...'
echo "##################################################"

aws athena start-query-execution \
    --query-string "CREATE EXTERNAL TABLE customer_meter_sensor(
            customerid bigint,
            sensorid bigint,
            sensorgroup string)
        ROW FORMAT DELIMITED
            FIELDS TERMINATED BY ','
        STORED AS
            INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
            OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
        LOCATION 's3://${BUCKET_NAME}/customer_meter_sensor/'
        TBLPROPERTIES ('skip.header.line.count'='1')" \
    --work-group "DDL_Group" \
    --query-execution-context Database=iotanalyticsdb,Catalog=AwsDataCatalog
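
# Note (not part of the original flow): "aws athena start-query-execution" is
# asynchronous and returns as soon as the query is submitted, so the DDL
# statements in this script are not guaranteed to have finished before the
# next one starts. If a later statement must wait for an earlier one (for
# example, the MSCK REPAIR at the end needs the table created below), the
# execution ID can be captured and polled. A minimal sketch, with illustrative
# variable names (QUERY_ID, STATE):
#
#   QUERY_ID=`aws athena start-query-execution \
#       --query-string "..." \
#       --work-group "DDL_Group" \
#       --query-execution-context Database=iotanalyticsdb,Catalog=AwsDataCatalog \
#       --query "QueryExecutionId" --output text`
#   while true
#   do
#       STATE=`aws athena get-query-execution --query-execution-id ${QUERY_ID} \
#           --query "QueryExecution.Status.State" --output text`
#       case "${STATE}" in
#           SUCCEEDED) break ;;
#           FAILED|CANCELLED) echo "Athena query ended in state ${STATE}"; exit 1 ;;
#       esac
#       sleep 2
#   done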
echo "#####################################################" aws athena start-query-execution \ --query-string "CREATE EXTERNAL TABLE iot_electricity_metering( sensorid bigint, voltage bigint, \`current\` bigint, frequency bigint, kwh double, hourcollected string) PARTITIONED BY ( year string, month string, day string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' LOCATION 's3://${BUCKET_NAME}/iot_electricity_metering/' TBLPROPERTIES ('skip.header.line.count'='1')" \ --work-group "DDL_Group" \ --query-execution-context Database=iotanalyticsdb,Catalog=AwsDataCatalog # -------------------------------------------------------------- # Update the partitions on external table "iot_electricity_metering" # -------------------------------------------------------------- echo "#####################################################################" echo ' Updating partitions on external table "iot_electricity_metering"...' echo "#####################################################################" aws athena start-query-execution \ --query-string "MSCK REPAIR TABLE iot_electricity_metering" \ --work-group "DDL_Group" \ --query-execution-context Database=iotanalyticsdb,Catalog=AwsDataCatalog # --------------------------------------------------------------