{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8a789a6f-42e4-412f-add0-01d39b175911",
   "metadata": {
    "tags": []
   },
   "source": [
    "## **EDIT THE ACCOUNT ID, NEPTUNE CLUSTER ENDPOINT, AND REGION** ##\n",
    "\n",
    "The S3 bucket name and the Neptune load IAM role ARN are derived from `account_id`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ad697be-eb71-4216-98cd-a50a8dc36e7e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# AWS account ID; used to build the bucket name and the Neptune load role ARN\n",
    "account_id = \"\"\n",
    "# Neptune cluster endpoint host name only (the loader URL adds https:// and :8182)\n",
    "neptune_cluster_url = \"\"\n",
    "# AWS region passed to the Neptune bulk loader\n",
    "region = \"\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afb43d1a-811e-42f3-9af5-5b22434b56f2",
   "metadata": {},
   "source": [
    "# Libraries and global variable definitions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7084001-bb17-4f4c-9924-fcc5a51a5fa3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import urllib.parse\n",
    "from datetime import datetime, timedelta\n",
    "\n",
    "import awswrangler as wr\n",
    "import boto3\n",
    "import pandas as pd\n",
    "import requests\n",
    "from rdflib import RDF, RDFS, OWL, Namespace, Graph, URIRef, Literal\n",
    "from rdflib.namespace import XSD\n",
    "\n",
    "s3 = boto3.resource(\"s3\")\n",
    "turtleFileDirPath = \"asset-input-data/turtle-files/\"\n",
    "csv_name = \"SITE_01.csv\"\n",
    "bucket_name = f\"model-data-bucket-{account_id}\"\n",
    "assets_csv_path = f\"s3://{bucket_name}/{csv_name}\"\n",
    "turtle_s3_path = f\"s3://{bucket_name}/{turtleFileDirPath}\"\n",
    "load_neptune_iam_role = f\"arn:aws:iam::{account_id}:role/neptune-load-from-s3-{account_id}\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "de80b481-1019-457e-afa4-4f1308af8e9f",
   "metadata": {},
   "source": [
    "# Functions to create brick turtle file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75777f37-1416-4030-bff0-0cfd119369d0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Substrings (matched case-insensitively) that mark an asset as site-level\n",
    "SITE_LEVEL_MARKERS = (\"GBL\", \"globalLogicOffice\", \"oaConditions\")\n",
    "\n",
    "\n",
    "def determineAssetType(slotpath_text):\n",
    "    \"\"\"Map an asset-type / slot-path string to the Brick class name used for the asset.\n",
    "\n",
    "    Matching is case-insensitive and substring based; \"RTU\" takes precedence over\n",
    "    the site-level markers.\n",
    "\n",
    "    Returns one of 'RTU', 'SiteLevelAsset' or 'unknown'.\n",
    "    \"\"\"\n",
    "    upper_text = slotpath_text.upper()\n",
    "    if \"RTU\" in upper_text:\n",
    "        return \"RTU\"\n",
    "    # Each marker must be tested individually: `'GBL' or ... in text` is always truthy.\n",
    "    if any(marker.upper() in upper_text for marker in SITE_LEVEL_MARKERS):\n",
    "        return \"SiteLevelAsset\"\n",
    "    return \"unknown\"\n",
    "\n",
    "\n",
    "def _cell_text(value):\n",
    "    \"\"\"Return a CSV cell as a stripped string; missing values (NaN/None) become ''.\"\"\"\n",
    "    if pd.isna(value):\n",
    "        return \"\"\n",
    "    return str(value).strip()\n",
    "\n",
    "\n",
    "def buildBrickTurtleFile(turtleFileDirPath, assets_csv_path):\n",
    "    \"\"\"Build a Brick turtle file from the asset CSV and upload it to S3.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    turtleFileDirPath : str\n",
    "        Key prefix inside `bucket_name` under which the .ttl file is written.\n",
    "    assets_csv_path : str\n",
    "        s3:// URI of the asset CSV. The file name without its extension,\n",
    "        lower-cased, is used as the site name.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The building namespace URI used for every node in the graph.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    Exception\n",
    "        Whatever `wr.s3.read_csv` raises when the CSV cannot be read.\n",
    "\n",
    "    Relies on the notebook globals `s3` and `bucket_name`.\n",
    "    \"\"\"\n",
    "    print(turtleFileDirPath)\n",
    "    print(assets_csv_path)\n",
    "    file_name = assets_csv_path.split(\"/\")[-1]\n",
    "    site_name = file_name.split(\".\")[0].lower()\n",
    "\n",
    "    building_namespace = \"http://amazon.bms.com/building-\" + site_name + \"#\"\n",
    "    g = Graph()\n",
    "    BMS = Namespace(building_namespace)\n",
    "    g.bind(\"bms\", BMS)\n",
    "\n",
    "    BRICK = Namespace(\"https://brickschema.org/schema/Brick#\")\n",
    "    g.bind(\"brick\", BRICK)\n",
    "\n",
    "    # RDFS is the namespace imported from rdflib; no local redefinition needed.\n",
    "    g.bind(\"rdfs\", RDFS)\n",
    "\n",
    "    # read csv file\n",
    "    try:\n",
    "        assets_csv = wr.s3.read_csv(path=assets_csv_path)\n",
    "    except Exception:\n",
    "        # Re-raise so the real cause is visible instead of a later unbound-variable error.\n",
    "        print(\"no data file located\")\n",
    "        raise\n",
    "\n",
    "    # URL-encode names so they are valid inside a URI\n",
    "    encoded_site_name = urllib.parse.quote_plus(site_name)\n",
    "    site_node = BMS[encoded_site_name]\n",
    "\n",
    "    if site_name:\n",
    "        g.add((site_node, RDF.type, BRICK.Building))\n",
    "        g.add((site_node, RDFS.label, Literal(site_name)))\n",
    "\n",
    "    # parse the CSV file and create the graph\n",
    "    for asset_row in assets_csv.itertuples():\n",
    "        # itertuples() puts the index at position 0, so position N is the Nth CSV column.\n",
    "        asset_name = _cell_text(asset_row[2])\n",
    "        asset_timeSeries_identify = _cell_text(asset_row[3])\n",
    "        asset_type = _cell_text(asset_row[4])\n",
    "        point_name = _cell_text(asset_row[5])\n",
    "        point_timeSeries_identify = _cell_text(asset_row[6])\n",
    "\n",
    "        asset_node = None\n",
    "        if asset_name:\n",
    "            asset_node = BMS[urllib.parse.quote_plus(asset_name)]\n",
    "            g.add((asset_node, RDF.type, BRICK[determineAssetType(asset_type)]))\n",
    "            g.add((asset_node, BRICK.hasLocation, site_node))\n",
    "            g.add((asset_node, BRICK.timeseries, Literal(asset_timeSeries_identify)))\n",
    "            g.add((asset_node, RDFS.label, Literal(asset_name)))\n",
    "\n",
    "        if point_name:\n",
    "            encoded_point_name = urllib.parse.quote_plus(point_name)\n",
    "            point_node = BMS[encoded_point_name]\n",
    "            g.add((site_node, BRICK.hasPoint, point_node))\n",
    "            # Only link the asset when the row actually names one.\n",
    "            if asset_node is not None:\n",
    "                g.add((asset_node, BRICK.hasPoint, point_node))\n",
    "            # The encoded point name doubles as the point's Brick class name.\n",
    "            g.add((point_node, RDF.type, BRICK[encoded_point_name]))\n",
    "            g.add((point_node, BRICK.timeseries, Literal(point_timeSeries_identify)))\n",
    "            g.add((point_node, RDFS.label, Literal(point_name)))\n",
    "\n",
    "    # write the ttl to s3\n",
    "    file_name = site_name + \"_amazon_bms_demo.ttl\"\n",
    "    s3_path = turtleFileDirPath + file_name\n",
    "    s3.Bucket(bucket_name).put_object(Key=s3_path, Body=g.serialize(format=\"turtle\"))\n",
    "    return building_namespace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d40ba479-7d6a-4adf-93de-0b24595514da",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "result = buildBrickTurtleFile(turtleFileDirPath, assets_csv_path)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3d23290-a525-49e1-82ff-adbef19369ed",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "turtle_s3_path"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ff197f4-9a15-4a0c-834c-1bfd4e83ae60",
   "metadata": {},
   "source": [
    "# Upload turtle file to Neptune"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c6c69ec-7e6c-4b9f-8743-e08c333a5d10",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Submit a Neptune bulk-load job for the turtle files under turtle_s3_path\n",
    "neptuneLoadUrl = f\"https://{neptune_cluster_url}:8182/loader\"\n",
    "payload = {\n",
    "    \"source\": turtle_s3_path,\n",
    "    \"format\": \"turtle\",\n",
    "    \"iamRoleArn\": load_neptune_iam_role,\n",
    "    \"region\": region,\n",
    "    \"failOnError\": \"FALSE\",\n",
    "    \"parallelism\": \"MEDIUM\",\n",
    "    \"updateSingleCardinalityProperties\": \"TRUE\",\n",
    "    \"queueRequest\": \"TRUE\",\n",
    "}\n",
    "load_res = requests.post(neptuneLoadUrl, data=payload, timeout=120)\n",
    "print(load_res.text)\n",
    "load_res.raise_for_status()\n",
    "\n",
    "# Keep the job id in its own variable so the status cell below can be re-run.\n",
    "load_id = load_res.json()[\"payload\"][\"loadId\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13937b0b-26d6-4db3-9ebb-49dd3c458731",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Poll the load job; safe to re-run until the status is LOAD_COMPLETED\n",
    "status_res = requests.get(f\"{neptuneLoadUrl}/{load_id}\", timeout=30)\n",
    "\n",
    "status_res.json()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}