# # BasicTick: Create DB
#
# ## Steps
# 1. Untar hdb.tar.gz for the hdb data
# 2. Upload hdb to staging S3 bucket
# 3. Create database
# 4. Add HDB data to database

# ## Setup
# imports, environmentId, accountId
import os
import boto3
import json
import datetime
import shutil
import subprocess

# NOTE(review): the star imports are relied on for S3_BUCKET, ACCOUNT_ID,
# ENV_ID, ENDPOINT_URL, the AWS_* credential variables and
# wait_for_changeset_status; which module defines which is not visible
# here -- confirm and switch to explicit imports.
from managed_kx import *
from env_kdb_1 import *

# Source data directory
SOURCE_DATA_DIR = "hdb"

# S3 bucket for external data and code
S3_DEST = f"s3://{S3_BUCKET}/data/{SOURCE_DATA_DIR}/"

# Managed KX Database and Cluster names to create
DB_NAME = "basictickdb"

# Best-effort credential refresh: only attempted when the `ada` tool is on
# PATH, and a failure is reported instead of being silently swallowed.
ada_path = shutil.which("ada")
if ada_path is not None:
    ada_status = subprocess.run(
        [
            ada_path, "credentials", "update",
            f"--account={ACCOUNT_ID}",
            "--provider=isengard",
            "--role=Admin",
            "--once",
        ],
        check=False,
    ).returncode
    if ada_status != 0:
        print(f"ada credentials update exited with status {ada_status}; continuing")

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # create AWS session: boto3 resolves credentials through its default chain
    session = boto3.Session()
else:
    print("Using variables ...")
    # create AWS session: using the explicit access variables
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN,
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

# ## 1. Untar hdb.tar.gz
# hdb database will be found in hdb directory
# check=True stops the notebook here if the archive is missing or corrupt,
# rather than letting the upload step run against a stale/empty directory.
subprocess.run(["tar", "-xf", "hdb.tar.gz"], check=True)

# Show what was extracted (same listing as `ls -la hdb`).
hdb_listing = subprocess.run(
    ["ls", "-la", SOURCE_DATA_DIR],
    check=True,
    capture_output=True,
    text=True,
)
print(hdb_listing.stdout, end="")
# ## 2. Upload hdb data
# using aws cli, copy hdb to staging bucket
import subprocess

# Pass credentials to the AWS CLI through the child process environment
# instead of interpolating secrets into a shell command string.
aws_env = os.environ.copy()
if AWS_ACCESS_KEY_ID is not None:
    explicit_credentials = {
        "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID,
        "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY,
        "AWS_SESSION_TOKEN": AWS_SESSION_TOKEN,
    }
    # Environment values must be strings; skip anything left unset (None).
    aws_env.update(
        {key: str(value) for key, value in explicit_credentials.items() if value is not None}
    )

# execute the S3 copy; check=True raises CalledProcessError on a failed sync
# so the changeset step never runs against a partial upload
subprocess.run(
    ["aws", "s3", "sync", "--exclude", ".DS_Store", SOURCE_DATA_DIR, S3_DEST],
    env=aws_env,
    check=True,
)

# list the destination so the uploaded partitions are visible in the output
subprocess.run(["aws", "s3", "ls", S3_DEST], env=aws_env, check=True)
# ## 3. Create database
# Fetch the database if it already exists, otherwise create it.

# assume it exists
create_db = False

try:
    resp = client.get_kx_database(environmentId=ENV_ID, databaseName=DB_NAME)
except client.exceptions.ResourceNotFoundException:
    # Only "not found" means the database has to be created; any other
    # failure (access denied, throttling, network) now propagates instead of
    # being mistaken for a missing database.
    create_db = True

if create_db:
    print(f"CREATING Database: {DB_NAME}")
    resp = client.create_kx_database(
        environmentId=ENV_ID,
        databaseName=DB_NAME,
        description="Basictick kdb database",
    )
    print(f"CREATED Database: {DB_NAME}")

# drop the boto3 transport metadata before printing the database description
resp.pop('ResponseMetadata', None)

print(json.dumps(resp, sort_keys=True, indent=4, default=str))
# ## 4. Add HDB data to database


def build_change_requests(source_dir, s3_dest):
    """Build one PUT change request per top-level entry of ``source_dir``.

    Directories map to ``/<name>/`` in the database; plain files map to the
    database root ``/``.

    Args:
        source_dir: Local directory whose top-level entries were uploaded.
        s3_dest: S3 prefix (ending in ``/``) the entries were uploaded under.

    Returns:
        A list of dicts with ``changeType``, ``s3Path`` and ``dbPath`` keys,
        ordered by entry name so repeated runs produce the same request.
    """
    requests = []
    for entry in sorted(os.listdir(source_dir)):
        # The upload step excludes .DS_Store, so it has no S3 object to PUT.
        if entry == ".DS_Store":
            continue
        if os.path.isdir(os.path.join(source_dir, entry)):
            requests.append(
                {'changeType': 'PUT', 's3Path': f"{s3_dest}{entry}/", 'dbPath': f"/{entry}/"}
            )
        else:
            requests.append(
                {'changeType': 'PUT', 's3Path': f"{s3_dest}{entry}", 'dbPath': "/"}
            )
    return requests


changes = build_change_requests(SOURCE_DATA_DIR, S3_DEST)
if not changes:
    # Fail with a clear message instead of sending an empty changeset request.
    raise ValueError(f"No data found in '{SOURCE_DATA_DIR}' to add to {DB_NAME}")

resp = client.create_kx_changeset(
    environmentId=ENV_ID,
    databaseName=DB_NAME,
    changeRequests=changes,
)

# drop the boto3 transport metadata; keep the id for the wait step below
resp.pop('ResponseMetadata', None)
changeset_id = resp['changesetId']

print("Changeset...")
print(json.dumps(resp, sort_keys=True, indent=4, default=str))

# NOTE(review): wait_for_changeset_status is not defined in this notebook
# (presumably provided by the managed_kx star import) -- confirm what it
# does when the changeset fails.
wait_for_changeset_status(client, ENV_ID, DB_NAME, changeset_id)
print("**Done**")
changeType | \n", "s3Path | \n", "dbPath | \n", "
---|---|---|
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.23/ | \n", "/2023.04.23/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.15/ | \n", "/2023.04.15/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.14/ | \n", "/2023.04.14/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.22/ | \n", "/2023.04.22/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.18/ | \n", "/2023.04.18/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.20/ | \n", "/2023.04.20/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.16/ | \n", "/2023.04.16/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.17/ | \n", "/2023.04.17/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.21/ | \n", "/2023.04.21/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/2023.04.19/ | \n", "/2023.04.19/ | \n", "
PUT | \n", "s3://kdb-demo-829845998889-kms/data/hdb/sym | \n", "/ | \n", "