{ "cells": [ { "cell_type": "markdown", "id": "1334d92e-34f4-448c-b6af-73cba89e9d6d", "metadata": {}, "source": [ "# PyKX Query Welcome Database\n", "This notebook demonstrates querying the welcomedb database using PyKX." ] }, { "cell_type": "code", "execution_count": 1, "id": "11e9b62a-9cda-4a57-99ca-3b13aff6b7f3", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "import os\n", "import boto3\n", "import json\n", "import datetime\n", "\n", "import pykx as kx\n", "\n", "from managed_kx import *\n", "from env_kdb_1 import *\n", "\n", "HDB_CLUSTER_NAME=\"cluster_welcomedb\"" ] }, { "cell_type": "code", "execution_count": 2, "id": "cb429a9f-e8a4-41d2-8572-c4e8a49ad387", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using variables ...\n" ] } ], "source": [ "# triggers credential get\n", "session=None\n", "\n", "try:\n", " # aws: use ada for credentials\n", " subprocess.call([\"which\", \"ada\"])\n", " os.system(f\"ada credentials update --account={ACCOUNT_ID} --provider=isengard --role=Admin --once\")\n", "except: \n", " None\n", "\n", "if AWS_ACCESS_KEY_ID is None:\n", " print(\"Using Defaults ...\")\n", " # create AWS session: using access variables\n", " session = boto3.Session()\n", "else:\n", " print(\"Using variables ...\")\n", " session = boto3.Session(\n", " aws_access_key_id=AWS_ACCESS_KEY_ID,\n", " aws_secret_access_key=AWS_SECRET_ACCESS_KEY,\n", " aws_session_token=AWS_SESSION_TOKEN\n", " )\n", "\n", "# create finspace client\n", "client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)" ] }, { "cell_type": "markdown", "id": "32c26a3e-e51e-4981-951b-efca08e20e9a", "metadata": {}, "source": [ "# Connect to Cluster" ] }, { "cell_type": "code", "execution_count": 3, "id": "125c2a8e-1880-4043-9230-1089778e5bf8", "metadata": {}, "outputs": [], "source": [ "# Query the HDB\n", "hdb = get_pykx_connection(client, \n", " environmentId=ENV_ID, clusterName=HDB_CLUSTER_NAME, \n", " userName=KDB_USERNAME, boto_session=session)" ] }, { "cell_type": "markdown", "id": "96df5426-05fd-442c-b8f6-8cb336abe085", "metadata": {}, "source": [ "# Table Schema" ] }, { "cell_type": "code", "execution_count": 4, "id": "a0e9e88b-2c48-4c12-9f68-6be7119bd5e3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tfa
c
dateb'd'
symb's'p
timeb'p'
numberb'j'
\n", "
" ], "text/plain": [ " t f a\n", "c \n", "date b'd' \n", "sym b's' p\n", "time b'p' \n", "number b'j' " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "schema_pdf = hdb(\"meta(`example)\").pd()\n", "\n", "display(schema_pdf)" ] }, { "cell_type": "markdown", "id": "d797007e-355b-4fee-9c6e-a11273d27d76", "metadata": {}, "source": [ "# Query Table" ] }, { "cell_type": "code", "execution_count": 5, "id": "f6107bbb-e8fd-46df-97a1-6033d628f38b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countsavg_numavg_sq_num
date
2023-04-141000000499800.4644053.331963e+11
2023-04-151000000499865.0081593.332692e+11
2023-04-161000000499912.3791273.332060e+11
2023-04-171000000500078.3933863.334004e+11
2023-04-181000000500264.6844123.336606e+11
2023-04-191000000499849.9629123.333436e+11
2023-04-201000000500082.2578293.334472e+11
2023-04-211000000500169.4923543.336027e+11
2023-04-221000000499903.0475773.331833e+11
2023-04-231000000500277.4927903.335924e+11
\n", "
" ], "text/plain": [ " counts avg_num avg_sq_num\n", "date \n", "2023-04-14 1000000 499800.464405 3.331963e+11\n", "2023-04-15 1000000 499865.008159 3.332692e+11\n", "2023-04-16 1000000 499912.379127 3.332060e+11\n", "2023-04-17 1000000 500078.393386 3.334004e+11\n", "2023-04-18 1000000 500264.684412 3.336606e+11\n", "2023-04-19 1000000 499849.962912 3.333436e+11\n", "2023-04-20 1000000 500082.257829 3.334472e+11\n", "2023-04-21 1000000 500169.492354 3.336027e+11\n", "2023-04-22 1000000 499903.047577 3.331833e+11\n", "2023-04-23 1000000 500277.492790 3.335924e+11" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Rows: 10,000,000\n" ] } ], "source": [ "# Dates, counts, and analytics\n", "hdb_pdf = hdb(\"select counts:count i, avg_num: avg number, avg_sq_num: avg sq number by date from example\").pd()\n", "\n", "# Number of Rows\n", "hdb_rows = hdb(\"count example\").py()\n", "\n", "display(hdb_pdf)\n", "\n", "# Number of Rows\n", "print(f\"Rows: {hdb_rows:,}\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "70e7ef98-be09-4d93-ae90-2e957732de4e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Last Run: 2023-06-27 13:59:43.520313\n" ] } ], "source": [ "print( f\"Last Run: {datetime.datetime.now()}\" )" ] }, { "cell_type": "code", "execution_count": null, "id": "f00c649f-6cb9-4723-8cea-e34b0efa76e6", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }