# PyKX Query Components
Query every component of the deployment: the RDB, the HDB, and the Gateway.


In [26]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import datetime
import json
import os
import shutil
import subprocess

import boto3
import pykx as kx

from managed_kx import *
from env_kdb_1 import *

from basictick_setup import *

In [27]:
# Build the boto3 session and the FinSpace client used by every later cell.
session = None

# Optional credential refresh through the `ada` CLI. This step is best effort:
# it is skipped when `ada` is not on PATH, and any failure is reported rather
# than silently swallowed so a stale-credential problem is visible.
ada_path = shutil.which("ada")
if ada_path is not None:
    try:
        # Argument list (no shell), so ACCOUNT_ID is never shell-interpreted.
        ada_result = subprocess.run(
            [
                ada_path, "credentials", "update",
                f"--account={ACCOUNT_ID}",
                "--provider=isengard",
                "--role=Admin",
                "--once",
            ],
            check=False,
        )
        if ada_result.returncode != 0:
            print(f"ada credentials update exited with status {ada_result.returncode}; continuing")
    except Exception as exc:
        # Keep the notebook running, but say why the refresh did not happen.
        print(f"ada credentials update skipped: {exc!r}")

if AWS_ACCESS_KEY_ID is None:
    print("Using Defaults ...")
    # No explicit keys configured: let boto3 resolve credentials itself
    session = boto3.Session()
else:
    print("Using variables ...")
    # Explicit keys configured: build the session from the access variables
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN
    )

# create finspace client
client = session.client(service_name='finspace', endpoint_url=ENDPOINT_URL)

Using variables ...


# Current State of HDB

In [28]:
# Open a PyKX connection to the HDB cluster
hdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=HDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# Row count of `example` per date, as a pandas DataFrame
hdb_pdf = hdb("select counts:count i by date from example").pd()

# Total row count of `example`, as a Python value
hdb_rows = hdb("count example").py()

# Current State of RDB

In [29]:
# Open a PyKX connection to the RDB cluster
rdb = get_pykx_connection(
    client,
    environmentId=ENV_ID,
    clusterName=RDB_CLUSTER_NAME,
    userName=KDB_USERNAME,
    boto_session=session,
)

# First five and last five rows of `example`, as pandas DataFrames
rdb_head_pdf = rdb("select [5] from example").pd()
rdb_tail_pdf = rdb("select [-5] from example").pd()

# Total row count of `example` at the time of this query, as a Python value
rdb_rows = rdb("count example").py()

# Current State of GW

In [30]:
# Connect to the gateway (GW) cluster
gw = get_pykx_connection(client, 
                          environmentId=ENV_ID, clusterName=GW_CLUSTER_NAME, 
                          userName=KDB_USERNAME, boto_session=session)

# q query listing the processes the gateway knows about (used twice below)
PROCS_QUERY = "select process, handle, connected, address from .conn.procs"
# Maximum number of address characters shown before truncating for display
ADDRESS_DISPLAY_WIDTH = 110

proc_pdf = gw(PROCS_QUERY).pd()

# If any process is reported as not connected, ask the gateway to reconnect
# and fetch the process table again.
if proc_pdf["connected"].eq(False).any():
    print("reinit Gateway")
    gw("reinit[hdb_name; rdb_name]")
    proc_pdf = gw(PROCS_QUERY).pd()

# Truncate long addresses with an ellipsis; addresses that already fit are
# left untouched so they are not shown as if they had been cut.
full_address = proc_pdf["address"]
proc_pdf["address"] = full_address.where(
    full_address.str.len() <= ADDRESS_DISPLAY_WIDTH,
    full_address.str.slice(0, ADDRESS_DISPLAY_WIDTH) + "...",
)

display(proc_pdf)

Unnamed: 0,process,handle,connected,address
0,rdb,16,True,:ip-192-168-7-230.ec2.internal:5000:GATEWAY_basictickdb:Host=ip-192-168-7-230.ec2.internal&Port=5000&User=GATE...
1,hdb,17,True,:ip-192-168-14-219.ec2.internal:5000:GATEWAY_basictickdb:Host=ip-192-168-14-219.ec2.internal&Port=5000&User=GA...
2,hdb,18,True,:ip-192-168-1-82.ec2.internal:5000:GATEWAY_basictickdb:Host=ip-192-168-1-82.ec2.internal&Port=5000&User=GATEWA...


In [31]:
# Run the gateway query and keep the time-sorted result on the gateway as `res`.
# The trailing `;` stops q from sending the assigned table back over IPC; the
# returned value was previously converted with .pd() and then discarded.
gw("res: `time xasc queryData[`example;`];")

# Full result plus its first and last five rows, as pandas DataFrames.
# NOTE(review): gw_pdf pulls the entire result set to the client; if nothing
# else needs it, `gw("count res").py()` would give the row count without the
# transfer (as the HDB/RDB cells do) — confirm before removing.
gw_pdf = gw("res").pd()
gw_head_pdf = gw("select [5] from res").pd()
gw_tail_pdf = gw("select [-5] from res").pd()

# Number of Rows
gw_rows = len(gw_pdf.index)

# HDB Contents

In [32]:
# Per-date row counts from the HDB, followed by the table-wide total
hdb_rows_line = f"Rows: {hdb_rows:,}"

display(hdb_pdf)
print(hdb_rows_line)

Unnamed: 0_level_0,counts
date,Unnamed: 1_level_1
2023-04-14,1000000
2023-04-15,1000000
2023-04-16,1000000
2023-04-17,1000000
2023-04-18,1000000
2023-04-19,1000000
2023-04-20,1000000
2023-04-21,1000000
2023-04-22,1000000
2023-04-23,1000000


Rows: 43,061,601


# RDB Contents

In [33]:
# First and last five RDB rows, followed by the RDB row total
rdb_rows_line = f"Rows: {rdb_rows:,}"

display(rdb_head_pdf, rdb_tail_pdf)
print(rdb_rows_line)

Unnamed: 0,sym,time,number
0,add,2023-07-26 17:23:05.695406453,9
1,lfd,2023-07-26 17:23:05.695406453,48
2,okj,2023-07-26 17:23:05.695406453,26
3,feb,2023-07-26 17:23:05.695406453,75
4,idg,2023-07-26 17:23:05.695406453,74


Unnamed: 0,sym,time,number
0,njh,2023-07-26 19:22:49.685367404,21
1,hko,2023-07-26 19:22:49.685367404,46
2,lnb,2023-07-26 19:22:49.685367404,8
3,pga,2023-07-26 19:22:49.685367404,60
4,lcd,2023-07-26 19:22:49.685367404,86


Rows: 3,234,340


# GW Contents

In [34]:
# First and last five rows of the gateway result, followed by its row total
gw_rows_line = f"Rows: {gw_rows:,}"

display(gw_head_pdf, gw_tail_pdf)
print(gw_rows_line)

# Note: the HDB and RDB counts shown above will add up to slightly less than
# the GW count, because data is always arriving at the RDB: the RDB was
# queried on its own first and then again, later, as part of the GW query.

Unnamed: 0,sym,time,number
0,aoi,2023-04-14 23:17:28.473,53231
1,aoi,2023-04-14 23:17:28.473,153560
2,aoi,2023-04-14 23:17:28.473,449428
3,aoi,2023-04-14 23:17:28.473,631966
4,aoi,2023-04-14 23:17:28.473,941566


Unnamed: 0,sym,time,number
0,egb,2023-07-26 19:22:50.685378270,92
1,jpg,2023-07-26 19:22:50.685378270,94
2,cfn,2023-07-26 19:22:50.685378270,73
3,lka,2023-07-26 19:22:50.685378270,72
4,lnj,2023-07-26 19:22:50.685378270,65


Rows: 46,296,391


In [35]:
# Stamp the notebook with the time this run finished (datetime.now(), no tz)
last_run_at = datetime.datetime.now()
print(f"Last Run: {last_run_at}")

Last Run: 2023-07-26 19:23:27.236464
