""" file name : sve_profile_report.py create date : 04Oct2022 description : This program produces a simple report by reading in the processing results stored in wrangler_kvs_demo table in your enveironment. > python3 sve_profile_report.py change log : """ from datetime import datetime from IPython.display import HTML from jinja2 import Template import numpy as np import pandas as pd import awswrangler as wr # -- name of report results_filename = "sve_profile_report.html" # -- dump table to data frame df = wr.athena.read_sql_query("SELECT * FROM wrangler_kvs_demo", database="kvs_demo") # -- generate summary results -- summary = (df.agg({"sessionid":["count","nunique"]}) .T .reset_index() .drop(columns=["index"]) ) summary.columns = ["labels_detected_count","streaming_video_count"] summary = summary[["streaming_video_count","labels_detected_count"]] # -- generate label summary -- label_summary = df.groupby('labelname')\ .agg({"videomapping_kinesisvideomapping_frameoffsetmillis":["count","mean","median","min","max"]})\ .reset_index() label_summary.columns = ["label","detection count","mean_ms","median_ms","min_ms","max_ms"] # -- generate simulator results -- tmp = pd.DataFrame() for threshold in np.linspace(1000,10000,10): _t0 = (df .query('videomapping_kinesisvideomapping_frameoffsetmillis <= @threshold') .agg({"sessionid":["count","nunique"]}) .T .reset_index() .assign(threshold_ms=threshold) ) tmp = pd.concat([tmp,_t0],axis=0) tmp.columns = ["index","label_detection_count","streaming_video_count","threshold_ms"] tmp = tmp[["threshold_ms","label_detection_count","streaming_video_count"]] # -- generate simulator results by label -- _t1 = pd.DataFrame() for threshold in np.linspace(1000,10000,10): _t2 = (df .query('videomapping_kinesisvideomapping_frameoffsetmillis <= @threshold') .groupby('labelname') .agg({"sessionid":["count"]}) .T .reset_index() .pivot(index="level_0",columns=["level_1"]) .reset_index() .assign(threshold_ms=threshold) ) _t1 = pd.concat([_t1,_t2],axis=0) _t1.columns = ['_'.join(col) for col in _t1.columns.values] _t1 = _t1.rename(columns={"threshold_ms_":"threshold_ms"}) # -- generate simulator final results -- simulator = pd.merge(tmp,_t1,on='threshold_ms', how='left') simulator['threshold_ms'] = simulator['threshold_ms']/1000 simulator = simulator.rename(columns=({"threshold_ms":"MaxDurationInSeconds"})) simulator["video_reduction_pct"] = round((simulator["streaming_video_count"]/summary["streaming_video_count"].iloc[0]) -1,3) simulator["label_reduction_pct"] = round((simulator["label_detection_count"]/summary["labels_detected_count"].iloc[0]) -1,3) simulator = simulator.drop(columns=(["level_0_"])) # -- summary template template_str = ''' Streaming Video Events Simulation & Profiler

Streaming Video Events Profiler



Profiler Summary


{% for c in columns %} {% endfor %} {% for row in rows %} {% for k, v in row.items() %} {% if v == 'a' %} {% else %} {% endif %} {% endfor %} {% endfor %}
{{ c }}
{{ row.letter }}
{{ v }}
''' # -- label summary template template_str2 = '''

Label Detection Summary (milliseconds)


{% for c in columns %} {% endfor %} {% for row in rows %} {% for k, v in row.items() %} {% if v == 'a' %} {% else %} {% endif %} {% endfor %} {% endfor %}
{{ c }}
{{ row.letter }}
{{ v }}
''' # -- simulation summary template template_str3 = '''

Simulation Results (MaxDurationInSeconds)


{% for c in columns %} {% endfor %} {% for row in rows %} {% for k, v in row.items() %} {% if v == 'a' %} {% else %} {% endif %} {% endfor %} {% endfor %}
{{ c }}
{{ row.letter }}
{{ v }}
''' # -- rendering template = Template(template_str) html = template.render( rows=summary.to_dict(orient='records'), columns=summary.columns.to_list() ) template2 = Template(template_str2) html2 = template2.render( rows=label_summary.to_dict(orient='records'), columns=label_summary.columns.to_list() ) template3 = Template(template_str3) html3 = template3.render( rows=simulator.to_dict(orient='records'), columns=simulator.columns.to_list() ) # -- write results out. with open(results_filename, mode="w", encoding="utf-8") as results: results.write(html) print(f"... wrote summary to {results_filename}") with open(results_filename, mode="a", encoding="utf-8") as results: results.write(html2) print(f"... wrote label summary to {results_filename}") with open(results_filename, mode="a", encoding="utf-8") as results: results.write(html3) print(f"... wrote simulation results to {results_filename}") print(f"... finished processing: {results_filename}")