""" file name : sve_profile_report.py create date : 04Oct2022 description : This program produces a simple report by reading in the processing results stored in wrangler_kvs_demo table in your enveironment. > python3 sve_profile_report.py change log : """ from datetime import datetime from IPython.display import HTML from jinja2 import Template import numpy as np import pandas as pd import awswrangler as wr # -- name of report results_filename = "sve_profile_report.html" # -- dump table to data frame df = wr.athena.read_sql_query("SELECT * FROM wrangler_kvs_demo", database="kvs_demo") # -- generate summary results -- summary = (df.agg({"sessionid":["count","nunique"]}) .T .reset_index() .drop(columns=["index"]) ) summary.columns = ["labels_detected_count","streaming_video_count"] summary = summary[["streaming_video_count","labels_detected_count"]] # -- generate label summary -- label_summary = df.groupby('labelname')\ .agg({"videomapping_kinesisvideomapping_frameoffsetmillis":["count","mean","median","min","max"]})\ .reset_index() label_summary.columns = ["label","detection count","mean_ms","median_ms","min_ms","max_ms"] # -- generate simulator results -- tmp = pd.DataFrame() for threshold in np.linspace(1000,10000,10): _t0 = (df .query('videomapping_kinesisvideomapping_frameoffsetmillis <= @threshold') .agg({"sessionid":["count","nunique"]}) .T .reset_index() .assign(threshold_ms=threshold) ) tmp = pd.concat([tmp,_t0],axis=0) tmp.columns = ["index","label_detection_count","streaming_video_count","threshold_ms"] tmp = tmp[["threshold_ms","label_detection_count","streaming_video_count"]] # -- generate simulator results by label -- _t1 = pd.DataFrame() for threshold in np.linspace(1000,10000,10): _t2 = (df .query('videomapping_kinesisvideomapping_frameoffsetmillis <= @threshold') .groupby('labelname') .agg({"sessionid":["count"]}) .T .reset_index() .pivot(index="level_0",columns=["level_1"]) .reset_index() .assign(threshold_ms=threshold) ) _t1 = pd.concat([_t1,_t2],axis=0) _t1.columns = ['_'.join(col) for col in _t1.columns.values] _t1 = _t1.rename(columns={"threshold_ms_":"threshold_ms"}) # -- generate simulator final results -- simulator = pd.merge(tmp,_t1,on='threshold_ms', how='left') simulator['threshold_ms'] = simulator['threshold_ms']/1000 simulator = simulator.rename(columns=({"threshold_ms":"MaxDurationInSeconds"})) simulator["video_reduction_pct"] = round((simulator["streaming_video_count"]/summary["streaming_video_count"].iloc[0]) -1,3) simulator["label_reduction_pct"] = round((simulator["label_detection_count"]/summary["labels_detected_count"].iloc[0]) -1,3) simulator = simulator.drop(columns=(["level_0_"])) # -- summary template template_str = '''
{{ c }} | {% endfor %}|
---|---|
{{ row.letter }} |
{% else %}
{{ v }} |
{% endif %}
{% endfor %}
{{ c }} | {% endfor %}|
---|---|
{{ row.letter }} |
{% else %}
{{ v }} |
{% endif %}
{% endfor %}
{{ c }} | {% endfor %}|
---|---|
{{ row.letter }} |
{% else %}
{{ v }} |
{% endif %}
{% endfor %}