from IPython.display import display, IFrame
from ipywidgets import interact, Image, VBox, HTML, GridspecLayout, Layout, widgets
import ipywidgets as ipyw
import boto3
import os
import tarfile
import pandas as pd
def search_training_jobs(job_tag_name, job_tag_value):
search_params = {
'Resource': 'TrainingJob',
'SearchExpression': {
'Filters': [{
'Name': f'Tags.{job_tag_name}',
'Operator': 'Equals',
'Value': job_tag_value
},
{
'Name': 'TrainingJobStatus',
'Operator': 'Equals',
'Value': 'Completed'
}]
}
}
smclient = boto3.client(service_name='sagemaker')
results = smclient.search(**search_params)
return results
def get_training_job_list(tag_key, tag_value):
training_jobs = search_training_jobs(tag_key, tag_value)
ag_jobs = {}
for training_job in training_jobs['Results']:
training_job_desc = training_job['TrainingJob']
training_job_name = training_job_desc['TrainingJobName']
ag_jobs[training_job_name] = training_job_desc
return ag_jobs
def split_s3_path(s3_path):
path_parts = s3_path.replace('s3://','').split('/')
bucket = path_parts.pop(0)
key = '/'.join(path_parts)
return bucket, key
def get_html_text(text, color):
html = HTML("""""" + text)
html = HTML("" + text + "")
return html
def show_in_html(text, color):
text_out = get_html_text(text, color)
display(text_out, metadata=dict(isolated=True))
def show_metrics(job_desc):
metrics = job_desc['FinalMetricDataList']
for metric in metrics:
metric_name = metric['MetricName']
metric_value = metric['Value']
show_in_html(f'{metric_name} : {metric_value:.2f}', '#ff9900')
def show_leaderboard(Job_Name):
leaderboard_fname = f'./tmp/{Job_Name}/leaderboard.csv'
if os.path.exists(leaderboard_fname):
df = pd.read_csv(leaderboard_fname)
df.set_index(df.columns[0], inplace=True)
show_in_html('Leaderboard', '#ff9900')
display(df)
def show_modelsummary(Job_Name):
modelsummary_fname = f'./tmp/{Job_Name}/SummaryOfModels.html'
if os.path.exists(modelsummary_fname):
show_in_html('Summary of models', '#ff9900')
display(IFrame(src=modelsummary_fname, width=700, height=700))
def show_leaderboard_modelsummary(Job_Name):
tab_names = ['Leaderboard', 'Summary of models']
leaderboard_fname = f'./tmp/{Job_Name}/leaderboard.csv'
modelsummary_fname = f'./tmp/{Job_Name}/SummaryOfModels.html'
if os.path.exists(leaderboard_fname):
lb_df = pd.read_csv(leaderboard_fname)
lb_df.set_index(lb_df.columns[0], inplace=True)
df_html = lb_df.style.set_table_attributes('class="table"').render()
html_widget = widgets.HTML(df_html)
if os.path.exists(modelsummary_fname):
html_file = open(modelsummary_fname, 'r')
html_content = html_file.read()
print(html_content)
modelsummary = widgets.VBox([IFrame(src=modelsummary_fname, width=700, height=700)])
children = [html_widget, modelsummary]
tab = ipyw.Tab()
tab.children = children
for i in range(len(tab_names)):
tab.set_title(i, tab_names[i])
display(tab)
def show_classification_report_confusion_matrix(Job_Name):
classificationreport_fname = f'./tmp/{Job_Name}/classification_report.csv'
featureimportance_fname = f'./tmp/{Job_Name}/feature_importance.csv'
confusionmatrix_fname = f'./tmp/{Job_Name}/confusion_matrix.png'
roc_auc_curve_fname = f'./tmp/{Job_Name}/roc_auc_curve.png'
has_data = False
# Classification report
if os.path.exists(classificationreport_fname):
df = pd.read_csv(classificationreport_fname)
df = df.rename(columns={'Unnamed: 0': 'Label'})
df.set_index('Label', inplace=True)
df = df.applymap('{0:.2f}'.format)
df_html = df.style.set_table_attributes('class="table"').render()
cr_widget_html = HTML(df_html)
has_data = True
else:
cr_widget_html = VBox([])
# Feature importance
if os.path.exists(featureimportance_fname):
df = pd.read_csv(featureimportance_fname)
df.set_index(df.columns[0], inplace=True)
df = df.applymap('{0:.3f}'.format)
df_html = df.style.set_table_attributes('class="table"').render()
fi_widget_html = HTML(df_html)
has_data = True
else:
fi_widget_html = VBox([])
cr_title_html = get_html_text('Classification report', '#ff9900')
fi_title_html = get_html_text('Feature importance', '#ff9900')
widget_tables = VBox([cr_title_html, cr_widget_html, fi_title_html, fi_widget_html])
# Confusion matrix
if os.path.exists(confusionmatrix_fname):
img_file = open(confusionmatrix_fname, 'rb')
image = img_file.read()
widget_cm_img = Image(value=image, format='png')
has_data = True
else:
widget_cm_img = VBox([])
# ROC curve
if os.path.exists(roc_auc_curve_fname):
img_file = open(roc_auc_curve_fname, 'rb')
image = img_file.read()
widget_roc_img = Image(value=image, format='png')
has_data = True
else:
widget_roc_img = VBox([])
cm_title_html = get_html_text('Confusion matrix', '#ff9900')
roc_title_html = get_html_text('ROC Curve', '#ff9900')
widget_imgs = VBox([cm_title_html, widget_cm_img, roc_title_html, widget_roc_img], layout=Layout(margin='0 0 0 10px'))
if has_data:
show_in_html('Model analysis on test dataset', '#000099')
grid = GridspecLayout(1, 2)
grid[0, 0] = widget_tables
grid[0, 1] = widget_imgs
display(grid)
def show_ensemble_model(Job_Name):
ensemblemodel_fname = f'./tmp/{Job_Name}/ensemble-model.png'
if os.path.exists(ensemblemodel_fname):
show_in_html('Ensemble model architecture', '#ff9900')
img_file = open(ensemblemodel_fname, 'rb')
image = img_file.read()
widget_img = Image(value=image, format='png')
display(widget_img)
def launch_viewer(tag_key='AlgorithmName', tag_value='AutoGluon-Tabular', is_debug=False):
global ag_jobs
# To disable ipywidget output scrollable
style = """
"""
display(HTML(style))
show_in_html('AutoGluon Model Performance Viewer', '#000099')
ag_jobs = get_training_job_list(tag_key, tag_value)
job_names = ['-- Select job --'] + list(ag_jobs.keys())
def on_change(Job_Name):
if 'Select job' in Job_Name:
show_in_html('Please choose one training job from the list', '#0000ff')
else:
job_desc = ag_jobs[Job_Name]
show_in_html(f'Training Job : {Job_Name}', '#ff9900')
show_in_html('Training dataset ', '#000099')
for channel in job_desc['InputDataConfig']:
channel_name = channel['ChannelName']
channel_source = channel['DataSource']['S3DataSource']['S3Uri']
print(f'{channel_name}: {channel_source}')
show_model_evaluation(Job_Name, job_desc, is_debug)
interact(on_change, Job_Name=job_names)
def show_model_evaluation(Job_Name, job_desc, is_debug):
global ag_jobs
if download_model_output(Job_Name, job_desc, is_debug):
# show_leaderboard_modelsummary(Job_Name)
show_leaderboard(Job_Name)
show_modelsummary(Job_Name)
show_classification_report_confusion_matrix(Job_Name)
show_ensemble_model(Job_Name)
def download_model_output(Job_name, job_desc, is_debug):
# download the s3 file into local temp directory and extract it
s3 = boto3.client('s3')
job_output_s3_uri = job_desc['ModelArtifacts']['S3ModelArtifacts']
tmp_dir = f'./tmp/{Job_name}'
if not os.path.exists(tmp_dir):
os.makedirs(tmp_dir)
def is_output_file_exists(s3_bucket, s3_prefix):
try:
results = s3.head_object(Bucket=s3_bucket, Key=s3_prefix)
return True
except:
return False
s3_bucket, s3_key = split_s3_path(job_output_s3_uri)
s3_key_output = s3_key.replace('model.tar.gz', 'output.tar.gz')
if is_output_file_exists(s3_bucket, s3_key_output):
s3_key = s3_key_output
else:
return False
local_tar_fname = os.path.join(tmp_dir, 'output.tar.gz')
s3.download_file(s3_bucket, s3_key, local_tar_fname)
tar = tarfile.open(local_tar_fname)
if is_debug is True:
show_in_html('Files found in output.tar.gz', '#000099')
for member in tar.getmembers():
fname = member.name
print(fname)
tar.extractall(path=tmp_dir)
return True