Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0

# Train time series anomaly detection model (NAB)


## Table of Contents
1. Specify data folder containing individual CSVs
2. Specify location containing label JSON
3. Train Context OSE Model from NAB library
4. Perform inference on test set



In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
# Plot styling shared by every figure in this notebook.
from matplotlib import pyplot as plt
import matplotlib

# 'normal' is not a font family matplotlib can resolve: it triggers
# "findfont: Font family 'normal' not found" warnings and falls back to the
# default font. Ask for the generic sans-serif family explicitly instead.
font = {'family': 'sans-serif', 'size': 18}
matplotlib.rc('font', **font)

# `cm` is not used by the cells visible here; the import is kept so the
# notebook namespace stays unchanged.
import matplotlib.cm as cm
plt.rcParams["figure.figsize"] = (20, 12)  # (width, height) in inches

In [None]:
import sys

# Put the repository's `src` directory on the import path so the project
# package below can be imported without being installed.
# NOTE(review): the path is relative, so this presumably expects the notebook
# to be run from its own directory — confirm.
sys.path.append('../../src/')

from anomaly_detection_spatial_temporal_data.model.time_series import NABAnomalyDetector

## Load one example time series

The models in the Numenta Anomaly Benchmark (NAB) use a window of historical context to predict whether a future time step is an anomaly. Therefore, a single time series is sufficient to demonstrate their usage.

In [None]:
# The pair 'C1001065306' / 'es_health' is a good example series.
# The single quotes are part of each identifier: they are embedded verbatim
# in the CSV file names built from these values.
example_c = "'C1001065306'"
example_m = "'es_health'"

In [None]:
# Build the path of the example series from the two identifiers and load it.
example_ts_file_path = (
    "../../data/02_intermediate/financial_fraud/ts_data/"
    f"{example_c}_{example_m}_transaction_data.csv"
)
ts_example_data = pd.read_csv(example_ts_file_path)

# Preview the first ten rows.
ts_example_data.head(n=10)

In [None]:
# Build the path of the matching label file and load it.
example_ts_label_file_path = (
    "../../data/02_intermediate/financial_fraud/ts_label/"
    f"{example_c}_{example_m}_transaction_label.csv"
)
ts_example_label = pd.read_csv(example_ts_label_file_path)

# Preview the first ten rows.
ts_example_label.head(n=10)

In [None]:
# Show the (rows, columns) shape of the series and of its labels side by side.
# NOTE(review): the plotting cell indexes the series by label row positions,
# so the two row counts presumably need to match — confirm.
ts_example_data.shape, ts_example_label.shape

### Plot time series with anomaly

In [None]:
# Draw the raw series, then highlight the rows whose label equals 1.
plt.plot(ts_example_data["timestamp"], ts_example_data["value"], label='amount')

# Row positions come from the label frame and are applied to the data frame.
label_pos = np.where(ts_example_label["label"] == 1)
fraud_rows = ts_example_data.iloc[label_pos]
plt.scatter(fraud_rows["timestamp"], fraud_rows["value"], color='red', label='fraud')

plt.xlabel('Timestep')
plt.ylabel('Amount')
plt.legend()
plt.show()

## Run NAB model training and inference 

In [None]:
# Input side: folder of per-series CSVs and the combined label JSON.
input_dir = '../../data/02_intermediate/financial_fraud/ts_data'
label_dict_path = '../../data/02_intermediate/financial_fraud/ts_label/labels-combined.json'

# Model side: which detector to run and where the NAB code lives.
model_name = 'contextOSE'
model_path = '../../src/anomaly_detection_spatial_temporal_data/model/NAB'

# Output side: root folder handed to the detector for its results.
output_dir = '../../data/07_model_output/financial_fraud/ts_result'

In [None]:
# Build the detector from the settings defined in the previous cell.
# NOTE(review): arguments are passed positionally; the constructor signature
# is not visible here, so confirm the order (name, NAB path, input folder,
# label JSON, output folder) against NABAnomalyDetector.
model_obj = NABAnomalyDetector(
 model_name, 
 model_path,
 input_dir,
 label_dict_path,
 output_dir,
)

In [None]:
# Run the detector. The following cells read a per-series result CSV back
# from disk, so this call presumably writes its output under the configured
# output folder — confirm against NABAnomalyDetector.predict.
model_obj.predict()

## Load inference result 

In [None]:
# Point at the model-specific sub-folder that holds the result CSVs.
# NOTE(review): this rebinds `output_dir`, which was also passed to the
# detector above; re-running the constructor cell after this one would hand
# it the sub-folder instead of the root.
output_dir= f'../../data/07_model_output/financial_fraud/ts_result/{model_name}'

In [None]:
# Result file for the example series: the input file name prefixed with the
# model name, located in the result folder.
example_result_file_path = os.path.join(
    output_dir,
    f"{model_name}_{example_c}_{example_m}_transaction_data.csv",
)

In [None]:
# Display the resolved result path as a sanity check before reading it.
example_result_file_path

In [None]:
# Load the detector's output for the example series.
ts_example_result = pd.read_csv(example_result_file_path)

In [None]:
# Display the loaded result table; the plotting cell reads its
# `anomaly_score` column.
ts_example_result

In [None]:
# Rows whose anomaly_score is at or above this value are marked as predicted
# fraud in the plot below.
# NOTE(review): presumably scores lie in [0, 1] — confirm against the
# detector output.
anomaly_score_threshold = 0.95

In [None]:
# Draw the raw series and overlay the time steps the model flags as anomalous.
plt.plot(ts_example_data.timestamp, ts_example_data.value, label='amount')

# np.where returns a tuple of index arrays, and a non-empty tuple is always
# truthy, so `if predict_pos:` never skipped the scatter. Take the index array
# itself and test its size instead.
# NOTE(review): positions from the result frame are applied to the data frame,
# which assumes both have the same rows in the same order — confirm.
predict_pos = np.where(ts_example_result.anomaly_score >= anomaly_score_threshold)[0]
if predict_pos.size > 0:
    predicted_rows = ts_example_data.iloc[predict_pos]
    plt.scatter(
        predicted_rows.timestamp,
        predicted_rows.value,
        label='predicted_fraud',
        color='red',
    )
plt.xlabel('Timestep')
plt.ylabel('Amount')
plt.legend()
plt.show()

# References

Edgar Alonso Lopez-Rojas and Stefan Axelsson. 2014. BANKSIM: A BANK PAYMENTS SIMULATOR FOR FRAUD DETECTION RESEARCH.

Alexander Lavin and Subutai Ahmad. 2015. Evaluating Real-Time Anomaly Detection Algorithms – The Numenta Anomaly Benchmark.