#!/usr/bin/env python from __future__ import print_function import os import json import pickle import sys import traceback import pandas as pd from keras.models import Sequential from keras.layers import Dense import keras.backend as K # These are the paths to where SageMaker mounts interesting things in your container. prefix = '/opt/ml/' input_path = os.path.join(prefix, 'input/data') model_path = os.path.join(prefix, 'model') output_path = os.path.join(prefix, 'output') param_path = os.path.join(prefix, 'input/config/hyperparameters.json') model_name = 'custom_loss_blog_trained_model.h5' # This algorithm has a single channel of input data called 'training'. Since we run in # File mode, the input files are copied to the directory specified here. channel_name='train' training_path = os.path.join(input_path, channel_name) #custom loss function that presents the option of place unequal weights on different #types of classification weights def custom_loss_wrapper(fn_cost=1, fp_cost=1): def custom_loss(y_true, y_pred, fn_cost=fn_cost, fp_cost=fp_cost): h = K.ones_like(y_pred) fn_cost = fn_cost * h fp_cost = fp_cost * h weighted_values = y_true * K.abs(1-y_pred)*fn_cost + (1-y_true) * K.abs(y_pred)*fp_cost loss = K.mean(weighted_values) return loss return custom_loss # The function to execute the training. def train(): print('Starting the training.') try: with open(param_path, 'r') as tc: hyperparameters = json.load(tc) #Based upon the value assigned to the loss_function_type variable, either Keras's built-in #binary_crossentropy (if assigned 'builtin') or the custom loss function (if assigned 'custom') #will be used to train model. loss_function_type = hyperparameters['loss_function_type'] if loss_function_type == 'builtin': loss_function = 'binary_crossentropy' elif loss_function_type == 'custom': fn_cost = int(hyperparameters['fn_cost']) fp_cost = int(hyperparameters['fp_cost']) print('fn_cost: ', fn_cost) print('fp_cost: ', fp_cost) loss_function = custom_loss_wrapper(fn_cost=fn_cost, fp_cost=fp_cost) print('loss_function_type: ', loss_function_type) #trying to predict only one class num_classes = 1 #the data set contains 30 features input_dim = 30 # Take the set of files and read them all into a single pandas dataframe input_files = [ os.path.join(training_path, file) for file in os.listdir(training_path) ] raw_data = [ pd.read_csv(file, header=None) for file in input_files ] train_data = pd.concat(raw_data) # labels are in the first column train_y = train_data.ix[:,0] train_x = train_data.ix[:,1:] #build binary classification model model = Sequential() model.add(Dense(units=num_classes, input_dim=input_dim, activation='sigmoid')) #compile model model.compile(loss=loss_function, optimizer='sgd', metrics=['accuracy']) #train model model.fit(train_x, train_y, epochs=50, batch_size=32, verbose=0) # save the model, only saving architecture and weights, not configuration since custom loss causes # problems when trying to load model configuration (due to bug in Keras 2.2.0) #save architecture with open(os.path.join(model_path, 'model_architecture.json'),'w') as f: f.write(model.to_json()) #save weights model.save_weights(os.path.join(model_path, 'model_weights.h5')) print('Saved trained model at %s ' % model_path) except Exception as e: # Write out an error file. This will be returned as the failureReason in the # DescribeTrainingJob result. trc = traceback.format_exc() with open(os.path.join(output_path, 'failure'), 'w') as s: s.write('Exception during training: ' + str(e) + '\n' + trc) # Printing this causes the exception to be in the training job logs, as well. print('Exception during training: ' + str(e) + '\n' + trc, file=sys.stderr) # A non-zero exit code causes the training job to be marked as Failed. sys.exit(255) if __name__ == '__main__': train() # A zero exit code causes the job to be marked a Succeeded. sys.exit(0)