# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: MIT-0 from __future__ import print_function import argparse import logging import os import numpy as np import json import time import mxnet as mx from mxnet import gluon from mxnet.gluon import nn from mxnet import autograd as ag from mxnet.gluon.data.vision import transforms import gluoncv as gcv from gluoncv.data.transforms import video from gluoncv.data import VideoClsCustom from gluoncv.model_zoo import get_model from gluoncv.utils import makedirs, LRSequential, LRScheduler, split_and_load, TrainingHistory logging.basicConfig(level=logging.DEBUG) # ------------------------------------------------------------ # # Training methods # # ------------------------------------------------------------ # def train(args): # SageMaker passes num_cpus, num_gpus and other args we can use to tailor training to # the current container environment num_gpus = mx.context.num_gpus() ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()] # retrieve the hyperparameters we set in notebook (with some defaults) #number of training examples utilized in one iteration. batch_size = args.batch_size #number of times an entire dataset is passed forward and backward through the neural network epochs = args.epochs #tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function. learning_rate = args.learning_rate #Momentum remembers the update Δ w at each iteration, and determines the next update as a linear combination of the gradient and the previous update momentum = args.momentum #Optimizers are algorithms or methods used to change the attributes of your neural network such as weights and learning rate in order to reduce the losses. optimizer = args.optimizer #after each update, the weights are multiplied by a factor slightly less than 1. wd = args.wd optimizer_params = {'learning_rate': learning_rate, 'wd': wd, 'momentum': momentum} log_interval = args.log_interval #In this example, we use Inflated 3D model (I3D) with ResNet50 backbone trained on Kinetics400 dataset. We want to replace the last classification (dense) layer to the number of classes in the dataset. model_name = 'i3d_resnet50_v1_custom' #number of classes in the dataset nclass = 101 #number of workers for the data loader num_workers = 8 current_host = args.current_host hosts = args.hosts model_dir = args.model_dir CHECKPOINTS_DIR = '/opt/ml/checkpoints' checkpoints_enabled = os.path.exists(CHECKPOINTS_DIR) data_dir = args.train segments = 'rawframes' train ='ucfTrainTestlist/ucf101_train_split_2_rawframes.txt' #load the data with data loader train_data = load_data(data_dir,batch_size,num_workers,segments,train) # define the network net = define_network(ctx,model_name,nclass) #define the gluon trainer trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params) #define loss function loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() #define training metric train_metric = mx.metric.Accuracy() train_history = TrainingHistory(['training-acc']) net.hybridize() #learning rate decay hyperparameters lr_decay_count = 0 lr_decay = 0.1 lr_decay_epoch = [40, 80, 100] for epoch in range(epochs): tic = time.time() train_metric.reset() train_loss = 0 # Learning rate decay if epoch == lr_decay_epoch[lr_decay_count]: trainer.set_learning_rate(trainer.learning_rate*lr_decay) lr_decay_count += 1 # Loop through each batch of training data for i, batch in enumerate(train_data): # Extract data and label data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0,even_split=False) label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0,even_split=False) # AutoGrad with ag.record(): output = [] for _, X in enumerate(data): X = X.reshape((-1,) + X.shape[2:]) pred = net(X) output.append(pred) loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)] # Backpropagation for l in loss: l.backward() # Optimize trainer.step(batch_size) # Update metrics train_loss += sum([l.mean().asscalar() for l in loss]) train_metric.update(label, output) if i == 100: break name, acc = train_metric.get() # Update history and print metrics train_history.update([acc]) print('[Epoch %d] train=%f loss=%f time: %f' % (epoch, acc, train_loss / (i+1), time.time()-tic)) print('saving the model') save(net, model_dir) def save(net, model_dir): # save the model net.export('%s/model'% model_dir) def define_network(ctx,model_name,nclass): #In GluonCV, we can get a customized model with one line of code. net = get_model(name=model_name, nclass=nclass) net.collect_params().reset_ctx(ctx) print(net) return net def load_data(data_dir, batch_size,num_workers,segments,train): #The transformation function does three things: center crop the image to 224x224 in size, transpose it to num_channels,num_frames,height*width, and normalize with mean and standard deviation calculated across all ImageNet images. #Use the general gluoncv dataloader VideoClsCustom to load the data with num_frames = 32 as the length. For another dataset, you can just replace the value of root and setting to your data directory and your prepared text file. transform_train = video.VideoGroupTrainTransform(size=(224, 224), scale_ratios=[1.0, 0.8], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_dataset = VideoClsCustom(root=data_dir + '/' + segments,setting=data_dir + '/' + train,train=True,new_length=32,transform=transform_train) print(os.listdir(data_dir+ '/' + segments)) print('Load %d training samples.' % len(train_dataset)) return gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) # ------------------------------------------------------------ # # Training execution # # ------------------------------------------------------------ # def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('--batch-size', type=int, default=8) parser.add_argument('--epochs', type=int, default=10) parser.add_argument('--learning-rate', type=float, default=0.001) parser.add_argument('--momentum', type=float, default=0.9) parser.add_argument('--wd', type=float, default=0.0001) parser.add_argument('--log-interval', type=float, default=100) parser.add_argument('--optimizer', type=str, default='sgd') parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR']) parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAINING']) parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST']) parser.add_argument('--hosts', type=list, default=json.loads(os.environ['SM_HOSTS'])) return parser.parse_args() if __name__ == '__main__': args = parse_args() train(args)