import mxnet as mx
import time
from concurrent import futures
import numpy as np

# Download the pretrained ResNet-50 model (weights + symbol) and the ImageNet class list.
path = 'http://data.mxnet.io/models/imagenet/'
mx.test_utils.download(path + 'resnet/50-layers/resnet-50-0000.params')
mx.test_utils.download(path + 'resnet/50-layers/resnet-50-symbol.json')
mx.test_utils.download(path + 'synset.txt')

# Benchmark settings.
USER_BATCH_SIZE = 50        # images per forward() call
NUM_LOOPS_PER_THREAD = 100  # forward() calls per worker thread

ctx = mx.gpu(0)
ngpu = 1
# Map optional context groups to devices. ResNet-50 defines no 'embed'/'decode'
# groups and ngpu = 1, so this is effectively a no-op here; it only illustrates
# how layer groups could be pinned to different GPUs.
group2ctx = {'embed': mx.gpu(0),
             'decode': mx.gpu(ngpu - 1)}

with open('synset.txt', 'r') as f:
    labels = [l.rstrip() for l in f]

sym, args, aux = mx.model.load_checkpoint('resnet-50', 0)
# Copy the pretrained weights to the target device so bind() works on device-resident arrays.
args = {name: arr.as_in_context(ctx) for name, arr in args.items()}
aux = {name: arr.as_in_context(ctx) for name, arr in aux.items()}

#fname = mx.test_utils.download('https://github.com/dmlc/web-data/blob/master/mxnet/doc/tutorials/python/predict_image/cat.jpg?raw=true')
fname = mx.test_utils.download('https://raw.githubusercontent.com/awslabs/mxnet-model-server/master/docs/images/kitten_small.jpg?raw=true')
img = mx.image.imread(fname)

# Convert into format (batch, channel, height, width).
img = mx.image.imresize(img, 224, 224)  # resize
img = img.transpose((2, 0, 1))          # channel first
img = img.expand_dims(axis=0)           # add batch dimension
img = img.astype(dtype='float32')
# Replicate the image so the actual batch size matches USER_BATCH_SIZE;
# otherwise the throughput numbers reported below would be overstated.
img = mx.nd.tile(img, reps=(USER_BATCH_SIZE, 1, 1, 1))

args['data'] = img.as_in_context(ctx)
# Dummy labels: only the shape matters since we run forward-only with grad_req='null'.
args['softmax_label'] = mx.nd.zeros((USER_BATCH_SIZE,), ctx=ctx)

# Single-inference sanity check (uncomment to print the top-5 classes for one image):
#exe = sym.bind(ctx=ctx, args=args, aux_states=aux, grad_req='null', group2ctx=group2ctx)
#exe.forward()
#prob = exe.outputs[0].asnumpy()
#prob = prob[0]  # scores for the first image in the batch
#a = np.argsort(prob)[::-1]
#for i in a[0:5]:
#    print('probability=%f, class=%s' % (prob[i], labels[i]))

# Create 4 executors that share the loaded weights, then hand each executor to
# 4 worker threads (16 threads in total).
pred_list = [sym.bind(ctx=ctx, args=args, aux_states=aux, grad_req='null', group2ctx=group2ctx)
             for _ in range(4)]
pred_list = [
    pred_list[0], pred_list[0], pred_list[0], pred_list[0],
    pred_list[1], pred_list[1], pred_list[1], pred_list[1],
    pred_list[2], pred_list[2], pred_list[2], pred_list[2],
    pred_list[3], pred_list[3], pred_list[3], pred_list[3],
]

# Per-thread counters of how many images have been processed so far.
num_infer_per_thread = [0] * len(pred_list)


def one_thread(pred, index):
    """Run NUM_LOOPS_PER_THREAD forward passes on one executor."""
    for _ in range(NUM_LOOPS_PER_THREAD):
        pred.forward()
        # asnumpy() copies the result back and blocks until the asynchronous
        # GPU work has finished, so the counter only counts completed batches.
        prob = pred.outputs[0].asnumpy()
        num_infer_per_thread[index] += USER_BATCH_SIZE


def current_throughput():
    """Print the aggregate throughput roughly once per second until all threads finish."""
    num_infer = 0
    last_num_infer = num_infer
    print("NUM THREADS: ", len(pred_list))
    print("NUM_LOOPS_PER_THREAD: ", NUM_LOOPS_PER_THREAD)
    print("USER_BATCH_SIZE: ", USER_BATCH_SIZE)
    while num_infer < NUM_LOOPS_PER_THREAD * USER_BATCH_SIZE * len(pred_list):
        num_infer = sum(num_infer_per_thread)
        # Images completed since the last report (~1 second ago).
        throughput = num_infer - last_num_infer
        print('current throughput: {} images/sec'.format(throughput))
        last_num_infer = num_infer
        time.sleep(1.0)


# Run inference: one monitoring thread plus 16 worker threads.
executor = futures.ThreadPoolExecutor(max_workers=16 + 1)
executor.submit(current_throughput)
for i, pred in enumerate(pred_list):
    executor.submit(one_thread, pred, i)
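
# --- Optional extension (a sketch, not part of the original script) ---
# The monitor above only prints per-second deltas. One way to also report an
# overall average is to block until every submitted task has finished and then
# divide the total image count by the elapsed wall-clock time. The names
# `start_time`, `elapsed` and `total_images` are introduced here purely for
# illustration; the timer starts just after submission and the monitor takes up
# to an extra second to exit, so the figure is approximate.
start_time = time.time()
executor.shutdown(wait=True)  # block until the 16 workers and the monitor are done
elapsed = time.time() - start_time
total_images = NUM_LOOPS_PER_THREAD * USER_BATCH_SIZE * len(pred_list)
print('approximate average throughput: {:.1f} images/sec'.format(total_images / elapsed))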