# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright (c) 2018-2019 NVIDIA CORPORATION. All rights reserved.
"""Run a detection model over a data loader and evaluate its predictions."""
import datetime
import logging
import os
import time

import torch
from tqdm import tqdm

from sagemakercv.data import datasets
from sagemakercv.data.datasets.evaluation import evaluate
from sagemakercv.utils.comm import all_gather, is_main_process, synchronize

# Logger name is kept as-is so any logging configuration keyed on it still applies.
_LOGGER_NAME = "maskrcnn_benchmark.inference"


def compute_on_dataset(model, data_loader, device):
    """Run ``model`` on every batch of ``data_loader`` and collect the outputs.

    The model is switched to eval mode (``model.eval()``) and is left in that
    mode when the function returns.

    Args:
        model: Callable module; invoked as ``model(images)`` and expected to
            return an iterable of per-image results that support ``.to(device)``.
        data_loader: Iterable yielding ``(images, targets, image_ids)`` batches.
            ``targets`` is ignored here.
        device: Device the images are moved to before the forward pass.

    Returns:
        dict mapping each image id to its result, moved to the CPU.
    """
    model.eval()
    cpu_device = torch.device("cpu")
    logger = logging.getLogger(_LOGGER_NAME)
    logger.info("Running Evaluation")

    results_dict = {}
    for images, _, image_ids in data_loader:
        images = images.to(device)
        with torch.no_grad():
            output = model(images)
            # Move results off the accelerator right away so they do not
            # accumulate in device memory across batches.
            output = [o.to(cpu_device) for o in output]
        results_dict.update(zip(image_ids, output))
    return results_dict


def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
    """Merge per-process prediction dicts into one list ordered by image id.

    Every process must call this function because ``all_gather`` is invoked
    unconditionally; only the main process receives the merged result.

    Args:
        predictions_per_gpu: dict mapping image id to prediction for the
            images handled by the calling process.

    Returns:
        On the main process, a list of predictions sorted by image id (empty
        if nothing was gathered). On every other process, ``None``.
    """
    all_predictions = all_gather(predictions_per_gpu)
    if not is_main_process():
        return None

    # Merge the list of dicts; later entries win on duplicate image ids.
    predictions = {}
    for partial in all_predictions:
        predictions.update(partial)

    image_ids = sorted(predictions)
    # Ids are expected to be 0..N-1. The emptiness guard avoids an IndexError
    # on image_ids[-1] when no predictions were gathered at all.
    if image_ids and len(image_ids) != image_ids[-1] + 1:
        logger = logging.getLogger(_LOGGER_NAME)
        logger.warning(
            "Number of images that were gathered from multiple processes is not "
            "a contiguous set. Some images might be missing from the evaluation"
        )

    # Convert the dict to a list ordered by image id.
    return [predictions[i] for i in image_ids]


def inference(
    model,
    data_loader,
    dataset_name,
    iou_types=("bbox",),
    box_only=False,
    device="cuda",
    expected_results=(),
    expected_results_sigma_tol=4,
    output_folder=None,
):
    """Run inference over ``data_loader`` and evaluate the predictions.

    Args:
        model: Model passed to ``compute_on_dataset``.
        data_loader: Loader exposing a ``dataset`` attribute and yielding
            ``(images, targets, image_ids)`` batches.
        dataset_name: Name used only in the log message.
        iou_types: Forwarded to ``evaluate``.
        box_only: Forwarded to ``evaluate``.
        device: Device (or device string) to run the model on.
        expected_results: Forwarded to ``evaluate``.
        expected_results_sigma_tol: Forwarded to ``evaluate``.
        output_folder: If set, the main process saves the predictions to
            ``<output_folder>/predictions.pth``; also forwarded to ``evaluate``.

    Returns:
        The value returned by ``evaluate``. For datasets that are not
        ``datasets.COCODataset``, non-main processes return ``None`` without
        evaluating.
    """
    # Convert to a torch.device for efficiency.
    device = torch.device(device)
    num_devices = (
        torch.distributed.get_world_size()
        if torch.distributed.is_initialized()
        else 1
    )
    logger = logging.getLogger(_LOGGER_NAME)
    dataset = data_loader.dataset
    num_images = len(dataset)
    logger.info("Start evaluation on {} dataset({} images).".format(dataset_name, num_images))

    start_time = time.time()
    predictions = compute_on_dataset(model, data_loader, device)
    # Wait for all processes to complete before measuring the time.
    synchronize()
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # Guard against an empty dataset so logging never raises ZeroDivisionError.
    time_per_image = (
        total_time * num_devices / num_images if num_images else float("nan")
    )
    logger.info(
        "Total inference time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, time_per_image, num_devices
        )
    )

    # We have an optimised path for COCO which takes advantage of more parallelism.
    # If not using COCO, fall back to regular path: gather predictions from all ranks
    # and call evaluate on those results.
    # NOTE(review): the early return is nested under the non-COCO branch so that
    # every rank reaches evaluate() on the COCO path — confirm against upstream.
    if not isinstance(dataset, datasets.COCODataset):
        predictions = _accumulate_predictions_from_multiple_gpus(predictions)
        if not is_main_process():
            return None

    if output_folder and is_main_process():
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    extra_args = dict(
        box_only=box_only,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
    return evaluate(
        dataset=dataset,
        predictions=predictions,
        output_folder=output_folder,
        **extra_args
    )