# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright (c) 2018-2019 NVIDIA CORPORATION. All rights reserved.
from __future__ import division

import math

import torch


class ImageList(object):
    """
    Structure that holds a list of images (of possibly varying sizes) as a
    single tensor. This works by padding the images to the same size, and
    storing the original size of each image in a field.
    """

    def __init__(self, tensors, image_sizes):
        """
        Arguments:
            tensors (Tensor): batched, padded image data
            image_sizes (list[tuple[int, int]]): original (H, W) of each image
        """
        self.tensors = tensors
        self.image_sizes = image_sizes
        # Per-image sizes reordered to (W, H).
        self.image_sizes_wh = [[ims[1], ims[0]] for ims in image_sizes]
        N = len(self.image_sizes)
        if tensors.is_cuda:
            if isinstance(image_sizes, torch.Tensor) and image_sizes.is_cuda:
                self.image_sizes_tensor = image_sizes
            else:
                # Stage through pinned memory so the host-to-device copy can
                # be issued non-blocking.
                self.image_sizes_tensor = torch.tensor(
                    self.image_sizes_wh, dtype=torch.float32, pin_memory=True
                ).to(device=tensors.device, non_blocking=True).view(N, -1)
        else:
            self.image_sizes_tensor = torch.tensor(
                self.image_sizes_wh, dtype=torch.float32, device=tensors.device
            ).view(N, -1)

    def to(self, *args, **kwargs):
        # Note: this is a specialized fast path rather than a general .to().
        # It always targets CUDA and ignores *args / **kwargs so the copy can
        # run non-blocking from (presumably pinned) host memory.
        if self.tensors.is_cuda:
            return ImageList(self.tensors, self.image_sizes)
        else:
            cast_tensor = torch.empty_like(self.tensors, device='cuda')
            cast_tensor.copy_(self.tensors, non_blocking=True)
            # General equivalent: cast_tensor = self.tensors.to(*args, **kwargs)
            return ImageList(cast_tensor, self.image_sizes)

    def pin_memory(self):
        pinned_tensor = self.tensors.pin_memory()
        return ImageList(pinned_tensor, self.image_sizes)


def to_image_list(tensors, size_divisible=0, shapes=None):
    """
    tensors can be an ImageList, a torch.Tensor or an iterable of Tensors.
    It can't be a numpy array. When tensors is an iterable of Tensors, it
    pads the Tensors with zeros so that they all have the same shape.
    """
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        assert False, "code path not tested with cuda graphs"
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors
    elif isinstance(tensors, torch.Tensor):
        assert False, "code path not tested with cuda graphs"
        # single tensor shape can be inferred
        assert tensors.dim() == 4
        image_sizes = [tensor.shape[-2:] for tensor in tensors]
        return ImageList(tensors, image_sizes)
    elif isinstance(tensors, (tuple, list)):
        max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))
        if shapes is None:
            # TODO Ideally, just remove this and let the model handle
            # arbitrary input sizes
            if size_divisible > 0:
                stride = size_divisible
                max_size = list(max_size)
                max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
                max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
                max_size = tuple(max_size)

            batch_shape = (len(tensors),) + max_size
            batched_imgs = tensors[0].new_zeros(batch_shape)
            for img, pad_img in zip(tensors, batched_imgs):
                pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
        else:
            # Each image tensor is CHW; pick the smallest (by area) shape in
            # `shapes` that can hold every image in the batch.
            cost, H_best, W_best = None, None, None
            C, H, W = max_size
            for H_pad, W_pad in shapes:
                if H <= H_pad and W <= W_pad:
                    if cost is None or H_pad * W_pad < cost:
                        cost, H_best, W_best = H_pad * W_pad, H_pad, W_pad
            assert H_best is not None, \
                "no shape in `shapes` is large enough for %dx%d" % (H, W)
            batch_shape = (len(tensors), C, H_best, W_best)
            batched_imgs = tensors[0].new_zeros(batch_shape)
            for img, pad_img in zip(tensors, batched_imgs):
                pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

        image_sizes = [im.shape[-2:] for im in tensors]

        return ImageList(batched_imgs, image_sizes)
    else:
        raise TypeError(
            "Unsupported type for to_image_list: {}".format(type(tensors))
        )