# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright (c) 2018-2019 NVIDIA CORPORATION. All rights reserved.
"""Default configuration values, held in the module-level yacs ``CfgNode`` ``_C``."""
import os

from yacs.config import CfgNode as CN

# -----------------------------------------------------------------------------
# Convention about Training / Test specific parameters
# -----------------------------------------------------------------------------
# Whenever an argument can be used either for training or for testing, the
# corresponding name will be post-fixed by _TRAIN for a training parameter,
# or _TEST for a test-specific parameter.
# For example, the number of images during training will be
# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
# IMAGES_PER_BATCH_TEST

# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------

_C = CN()

# AMP optimization level
#   O0  Fully FP32
#   O1  Weights are stored in FP32, but some ops are performed in FP16.
#       O1 is the recommended setting for stability
#   O2  Mostly FP16, model cast to FP16 except batch norm.
#       O2 is faster but introduces some stability issues
#   O3  Pure FP16, faster still, but less stable
#   O4  FP16 and NHWC, uses custom ops to go even faster.
#       O4 is the MLPerf setting: very fast, but hard to tune and keep stable
_C.OPT_LEVEL = "O1"

# Distribution type
#   torch  PyTorch DDP
#   smd    SageMaker Distributed Data Parallel.
#          smd accelerates training when running distributed on multiple
#          p3.16, p3dn, and p4d instances. It is only available on these
#          instance types.
#   auto   Automatically enable smd when available, otherwise use PyTorch DDP
_C.DISTRIBUTION = "auto"

_C.OUTPUT_DIR = "/opt/ml/checkpoints"

_C.MODEL = CN()
_C.MODEL.RPN_ONLY = False
_C.MODEL.MASK_ON = False
_C.MODEL.KEYPOINT_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
_C.MODEL.CLS_AGNOSTIC_BBOX_REG = False

# If the WEIGHT starts with a catalog://, like :R-50, the code will look for
# the path in paths_catalog. Else, it will use it as the specified absolute
# path
_C.MODEL.WEIGHT = '/opt/ml/input/data/weights/pytorch/R-50.pkl'

# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# Dataloader type
_C.INPUT.DATALOADER = 'COCO'
# Train input dir
_C.INPUT.TRAIN_INPUT_DIR = '/opt/ml/input/data/coco/train'
# Validation input dir
_C.INPUT.VAL_INPUT_DIR = '/opt/ml/input/data/coco/val'
# Train annotations (the default is a path to a JSON file, despite the _DIR
# suffix in the key name)
_C.INPUT.TRAIN_ANNO_DIR = '/opt/ml/input/data/coco/annotations/instances_train2017.json'
# Validation annotations (likewise a path to a JSON file)
_C.INPUT.VAL_ANNO_DIR = '/opt/ml/input/data/coco/annotations/instances_val2017.json'
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# Per-channel mean used for image normalization
_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717]
# Per-channel standard deviation used for image normalization
_C.INPUT.PIXEL_STD = [1.0, 1.0, 1.0]
# Convert image to BGR format (for Caffe2 models), in range 0-255
_C.INPUT.TO_BGR255 = True
# Add multiplicative gaussian noise
_C.INPUT.ADD_NOISE = False

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# List of the dataset names for training, as present in paths_catalog.py
_C.DATASETS.TRAIN = ()
# List of the dataset names for testing, as present in paths_catalog.py
_C.DATASETS.TEST = ()

# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4
# If > 0, this enforces that each collated batch should have a size divisible
# by SIZE_DIVISIBILITY
_C.DATALOADER.SIZE_DIVISIBILITY = 0
# If True, each batch should contain only images for which the aspect ratio
# is compatible. This groups portrait images together, and landscape images
# are not batched with portrait images.
_C.DATALOADER.ASPECT_RATIO_GROUPING = True

# ---------------------------------------------------------------------------- #
# Backbone options
# ---------------------------------------------------------------------------- #
_C.MODEL.BACKBONE = CN()

# The backbone conv body to use
# The string must match a function that is imported in modeling.model_builder
# (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN
# backbone)
# NOTE(review): the default below does not follow the dotted form of that
# example -- confirm how the value is resolved.
_C.MODEL.BACKBONE.CONV_BODY = "R-50-FPN"

# Add StopGrad at a specified stage so the bottom layers are frozen
_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2
_C.MODEL.BACKBONE.OUT_CHANNELS = 256 * 4
# GN for backbone
_C.MODEL.BACKBONE.USE_GN = False

# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()
_C.MODEL.FPN.USE_GN = False
_C.MODEL.FPN.USE_RELU = False

# ---------------------------------------------------------------------------- #
# Group Norm options
# ---------------------------------------------------------------------------- #
_C.MODEL.GROUP_NORM = CN()
# Number of dimensions per group in GroupNorm (-1 if using NUM_GROUPS)
_C.MODEL.GROUP_NORM.DIM_PER_GP = -1
# Number of groups in GroupNorm (-1 if using DIM_PER_GP)
_C.MODEL.GROUP_NORM.NUM_GROUPS = 32
# GroupNorm's small constant in the denominator
_C.MODEL.GROUP_NORM.EPSILON = 1e-5

# ---------------------------------------------------------------------------- #
# RPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.RPN = CN()
_C.MODEL.RPN.USE_FPN = False
# Base RPN anchor sizes given in absolute pixels w.r.t. the scaled network input
_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)
# Stride of the feature map that RPN is attached to.
# For FPN, number of strides should match number of scales
_C.MODEL.RPN.ANCHOR_STRIDE = (16,)
# RPN anchor aspect ratios
_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
_C.MODEL.RPN.STRADDLE_THRESH = 0
# Minimum overlap required between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
# ==> positive RPN example)
_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7
# Maximum overlap allowed between an anchor and ground-truth box for the
# (anchor, gt box) pair to be a negative example (IoU < BG_IOU_THRESHOLD
# ==> negative RPN example)
_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3
# Total number of RPN examples per image
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
# Target fraction of foreground (positive) examples per RPN minibatch
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000
# Number of top scoring RPN proposals to keep after applying NMS
_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000
# NMS threshold used on RPN proposals
_C.MODEL.RPN.NMS_THRESH = 0.7
# Proposal height and width both need to be greater than RPN_MIN_SIZE
# (at the scale used during training or inference)
_C.MODEL.RPN.MIN_SIZE = 0
# Number of top scoring RPN proposals to keep after combining proposals from
# all FPN levels
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_PER_IMAGE = True
# Custom rpn head, empty to use default conv or separable conv
_C.MODEL.RPN.RPN_HEAD = "SingleConvRPNHead"
# NOTE(review): presumably label smoothing, like ROI_MASK_HEAD.LS -- confirm
_C.MODEL.RPN.LS = 0.0

# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.USE_FPN = False
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5
# Overlap threshold for an RoI to be considered background
# (class = 0 if overlap in [0, BG_IOU_THRESHOLD))
_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
# RoI minibatch size *per image* (number of regions of interest [ROIs])
# Total number of RoIs per training minibatch =
#   ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 2 * 8 = 8192
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25

# Only used on test mode

# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
# balance obtaining high recall with not having too many low precision
# detections that will slow down inference post processing steps (like NMS)
_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
_C.MODEL.ROI_HEADS.NMS = 0.5
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100

# ---------------------------------------------------------------------------- #
# CASCADE ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS.CASCADE = CN()
# One entry per cascade stage (three entries, matching STAGES = 3 below)
_C.MODEL.ROI_HEADS.CASCADE.FG_IOU_THRESHOLD = (0.5, 0.6, 0.7)
_C.MODEL.ROI_HEADS.CASCADE.BG_IOU_THRESHOLD = (0.5, 0.6, 0.7)
_C.MODEL.ROI_HEADS.CASCADE.BBOX_REG_WEIGHTS = (
    (10.0, 10.0, 5.0, 5.0),
    (20.0, 20.0, 10.0, 10.0),
    (30.0, 30.0, 15.0, 15.0),
)
_C.MODEL.ROI_HEADS.CASCADE.STAGES = 3
_C.MODEL.ROI_HEADS.CASCADE.STAGE_WEIGHTS = (1.0, 1.0, 1.0)

# ---------------------------------------------------------------------------- #
# BOX ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_BOX_HEAD = CN()
_C.MODEL.ROI_BOX_HEAD.TYPE = "StandardBoxHead"
# Whether to decode prediction bbox
_C.MODEL.ROI_BOX_HEAD.DECODE = False
_C.MODEL.ROI_BOX_HEAD.LOSS = "SmoothL1Loss"
_C.MODEL.ROI_BOX_HEAD.CARL = False
_C.MODEL.ROI_BOX_HEAD.ISR_P = False
_C.MODEL.ROI_BOX_HEAD.ISR_N = False
_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor"
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 81
# Hidden layer dimension when using an MLP for the RoI box head
_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024
# GN
_C.MODEL.ROI_BOX_HEAD.USE_GN = False
# Dilation
_C.MODEL.ROI_BOX_HEAD.DILATION = 1
_C.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM = 256
_C.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS = 4

# ---------------------------------------------------------------------------- #
# MASK ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256)
_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
# Whether or not to resize and translate masks to the input image.
_C.MODEL.ROI_MASK_HEAD.POSTPROCESS_MASKS = False
_C.MODEL.ROI_MASK_HEAD.POSTPROCESS_MASKS_THRESHOLD = 0.5
# Dilation
_C.MODEL.ROI_MASK_HEAD.DILATION = 1
# GN
_C.MODEL.ROI_MASK_HEAD.USE_GN = False
# Label smoothing
_C.MODEL.ROI_MASK_HEAD.LS = 0.0

# ---------------------------------------------------------------------------- #
# KEYPOINT ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_KEYPOINT_HEAD = CN()
_C.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR = "KeypointRCNNFeatureExtractor"
_C.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR = "KeypointRCNNPredictor"
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_KEYPOINT_HEAD.MLP_HEAD_DIM = 1024
# Eight entries, each 512
_C.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS = tuple(512 for _ in range(8))
_C.MODEL.ROI_KEYPOINT_HEAD.RESOLUTION = 14
_C.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES = 17
_C.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True

# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt})
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()

# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
_C.MODEL.RESNETS.NUM_GROUPS = 1

# Baseline width of each group
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64

# Place the stride 2 conv on the 1x1 filter
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True

# Residual transformation function
_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm"
# ResNet's stem function (conv1 and pool1)
_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm"

# Apply dilation in stage "res5"
_C.MODEL.RESNETS.RES5_DILATION = 1

_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64

# ---------------------------------------------------------------------------- #
# Cascade Options
# ---------------------------------------------------------------------------- #
# NOTE(review): these flat keys carry the same values as
# ROI_HEADS.CASCADE.STAGES / ROI_HEADS.CASCADE.STAGE_WEIGHTS defined earlier.
# Both spellings are kept because either may be read elsewhere -- confirm
# which one is canonical before removing the other.
_C.MODEL.ROI_HEADS.CASCADE_STAGES = 3
_C.MODEL.ROI_HEADS.CASCADE_STAGE_WEIGHTS = (1.0, 1.0, 1.0)

# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
_C.SOLVER.MAX_ITER = 40000

_C.SOLVER.BASE_LR = 0.001
_C.SOLVER.BIAS_LR_FACTOR = 2

_C.SOLVER.MOMENTUM = 0.9

_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0

_C.SOLVER.GAMMA = 0.1
_C.SOLVER.ALPHA = 0.001
_C.SOLVER.STEPS = (30000,)

_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500
_C.SOLVER.WARMUP_METHOD = "linear"

_C.SOLVER.CHECKPOINT_PERIOD = 2500

_C.SOLVER.OPTIMIZER = "SGD"
_C.SOLVER.BETA1 = 0.9
_C.SOLVER.BETA2 = 0.5

_C.SOLVER.LR_SCHEDULE = "MULTISTEP"

_C.SOLVER.GRADIENT_CLIPPING = 0.0

# Number of images per batch
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will
# see 2 images per batch
_C.SOLVER.IMS_PER_BATCH = 16

# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
_C.TEST.EXPECTED_RESULTS = []
_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4
# Number of images per batch
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will
# see 2 images per batch
_C.TEST.IMS_PER_BATCH = 8
# Number of detections per image
_C.TEST.DETECTIONS_PER_IMG = 100
# Run eval each epoch
_C.TEST.PER_EPOCH_EVAL = True

# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
_C.SAVE_CHECKPOINTS = True
_C.SAVE_INTERVAL = 1
_C.LOG_INTERVAL = 50
# Disable reduced logging
_C.DISABLE_REDUCED_LOGGING = False
# Runner hooks
_C.HOOKS = []