# Benchmark descriptor: MXNet single-node CPU training of resnet18v2 on cifar10.
# Tokens in curly braces ({mxnet_version}, {py_version}) are left verbatim by
# TOML itself; any substitution is up to whatever consumes this file.

# Header meta
spec_version = "0.1.0"

# These fields don't have any impact on the job to run; they contain
# merely informative data so the benchmark can be categorized when displayed
# in the dashboard.
[info]
# Placeholder spelling matches the one used in [env].docker_image.
description = """
MXNet training single-node {mxnet_version} CPU {py_version} resnet18v2 cifar10
"""
scheduling = "single_run"

[info.labels]
# Labels and values must be 63 characters or less, beginning and ending with
# an alphanumeric character ([a-z0-9A-Z]), with dashes (-), underscores (_),
# dots (.), and alphanumerics between.
task_name = "mx_train_single_node_cpu_{py_version}_resnet18v2_cifar10"

# 1. Hardware
[hardware]
instance_type = "c5.18xlarge"
strategy = "single_node"
aws_zone_id = "use1-az2"

# 2. Environment
[env]
docker_image = "mxnet-training:{mxnet_version}-cpu-{py_version}"

[env.vars]
# Environment variable values are kept as strings.
OMP_NUM_THREADS = "36"
KMP_AFFINITY = 'granularity=fine,verbose,compact,1,0'

# 3. Machine learning related settings:
# dataset, benchmark code and parameters it takes
[ml]
benchmark_code = "python $(BAI_SCRIPTS_PATH)/deeplearning-benchmark/image_classification/image_classification.py"
args = "--model resnet18_v2 --dataset cifar10 --mode symbolic --gpus 0 --epochs 2 --log-interval 50 --kvstore local --dtype='float32' --batch-size=64"

# 5. Output
[output]
# [Opt] Custom metrics descriptions
# List all required metrics descriptions below.
# Make an entry in the same format as the one below.

[[output.metrics]]
# Name of the metric that will appear in the dashboards.
name = "throughput"
# Metric unit (required)
units = "img/sec"
# Pattern for log parsing for this metric. The single capture group matches
# either a decimal number or a plain integer.
pattern = 'Speed: (\d+\.\d+|\d+) samples\/sec'