# gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:43:59] $ k get pods -A NAMESPACE NAME READY STATUS RESTARTS AGE kube-system aws-node-ltscf 1/1 Running 0 12d kube-system aws-node-w9j89 1/1 Running 0 12d kube-system coredns-86d5cbb4bd-5zhcq 1/1 Running 0 12d kube-system coredns-86d5cbb4bd-skpqd 1/1 Running 0 12d kube-system kube-proxy-6v526 1/1 Running 0 12d kube-system kube-proxy-ngsvh 1/1 Running 0 12d kube-system nvidia-device-plugin-daemonset-lzns2 1/1 Running 0 12d kube-system nvidia-device-plugin-daemonset-tn8tc 1/1 Running 0 12d kubectl-6296 update-demo-nautilus-6qrgz 1/1 Running 0 12d kubectl-6296 update-demo-nautilus-dk4v6 1/1 Running 0 12d mpi-operator mpi-operator-5b7469bbf8-6xhvw 1/1 Running 0 5h4m # gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:46:03] $ k logs -f tensorflow-benchmarks-efa-launcher-prdkf Error from server (NotFound): pods "tensorflow-benchmarks-efa-launcher-prdkf" not found # gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:46:15] C:1 $ k delete -f mpi-tf-bc.yaml Error from server (NotFound): error when deleting "mpi-tf-bc.yaml": mpijobs.kubeflow.org "tensorflow-benchmarks-efa" not found # gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:46:17] C:1 $ k delete -f mpi-tf-bc.yaml Error from server (NotFound): error when deleting "mpi-tf-bc.yaml": mpijobs.kubeflow.org "tensorflow-benchmarks-efa" not found # gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:46:22] C:1 $ k create -f mpi-tf-bc.yaml mpijob.kubeflow.org/tensorflow-benchmarks-efa created # gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:46:25] $ k logs -f tensorflow-benchmarks-efa-launcher-s8btl + POD_NAME=tensorflow-benchmarks-efa-worker-0 + shift + /opt/kube/kubectl exec tensorflow-benchmarks-efa-worker-0 -- /bin/sh -c PATH=/usr/local/bin:$PATH ; export PATH ; LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; DYLD_LIBRARY_PATH=/usr/local/lib:$DYLD_LIBRARY_PATH ; export DYLD_LIBRARY_PATH ; /usr/local/bin/orted -mca ess "env" -mca ess_base_jobid "1462566912" -mca ess_base_vpid 1 -mca ess_base_num_procs "3" -mca orte_node_regex "tensorflow-benchmarks-efa-launcher-s[1:8]btl,tensorflow-benchmarks-efa-worker-[1:0-1]@0(3)" -mca orte_hnp_uri "1462566912.0;tcp://192.168.26.172:42231" --mca plm_rsh_no_tree_spawn "1" --mca pml "ob1" --mca btl_vader_single_copy_mechanism "none" --mca oob_tcp_if_include "eth0" --mca btl_tcp_if_include "eth0" --mca btl "^openib" -mca plm "rsh" -mca orte_default_hostfile "/etc/mpi/hostfile" -mca plm_rsh_agent "/etc/mpi/kubexec.sh" -mca hwloc_base_binding_policy "none" -mca pmix "^s1,s2,cray,isolated" + POD_NAME=tensorflow-benchmarks-efa-worker-1 + shift + /opt/kube/kubectl exec tensorflow-benchmarks-efa-worker-1 -- /bin/sh -c PATH=/usr/local/bin:$PATH ; export PATH ; LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; DYLD_LIBRARY_PATH=/usr/local/lib:$DYLD_LIBRARY_PATH ; export DYLD_LIBRARY_PATH ; /usr/local/bin/orted -mca ess "env" -mca ess_base_jobid "1462566912" -mca ess_base_vpid 2 -mca ess_base_num_procs "3" -mca orte_node_regex "tensorflow-benchmarks-efa-launcher-s[1:8]btl,tensorflow-benchmarks-efa-worker-[1:0-1]@0(3)" -mca orte_hnp_uri "1462566912.0;tcp://192.168.26.172:42231" --mca plm_rsh_no_tree_spawn "1" --mca pml "ob1" --mca btl_vader_single_copy_mechanism "none" --mca oob_tcp_if_include "eth0" --mca btl_tcp_if_include "eth0" --mca btl "^openib" -mca plm "rsh" -mca orte_default_hostfile "/etc/mpi/hostfile" -mca plm_rsh_agent "/etc/mpi/kubexec.sh" -mca hwloc_base_binding_policy "none" -mca pmix "^s1,s2,cray,isolated" WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. Instructions for updating: non-resource variables are not supported in the long term 2020-06-30 07:46:33.320780: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.321028: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.321320: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.321306: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.321371: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.321418: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.321530: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.321691: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F 2020-06-30 07:46:33.330896: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.330882: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.330875: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.330884: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.330895: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.330875: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.331038: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x541c650 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331064: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.331057: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3ce89c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331082: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.331057: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4892d10 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331081: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.331114: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4937750 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331144: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.331107: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4c85320 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331130: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.331107: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x40ba560 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331130: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.331232: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.331307: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2499995000 Hz 2020-06-30 07:46:33.331452: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x42ca6a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331489: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.331497: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x426c760 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:33.331518: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2020-06-30 07:46:33.333835: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:33.333814: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:33.333815: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:33.333813: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:33.333816: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:33.334004: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:33.334052: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:33.334055: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1 2020-06-30 07:46:35.503585: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.506218: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4275110 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.506249: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.507127: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.509138: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.509906: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:19.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.509984: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.511975: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.513895: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.513942: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x3c99550 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.513975: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.514200: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.514917: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.516053: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.516336: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.517108: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.518677: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.521012: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.521132: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.522609: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:17.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.522723: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.524659: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.526316: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.526636: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.527960: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.528100: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4027310 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.528126: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.528606: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.529395: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.529632: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.529655: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.530880: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.531304: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.533003: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x484c1b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.533033: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.533555: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.533679: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.535056: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.536964: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.543338: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4063b90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.543370: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.544786: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x45a0580 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.544812: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.545125: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x54660a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.545152: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.545878: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1c.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.545955: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.547300: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x492d400 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices: 2020-06-30 07:46:35.547328: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0 2020-06-30 07:46:35.547703: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.547873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.548666: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.548850: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.549313: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.549537: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.549618: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.549747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.550504: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 3 2020-06-30 07:46:35.550562: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.551193: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1b.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.551256: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.551401: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.552475: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.553026: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.554612: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.554902: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.556278: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.556381: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.556658: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.557645: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.559988: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:16.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.560073: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.560681: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:18.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.560758: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.560850: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1a.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.560919: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.561306: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1d.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:35.561390: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.561390: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 1 2020-06-30 07:46:35.561483: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.561401: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.561518: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.561785: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.562433: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.562517: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.563188: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:35.563418: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.563769: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.563812: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.563851: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.564121: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.564161: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.564729: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:35.564995: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.565035: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:35.565554: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.565838: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.565836: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.566538: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.566795: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:35.566818: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.566882: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.567842: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:35.568615: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.570305: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.570417: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.570434: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.570480: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.570532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.570592: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.571642: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:35.571764: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.573502: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 6 2020-06-30 07:46:35.573543: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.584111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 5 2020-06-30 07:46:35.584170: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.586302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.586735: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.586817: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.588962: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.604197: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 2020-06-30 07:46:35.604256: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.604549: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 4 2020-06-30 07:46:35.604612: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.604727: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 2 2020-06-30 07:46:35.604783: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.605511: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 7 2020-06-30 07:46:35.605576: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:35.841027: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.841090: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 3 2020-06-30 07:46:35.841099: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 3: N 2020-06-30 07:46:35.841461: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.843227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.844918: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 3, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:19.0, compute capability: 7.0) 2020-06-30 07:46:35.846574: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.846609: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 1 2020-06-30 07:46:35.846616: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 1: N 2020-06-30 07:46:35.846967: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.848691: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.850637: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 1, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:17.0, compute capability: 7.0) TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. W0630 07:46:35.864739 139626098124608 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.866204 139626098124608 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. 2020-06-30 07:46:35.867712: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.867765: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 6 2020-06-30 07:46:35.867774: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 6: N 2020-06-30 07:46:35.868105: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.869844: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. W0630 07:46:35.870497 140610220824384 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. 2020-06-30 07:46:35.871548: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 6, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1c.0, compute capability: 7.0) WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.872159 140610220824384 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. 2020-06-30 07:46:35.873902: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.873942: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 5 2020-06-30 07:46:35.873949: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 5: N 2020-06-30 07:46:35.874244: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.876000: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model 2020-06-30 07:46:35.877670: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 5, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1b.0, compute capability: 7.0) 2020-06-30 07:46:35.881092: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.881128: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 2020-06-30 07:46:35.881135: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N 2020-06-30 07:46:35.881165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.881204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 7 2020-06-30 07:46:35.881212: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 7: N 2020-06-30 07:46:35.881466: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.881593: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.882065: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.882103: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 2 2020-06-30 07:46:35.882111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 2: N 2020-06-30 07:46:35.882605: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.883388: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:35.883419: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 4 2020-06-30 07:46:35.883426: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 4: N TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model 2020-06-30 07:46:35.884624: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.885877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.886317: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:35.889499: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. W0630 07:46:35.890618 139626098124608 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. W0630 07:46:35.890635 140143171553088 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. 2020-06-30 07:46:35.891474: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.892248 140143171553088 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. 2020-06-30 07:46:35.892978: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:16.0, compute capability: 7.0) 2020-06-30 07:46:35.893255: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 7, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1d.0, compute capability: 7.0) 2020-06-30 07:46:35.895531: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 2, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:18.0, compute capability: 7.0) 2020-06-30 07:46:35.895787: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 4, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1a.0, compute capability: 7.0) WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. W0630 07:46:35.896726 140610220824384 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. W0630 07:46:35.896861 140466343798592 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.898398 140466343798592 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model TensorFlow: 2.1 Model: resnet50 Dataset: imagenet (synthetic) Mode: training SingleSess: False Batch size: 512 global 64 per device Num batches: 100 Num epochs: 0.04 Devices: ['horovod/gpu:0', 'horovod/gpu:1', 'horovod/gpu:2', 'horovod/gpu:3', 'horovod/gpu:4', 'horovod/gpu:5', 'horovod/gpu:6', 'horovod/gpu:7'] NUMA bind: False Data format: NCHW Optimizer: sgd Variables: horovod ========== Generating training model WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. W0630 07:46:35.912871 139688332601152 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. W0630 07:46:35.913911 139775798007616 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.914348 139688332601152 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. W0630 07:46:35.914829 139831491540800 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.915418 139775798007616 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.915336 140086273185600 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:134: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version. Instructions for updating: Use `tf.keras.layers.Conv2D` instead. WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.916163 139831491540800 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.916851 140086273185600 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/layers/convolutional.py:424: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. Instructions for updating: Please use `layer.__call__` method instead. W0630 07:46:35.916841 140143171553088 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. W0630 07:46:35.922286 140466343798592 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. W0630 07:46:35.938261 139688332601152 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. W0630 07:46:35.939834 139831491540800 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead.WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. W0630 07:46:35.939791 139775798007616 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. W0630 07:46:35.940898 140086273185600 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/convnet_builder.py:266: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.MaxPooling2D instead. Initializing graph Initializing graph Initializing graph Initializing graph Initializing graph Initializing graph Initializing graph Initializing graph WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:37.962550 140610220824384 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:37.974137 140466343798592 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:37.985156 139626098124608 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:37.987121 140143171553088 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:37.994172 139831491540800 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:38.023112 140086273185600 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:38.031354 139775798007616 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession WARNING:tensorflow:From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession W0630 07:46:38.135419 139688332601152 deprecation.py:323] From /workspace/benchmarks/scripts/tf_cnn_benchmarks/benchmark_cnn.py:2268: Supervisor.__init__ (from tensorflow.python.training.supervisor) is deprecated and will be removed in a future version. Instructions for updating: Please switch to tf.train.MonitoredTrainingSession 2020-06-30 07:46:38.247110: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.248880: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:17.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.248934: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.248966: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.248978: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.248988: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.248999: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.249009: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.249021: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.249074: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.250741: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.252439: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 1 2020-06-30 07:46:38.252476: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.252484: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 1 2020-06-30 07:46:38.252490: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 1: N 2020-06-30 07:46:38.252648: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.254310: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.255967: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 1, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:17.0, compute capability: 7.0) 2020-06-30 07:46:38.259760: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.261510: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1b.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.261561: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.261587: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.261598: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.261609: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.261619: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.261629: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.261639: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.261705: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.263413: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.265051: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 5 2020-06-30 07:46:38.265082: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.265089: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 5 2020-06-30 07:46:38.265096: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 5: N 2020-06-30 07:46:38.265238: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.266900: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.268566: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 5, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1b.0, compute capability: 7.0) 2020-06-30 07:46:38.273054: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.274822: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1c.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.274876: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.274905: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.274916: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.274927: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.274938: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.274947: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.274958: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.275018: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.276703: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.278331: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 6 2020-06-30 07:46:38.278362: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.278370: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 6 2020-06-30 07:46:38.278377: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 6: N 2020-06-30 07:46:38.278512: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.280186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.281865: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 6, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1c.0, compute capability: 7.0) 2020-06-30 07:46:38.282893: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.284660: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1a.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.284706: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.284736: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.284748: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.284758: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.284768: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.284777: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.284787: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.284843: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.286510: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.288153: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 4 2020-06-30 07:46:38.288184: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.288190: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 4 2020-06-30 07:46:38.288196: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 4: N 2020-06-30 07:46:38.288334: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.290000: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.291685: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 4, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1a.0, compute capability: 7.0) 2020-06-30 07:46:38.305044: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.306822: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:19.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.306878: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.306911: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.306923: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.306934: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.306945: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.306956: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.306968: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.307024: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.308716: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.310349: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 3 2020-06-30 07:46:38.310385: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.310393: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 3 2020-06-30 07:46:38.310399: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 3: N 2020-06-30 07:46:38.310538: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.312285: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.313955: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 3, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:19.0, compute capability: 7.0) 2020-06-30 07:46:38.314988: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.316753: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:18.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.316822: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.316862: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.316876: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.316889: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.316902: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.316914: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.316927: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.316995: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.318665: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.320306: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 2 2020-06-30 07:46:38.320340: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.320347: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 2 2020-06-30 07:46:38.320353: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 2: N 2020-06-30 07:46:38.320498: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.320791: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.323782: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.324427: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:1d.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.324503: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.324539: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.324553: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.324566: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.324578: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.324591: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.324605: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.324685: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.326370: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 2, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:18.0, compute capability: 7.0) 2020-06-30 07:46:38.327636: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.329287: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 7 2020-06-30 07:46:38.329323: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.329331: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 7 2020-06-30 07:46:38.329338: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 7: N 2020-06-30 07:46:38.329489: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.331142: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.332873: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 7, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:1d.0, compute capability: 7.0) 2020-06-30 07:46:38.438706: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.445676: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: pciBusID: 0000:00:16.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0 coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s 2020-06-30 07:46:38.445749: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1 2020-06-30 07:46:38.445781: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:38.445792: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10 2020-06-30 07:46:38.445802: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10 2020-06-30 07:46:38.445812: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10 2020-06-30 07:46:38.445822: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10 2020-06-30 07:46:38.445832: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:38.445914: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.453086: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.462868: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0 2020-06-30 07:46:38.462906: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix: 2020-06-30 07:46:38.462913: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] 0 2020-06-30 07:46:38.462919: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0: N 2020-06-30 07:46:38.463297: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.473397: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2020-06-30 07:46:38.482393: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 30525 MB memory) -> physical GPU (device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:00:16.0, compute capability: 7.0) INFO:tensorflow:Running local_init_op. I0630 07:46:40.086916 140610220824384 session_manager.py:504] Running local_init_op. INFO:tensorflow:Running local_init_op. I0630 07:46:40.154705 140143171553088 session_manager.py:504] Running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.155350 140610220824384 session_manager.py:507] Done running local_init_op. INFO:tensorflow:Running local_init_op. I0630 07:46:40.157222 140466343798592 session_manager.py:504] Running local_init_op. INFO:tensorflow:Running local_init_op. I0630 07:46:40.217778 139831491540800 session_manager.py:504] Running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.219643 140143171553088 session_manager.py:507] Done running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.224036 140466343798592 session_manager.py:507] Done running local_init_op. INFO:tensorflow:Running local_init_op. I0630 07:46:40.273762 139775798007616 session_manager.py:504] Running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.282853 139831491540800 session_manager.py:507] Done running local_init_op. INFO:tensorflow:Running local_init_op. I0630 07:46:40.285004 140086273185600 session_manager.py:504] Running local_init_op. INFO:tensorflow:Running local_init_op. I0630 07:46:40.286236 139626098124608 session_manager.py:504] Running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.331514 139775798007616 session_manager.py:507] Done running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.350394 140086273185600 session_manager.py:507] Done running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.350909 139626098124608 session_manager.py:507] Done running local_init_op. INFO:tensorflow:Running local_init_op. I0630 07:46:40.365083 139688332601152 session_manager.py:504] Running local_init_op. INFO:tensorflow:Done running local_init_op. I0630 07:46:40.432911 139688332601152 session_manager.py:507] Done running local_init_op. Running warm up Running warm up Running warm up Running warm up Running warm up Running warm up Running warm up Running warm up 2020-06-30 07:46:42.801788: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:42.832306: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:42.844861: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:42.857515: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:42.877508: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:42.878547: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:42.932361: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:42.975070: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10 2020-06-30 07:46:43.133516: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:43.164275: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:43.181669: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:43.192289: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:43.239649: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:43.252805: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:43.271896: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 2020-06-30 07:46:43.344010: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Using network AWS Libfabric NCCL version 2.6.4+cuda10.1 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Using network AWS Libfabric tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Using network AWS Libfabric tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Using network AWS Libfabric tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Using network AWS Libfabric tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Using network AWS Libfabric tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Using network AWS Libfabric tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Bootstrap : Using [0]eth0:192.168.2.225<0> tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1. tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI Forcing AWS OFI ndev 4 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI Selected Provider is efa tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol. tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Using network AWS Libfabric tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO NET/OFI [5] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO NET/OFI [6] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO NET/OFI [4] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO NET/OFI [1] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO NET/OFI [2] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO NET/OFI [7] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO NET/OFI [3] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/ tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO NET/OFI [0] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Trees [0] 2/-1/-1->3->0|0->3->2/-1/-1 [1] 2/-1/-1->3->0|0->3->2/-1/-1 [2] -1/-1/-1->3->2|2->3->-1/-1/-1 [3] -1/-1/-1->3->2|2->3->-1/-1/-1 [4] 7/-1/-1->3->1|1->3->7/-1/-1 [5] 1/-1/-1->3->7|7->3->1/-1/-1 [6] 2/-1/-1->3->0|0->3->2/-1/-1 [7] 2/-1/-1->3->0|0->3->2/-1/-1 [8] -1/-1/-1->3->2|2->3->-1/-1/-1 [9] -1/-1/-1->3->2|2->3->-1/-1/-1 [10] 7/-1/-1->3->1|1->3->7/-1/-1 [11] 1/-1/-1->3->7|7->3->1/-1/-1 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Trees [0] 7/-1/-1->6->5|5->6->7/-1/-1 [1] 7/-1/-1->6->5|5->6->7/-1/-1 [2] 5/-1/-1->6->7|7->6->5/-1/-1 [3] 5/-1/-1->6->7|7->6->5/-1/-1 [4] 2/-1/-1->6->4|4->6->2/-1/-1 [5] 4/-1/-1->6->2|2->6->4/-1/-1 [6] 7/-1/-1->6->5|5->6->7/-1/-1 [7] 7/-1/-1->6->5|5->6->7/-1/-1 [8] 5/-1/-1->6->7|7->6->5/-1/-1 [9] 5/-1/-1->6->7|7->6->5/-1/-1 [10] 2/-1/-1->6->4|4->6->2/-1/-1 [11] 4/-1/-1->6->2|2->6->4/-1/-1 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Trees [0] 6/-1/-1->5->1|1->5->6/-1/-1 [1] 6/-1/-1->5->1|1->5->6/-1/-1 [2] 1/-1/-1->5->6|6->5->1/-1/-1 [3] 1/-1/-1->5->6|6->5->1/-1/-1 [4] 4/-1/-1->5->7|7->5->4/-1/-1 [5] 7/-1/-1->5->4|4->5->7/-1/-1 [6] 6/-1/-1->5->1|1->5->6/-1/-1 [7] 6/-1/-1->5->1|1->5->6/-1/-1 [8] 1/-1/-1->5->6|6->5->1/-1/-1 [9] 1/-1/-1->5->6|6->5->1/-1/-1 [10] 4/-1/-1->5->7|7->5->4/-1/-1 [11] 7/-1/-1->5->4|4->5->7/-1/-1 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Trees [0] -1/-1/-1->4->7|7->4->-1/-1/-1 [1] -1/-1/-1->4->7|7->4->-1/-1/-1 [2] 7/-1/-1->4->0|0->4->7/-1/-1 [3] 7/-1/-1->4->0|0->4->7/-1/-1 [4] 6/-1/-1->4->5|5->4->6/-1/-1 [5] 5/-1/-1->4->6|6->4->5/-1/-1 [6] -1/-1/-1->4->7|7->4->-1/-1/-1 [7] -1/-1/-1->4->7|7->4->-1/-1/-1 [8] 7/-1/-1->4->0|0->4->7/-1/-1 [9] 7/-1/-1->4->0|0->4->7/-1/-1 [10] 6/-1/-1->4->5|5->4->6/-1/-1 [11] 5/-1/-1->4->6|6->4->5/-1/-1 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Trees [0] 4/-1/-1->7->6|6->7->4/-1/-1 [1] 4/-1/-1->7->6|6->7->4/-1/-1 [2] 6/-1/-1->7->4|4->7->6/-1/-1 [3] 6/-1/-1->7->4|4->7->6/-1/-1 [4] 5/-1/-1->7->3|3->7->5/-1/-1 [5] 3/-1/-1->7->5|5->7->3/-1/-1 [6] 4/-1/-1->7->6|6->7->4/-1/-1 [7] 4/-1/-1->7->6|6->7->4/-1/-1 [8] 6/-1/-1->7->4|4->7->6/-1/-1 [9] 6/-1/-1->7->4|4->7->6/-1/-1 [10] 5/-1/-1->7->3|3->7->5/-1/-1 [11] 3/-1/-1->7->5|5->7->3/-1/-1 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 00/12 : 0 3 2 1 5 6 7 4 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 01/12 : 0 3 2 1 5 6 7 4 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 02/12 : 0 4 7 6 5 1 2 3 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Trees [0] 5/-1/-1->1->2|2->1->5/-1/-1 [1] 5/-1/-1->1->2|2->1->5/-1/-1 [2] 2/-1/-1->1->5|5->1->2/-1/-1 [3] 2/-1/-1->1->5|5->1->2/-1/-1 [4] 3/-1/-1->1->0|0->1->3/-1/-1 [5] -1/-1/-1->1->3|3->1->-1/-1/-1 [6] 5/-1/-1->1->2|2->1->5/-1/-1 [7] 5/-1/-1->1->2|2->1->5/-1/-1 [8] 2/-1/-1->1->5|5->1->2/-1/-1 [9] 2/-1/-1->1->5|5->1->2/-1/-1 [10] 3/-1/-1->1->0|0->1->3/-1/-1 [11] -1/-1/-1->1->3|3->1->-1/-1/-1 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Trees [0] 1/-1/-1->2->3|3->2->1/-1/-1 [1] 1/-1/-1->2->3|3->2->1/-1/-1 [2] 3/-1/-1->2->1|1->2->3/-1/-1 [3] 3/-1/-1->2->1|1->2->3/-1/-1 [4] -1/-1/-1->2->6|6->2->-1/-1/-1 [5] 6/-1/-1->2->0|0->2->6/-1/-1 [6] 1/-1/-1->2->3|3->2->1/-1/-1 [7] 1/-1/-1->2->3|3->2->1/-1/-1 [8] 3/-1/-1->2->1|1->2->3/-1/-1 [9] 3/-1/-1->2->1|1->2->3/-1/-1 [10] -1/-1/-1->2->6|6->2->-1/-1/-1 [11] 6/-1/-1->2->0|0->2->6/-1/-1 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 03/12 : 0 4 7 6 5 1 2 3 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 04/12 : 0 1 3 7 5 4 6 2 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 05/12 : 0 2 6 4 5 7 3 1 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 06/12 : 0 3 2 1 5 6 7 4 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 07/12 : 0 3 2 1 5 6 7 4 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 08/12 : 0 4 7 6 5 1 2 3 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 09/12 : 0 4 7 6 5 1 2 3 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 10/12 : 0 1 3 7 5 4 6 2 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Channel 11/12 : 0 2 6 4 5 7 3 1 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 8/8/64 tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Trees [0] 3/-1/-1->0->-1|-1->0->3/-1/-1 [1] 3/-1/-1->0->-1|-1->0->3/-1/-1 [2] 4/-1/-1->0->-1|-1->0->4/-1/-1 [3] 4/-1/-1->0->-1|-1->0->4/-1/-1 [4] 1/-1/-1->0->-1|-1->0->1/-1/-1 [5] 2/-1/-1->0->-1|-1->0->2/-1/-1 [6] 3/-1/-1->0->-1|-1->0->3/-1/-1 [7] 3/-1/-1->0->-1|-1->0->3/-1/-1 [8] 4/-1/-1->0->-1|-1->0->4/-1/-1 [9] 4/-1/-1->0->-1|-1->0->4/-1/-1 [10] 1/-1/-1->0->-1|-1->0->1/-1/-1 [11] 2/-1/-1->0->-1|-1->0->2/-1/-1 tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 00 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 00 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 00 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 00 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 00 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 00 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 00 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 00 : 0[160] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 00 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 00 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 00 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 00 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 00 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 00 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 00 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 01 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 01 : 0[160] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 01 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 01 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 01 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 01 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 01 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 01 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 01 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 01 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 01 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 01 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 01 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 01 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 01 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 02 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 02 : 0[160] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 02 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 02 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 02 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 02 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 02 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 02 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 02 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 02 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 02 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 02 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 02 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 02 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 02 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 03 : 0[160] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 03 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 03 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 03 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 03 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 03 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 03 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 03 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 03 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 03 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 03 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 03 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 03 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 03 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 03 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 04 : 3[190] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 04 : 0[160] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 04 : 4[1a0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 04 : 5[1b0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 04 : 6[1c0] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 04 : 7[1d0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 04 : 1[170] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 04 : 2[180] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 04 : 3[190] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 04 : 2[180] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 04 : 4[1a0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 04 : 5[1b0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 04 : 6[1c0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 04 : 7[1d0] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 04 : 1[170] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 05 : 2[180] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 05 : 3[190] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 05 : 4[1a0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 05 : 0[160] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 05 : 5[1b0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 05 : 6[1c0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 05 : 7[1d0] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 05 : 1[170] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 05 : 2[180] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 05 : 1[170] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 05 : 4[1a0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 05 : 3[190] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 05 : 5[1b0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 05 : 6[1c0] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 05 : 7[1d0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 06 : 0[160] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 06 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 06 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 06 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 06 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 06 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 06 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 06 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 06 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 06 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 06 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 06 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 06 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 06 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 06 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 07 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 07 : 0[160] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 07 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 07 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 07 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 07 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 07 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 07 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 07 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 07 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 07 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 07 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 07 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 07 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 07 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 08 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 08 : 0[160] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 08 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 08 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 08 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 08 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 08 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 08 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 08 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 08 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 08 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 08 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 08 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 08 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 08 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 09 : 3[190] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 09 : 0[160] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 09 : 1[170] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 09 : 2[180] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 09 : 4[1a0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 09 : 5[1b0] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 09 : 6[1c0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 09 : 7[1d0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 09 : 3[190] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 09 : 1[170] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 09 : 2[180] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 09 : 4[1a0] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 09 : 5[1b0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 09 : 6[1c0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 09 : 7[1d0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 10 : 3[190] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 10 : 0[160] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 10 : 1[170] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 10 : 2[180] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 10 : 4[1a0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 10 : 5[1b0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 10 : 6[1c0] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 10 : 7[1d0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 10 : 2[180] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 10 : 1[170] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 10 : 3[190] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 10 : 4[1a0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 10 : 5[1b0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 10 : 6[1c0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 10 : 7[1d0] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Ring 11 : 0[160] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 11 : 2[180] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 11 : 1[170] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 11 : 4[1a0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 11 : 3[190] -> 1[170] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 11 : 5[1b0] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 11 : 6[1c0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 11 : 7[1d0] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO Ring 11 : 1[170] -> 3[190] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO Ring 11 : 2[180] -> 0[160] via P2P/IPC tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO Ring 11 : 4[1a0] -> 6[1c0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:15:574 [1] NCCL INFO comm 0x7fe17c391da0 rank 1 nranks 8 cudaDev 1 busId 170 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO Ring 11 : 3[190] -> 7[1d0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO Ring 11 : 5[1b0] -> 4[1a0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO comm 0x7f0ad4385080 rank 0 nranks 8 cudaDev 0 busId 160 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO Ring 11 : 6[1c0] -> 2[180] via P2P/IPC tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO Ring 11 : 7[1d0] -> 5[1b0] via P2P/IPC tensorflow-benchmarks-efa-worker-0:16:575 [2] NCCL INFO comm 0x7f677c392ad0 rank 2 nranks 8 cudaDev 2 busId 180 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:18:576 [4] NCCL INFO comm 0x7f2cdc35d550 rank 4 nranks 8 cudaDev 4 busId 1a0 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:17:589 [3] NCCL INFO comm 0x7efd2035dfb0 rank 3 nranks 8 cudaDev 3 busId 190 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:19:593 [5] NCCL INFO comm 0x7fc0c4362de0 rank 5 nranks 8 cudaDev 5 busId 1b0 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:20:592 [6] NCCL INFO comm 0x7f758035dea0 rank 6 nranks 8 cudaDev 6 busId 1c0 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:21:578 [7] NCCL INFO comm 0x7f1ffc365550 rank 7 nranks 8 cudaDev 7 busId 1d0 - Init COMPLETE tensorflow-benchmarks-efa-worker-0:14:577 [0] NCCL INFO Launch mode Parallel Done warm up Step Img/sec total_loss Done warm up Step Img/sec total_loss Done warm up Step Img/sec total_loss Done warm up Done warm up Step Img/sec total_loss Done warm up Step Img/sec total_loss Step Img/sec total_loss Done warm up Step Img/sec total_loss Done warm up Step Img/sec total_loss 1 images/sec: 368.8 +/- 0.0 (jitter = 0.0) 8.007 1 images/sec: 368.9 +/- 0.0 (jitter = 0.0) 7.826 1 images/sec: 368.7 +/- 0.0 (jitter = 0.0) 7.900 1 images/sec: 368.6 +/- 0.0 (jitter = 0.0) 7.827 1 images/sec: 369.2 +/- 0.0 (jitter = 0.0) 7.836 1 images/sec: 368.2 +/- 0.0 (jitter = 0.0) 8.052 1 images/sec: 368.7 +/- 0.0 (jitter = 0.0) 8.180 1 images/sec: 367.7 +/- 0.0 (jitter = 0.0) 7.824 10 images/sec: 368.6 +/- 0.4 (jitter = 1.0) 8.000 10 images/sec: 368.6 +/- 0.4 (jitter = 0.8) 7.714 10 images/sec: 368.6 +/- 0.4 (jitter = 0.6) 7.881 10 images/sec: 368.6 +/- 0.4 (jitter = 1.5) 7.744 10 images/sec: 368.6 +/- 0.4 (jitter = 0.8) 7.749 10 images/sec: 368.6 +/- 0.3 (jitter = 1.3) 7.836 10 images/sec: 368.5 +/- 0.4 (jitter = 1.1) 7.793 10 images/sec: 368.6 +/- 0.4 (jitter = 1.7) 7.770 20 images/sec: 368.7 +/- 0.3 (jitter = 0.9) 7.834 20 images/sec: 368.8 +/- 0.2 (jitter = 0.9) 7.781 20 images/sec: 368.7 +/- 0.2 (jitter = 1.2) 7.772 20 images/sec: 368.7 +/- 0.3 (jitter = 1.0) 7.703 20 images/sec: 368.7 +/- 0.3 (jitter = 0.9) 7.770 20 images/sec: 368.7 +/- 0.2 (jitter = 0.9) 7.792 20 images/sec: 368.8 +/- 0.2 (jitter = 1.2) 7.731 20 images/sec: 368.7 +/- 0.3 (jitter = 1.2) 7.722 30 images/sec: 368.5 +/- 0.2 (jitter = 1.1) 7.511 30 images/sec: 368.5 +/- 0.2 (jitter = 0.9) 7.848 30 images/sec: 368.5 +/- 0.2 (jitter = 1.0) 7.864 30 images/sec: 368.5 +/- 0.2 (jitter = 0.8) 7.730 30 images/sec: 368.5 +/- 0.2 (jitter = 1.2) 7.615 30 images/sec: 368.5 +/- 0.2 (jitter = 0.9) 7.705 30 images/sec: 368.5 +/- 0.2 (jitter = 1.2) 7.682 30 images/sec: 368.5 +/- 0.2 (jitter = 0.8) 7.659 40 images/sec: 368.4 +/- 0.2 (jitter = 0.9) 7.475 40 images/sec: 368.4 +/- 0.2 (jitter = 0.9) 7.713 40 images/sec: 368.4 +/- 0.2 (jitter = 1.3) 7.637 40 images/sec: 368.4 +/- 0.2 (jitter = 1.2) 7.682 40 images/sec: 368.4 +/- 0.2 (jitter = 1.0) 7.629 40 images/sec: 368.4 +/- 0.2 (jitter = 1.0) 7.769 40 images/sec: 368.4 +/- 0.2 (jitter = 1.2) 7.586 40 images/sec: 368.4 +/- 0.2 (jitter = 1.1) 7.544 50 images/sec: 368.4 +/- 0.2 (jitter = 1.1) 7.614 50 images/sec: 368.4 +/- 0.2 (jitter = 0.9) 7.784 50 images/sec: 368.4 +/- 0.2 (jitter = 1.3) 7.620 50 images/sec: 368.4 +/- 0.2 (jitter = 1.2) 7.597 50 images/sec: 368.4 +/- 0.2 (jitter = 1.0) 7.542 50 images/sec: 368.4 +/- 0.2 (jitter = 0.9) 7.584 50 images/sec: 368.4 +/- 0.2 (jitter = 1.1) 7.732 50 images/sec: 368.4 +/- 0.2 (jitter = 1.0) 7.662 60 images/sec: 368.4 +/- 0.2 (jitter = 1.2) 7.622 60 images/sec: 368.3 +/- 0.2 (jitter = 1.1) 7.553 60 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.589 60 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.534 60 images/sec: 368.3 +/- 0.2 (jitter = 1.1) 7.584 60 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.654 60 images/sec: 368.3 +/- 0.2 (jitter = 1.0) 7.616 60 images/sec: 368.3 +/- 0.2 (jitter = 1.1) 7.475 70 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.514 70 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.578 70 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.536 70 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.627 70 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.621 70 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.416 70 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.532 70 images/sec: 368.3 +/- 0.1 (jitter = 1.2) 7.583 80 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.564 80 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.521 80 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.509 80 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.523 80 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.483 80 images/sec: 368.3 +/- 0.1 (jitter = 1.2) 7.590 80 images/sec: 368.3 +/- 0.1 (jitter = 1.2) 7.513 80 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.472 90 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.484 90 images/sec: 368.3 +/- 0.1 (jitter = 1.2) 7.453 90 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.525 90 images/sec: 368.3 +/- 0.1 (jitter = 1.1) 7.484 90 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.551 90 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.493 90 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.499 90 images/sec: 368.3 +/- 0.1 (jitter = 1.0) 7.516 100 images/sec: 368.4 +/- 0.1 (jitter = 1.2) 7.504 ---------------------------------------------------------------- total images/sec: 2946.03 ---------------------------------------------------------------- 100 images/sec: 368.4 +/- 0.1 (jitter = 1.0) 7.450 ---------------------------------------------------------------- total images/sec: 2946.00 ---------------------------------------------------------------- 100 images/sec: 368.4 +/- 0.1 (jitter = 1.0) 7.458 ---------------------------------------------------------------- total images/sec: 2945.98 ---------------------------------------------------------------- 100 images/sec: 368.4 +/- 0.1 (jitter = 1.2) 7.491 ---------------------------------------------------------------- total images/sec: 2945.95 ---------------------------------------------------------------- 100 images/sec: 368.4 +/- 0.1 (jitter = 1.0) 7.442 100 images/sec: 368.4 +/- 0.1 (jitter = 1.0) 7.527 ---------------------------------------------------------------- total images/sec: 2945.98 ---------------------------------------------------------------- ---------------------------------------------------------------- total images/sec: 2946.00 ---------------------------------------------------------------- 100 images/sec: 368.4 +/- 0.1 (jitter = 1.0) 7.467 ---------------------------------------------------------------- total images/sec: 2945.89 ---------------------------------------------------------------- 100 images/sec: 368.4 +/- 0.1 (jitter = 1.0) 7.454 ---------------------------------------------------------------- total images/sec: 2945.90 ---------------------------------------------------------------- # gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:47:09] $ cat mpi-tf-bc.yaml apiVersion: kubeflow.org/v1alpha2 kind: MPIJob metadata: name: tensorflow-benchmarks-efa spec: slotsPerWorker: 8 cleanPodPolicy: Running mpiReplicaSpecs: Launcher: replicas: 1 template: spec: containers: - image: 898739678081.dkr.ecr.us-west-2.amazonaws.com/efa-eks-benchmark:0.2gamma name: tensorflow-benchmarks-efa env: - name: LD_LIBRARY_PATH value: /efa/lib:/usr/local/lib:/nccl/build/lib:/aws-ofi-nccl/install/lib:$LD_LIBRARY_PATH command: ["/bin/sh"] args: ["-c", "mpirun \ --allow-run-as-root \ -x OMPI_ALLOW_RUN_AS_ROOT=1 \ -x OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \ -np 8 \ -x NCCL_DEBUG=INFO \ -x NCCL_ALGO=ring \ -x FI_EFA_TX_MIN_CREDITS=32 \ -x FI_EFA_ENABLE_SHM_TRANSFER=0 \ --mca plm_rsh_no_tree_spawn 1 \ --bind-to none --mca pml ob1 \ --mca btl_vader_single_copy_mechanism none \ --mca oob_tcp_if_include eth0 \ --mca btl_tcp_if_include eth0 \ --mca btl \ ^openib \ python \ benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py \ --model=resnet50 \ --batch_size=64 \ --variable_update=horovod"] Worker: replicas: 2 template: spec: containers: - image: 898739678081.dkr.ecr.us-west-2.amazonaws.com/efa-eks-benchmark:0.2gamma name: tensorflow-benchmarks-efa securityContext: privileged: true volumeMounts: - mountPath: /dev/infiniband/uverbs0 name: infiniband-efa resources: limits: nvidia.com/gpu: 4 volumes: - name: infiniband-efa hostPath: path: /dev/infiniband/uverbs0 # gaogilb @ a483e7024cd8 in /Volumes/unix/workplace/eks-efa-work/src/EKSEFAWorker on git:mainline x [0:47:32] $