$ k logs -f nccl-test-debug-launcher-cptfj
+ POD_NAME=nccl-test-debug-worker-0
+ shift
+ /opt/kube/kubectl exec nccl-test-debug-worker-0 -- /bin/sh -c        PATH=/opt/amazon/openmpi/bin:$PATH ; export PATH ; LD_LIBRARY_PATH=/opt/amazon/openmpi/lib:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; DYLD_LIBRARY_PATH=/opt/amazon/openmpi/lib:$DYLD_LIBRARY_PATH ; export DYLD_LIBRARY_PATH ;   /opt/amazon/openmpi/bin/orted -mca ess "env" -mca ess_base_jobid "1008140288" -mca ess_base_vpid 1 -mca ess_base_num_procs "3" -mca orte_node_regex "nccl-test-debug-launcher-cptfj,nccl-test-debug-worker-[1:0-1]@0(3)" -mca orte_hnp_uri "1008140288.0;tcp://192.168.9.191:60385" --mca plm_rsh_no_tree_spawn "1" --mca pml "ob1" --mca mtl "ofi" --mca mtl_ofi_provider_include "efa" --mca oob_tcp_if_include "eth0" --mca btl_tcp_if_include "eth0" -mca plm "rsh" -mca orte_default_hostfile "/etc/mpi/hostfile" -mca plm_rsh_agent "/etc/mpi/kubexec.sh" -mca rmaps_ppr_n_pernode "1" -mca hwloc_base_binding_policy "none" -mca rmaps_base_oversubscribe "1" -mca pmix "^s1,s2,cray,isolated"
+ POD_NAME=nccl-test-debug-worker-1
+ shift
+ /opt/kube/kubectl exec nccl-test-debug-worker-1 -- /bin/sh -c        PATH=/opt/amazon/openmpi/bin:$PATH ; export PATH ; LD_LIBRARY_PATH=/opt/amazon/openmpi/lib:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; DYLD_LIBRARY_PATH=/opt/amazon/openmpi/lib:$DYLD_LIBRARY_PATH ; export DYLD_LIBRARY_PATH ;   /opt/amazon/openmpi/bin/orted -mca ess "env" -mca ess_base_jobid "1008140288" -mca ess_base_vpid 2 -mca ess_base_num_procs "3" -mca orte_node_regex "nccl-test-debug-launcher-cptfj,nccl-test-debug-worker-[1:0-1]@0(3)" -mca orte_hnp_uri "1008140288.0;tcp://192.168.9.191:60385" --mca plm_rsh_no_tree_spawn "1" --mca pml "ob1" --mca mtl "ofi" --mca mtl_ofi_provider_include "efa" --mca oob_tcp_if_include "eth0" --mca btl_tcp_if_include "eth0" -mca plm "rsh" -mca orte_default_hostfile "/etc/mpi/hostfile" -mca plm_rsh_agent "/etc/mpi/kubexec.sh" -mca rmaps_ppr_n_pernode "1" -mca hwloc_base_binding_policy "none" -mca rmaps_base_oversubscribe "1" -mca pmix "^s1,s2,cray,isolated"
# nThread 1 nGpus 1 minBytes 8 maxBytes 2147483648 step: 2(factor) warmup iters: 5 iters: 100 validation: 1
#
# Using devices
#   Rank  0 Pid     14 on nccl-test-debug-worker-0 device  0 [0x00] Tesla V100-SXM2-32GB
#   Rank  1 Pid     14 on nccl-test-debug-worker-1 device  0 [0x00] Tesla V100-SXM2-32GB
nccl-test-debug-worker-0:14:14 [0] NCCL INFO Bootstrap : Using [0]eth0:192.168.5.165<0>
nccl-test-debug-worker-0:14:14 [0] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1.
nccl-test-debug-worker-0:14:14 [0] NCCL INFO NET/OFI Forcing AWS OFI ndev 4
nccl-test-debug-worker-0:14:14 [0] NCCL INFO NET/OFI Selected Provider is efa
nccl-test-debug-worker-0:14:14 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol.
nccl-test-debug-worker-0:14:14 [0] NCCL INFO Using network AWS Libfabric
nccl-test-debug-worker-1:14:14 [0] NCCL INFO Bootstrap : Using [0]eth0:192.168.22.129<0>
nccl-test-debug-worker-1:14:14 [0] NCCL INFO NET/OFI Setting RDMAV_FORK_SAFE environment variable to 1.
NCCL version 2.6.4+cuda10.2
nccl-test-debug-worker-1:14:14 [0] NCCL INFO NET/OFI Forcing AWS OFI ndev 4
nccl-test-debug-worker-1:14:14 [0] NCCL INFO NET/OFI Selected Provider is efa
nccl-test-debug-worker-1:14:14 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v3 symbol.
nccl-test-debug-worker-1:14:14 [0] NCCL INFO Using network AWS Libfabric
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-1:14:19 [0] NCCL INFO NET/OFI [0] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 0 busId 0000:00:16.0 path /sys/devices/pci0000:00/
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 1 busId 0000:00:17.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 2 busId 0000:00:18.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-0:14:20 [0] NCCL INFO NET/OFI [0] getCudaPath dev 3 busId 0000:00:19.0 path /sys/devices/pci0000:00
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Channel 00/04 :    0   1
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Channel 01/04 :    0   1
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Channel 02/04 :    0   1
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Channel 03/04 :    0   1
nccl-test-debug-worker-0:14:20 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 8/8/64
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1|-1->0->1/-1/-1 [1] 1/-1/-1->0->-1|-1->0->1/-1/-1 [2] -1/-1/-1->0->1|1->0->-1/-1/-1 [3] -1/-1/-1->0->1|1->0->-1/-1/-1
nccl-test-debug-worker-1:14:19 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 8/8/64
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Trees [0] -1/-1/-1->1->0|0->1->-1/-1/-1 [1] -1/-1/-1->1->0|0->1->-1/-1/-1 [2] 0/-1/-1->1->-1|-1->1->0/-1/-1 [3] 0/-1/-1->1->-1|-1->1->0/-1/-1
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 00 : 1[160] -> 0[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 00 : 0[160] -> 1[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 00 : 0[160] -> 1[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 00 : 1[160] -> 0[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 01 : 1[160] -> 0[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 01 : 0[160] -> 1[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 01 : 0[160] -> 1[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 01 : 1[160] -> 0[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 02 : 1[160] -> 0[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 02 : 0[160] -> 1[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 02 : 1[160] -> 0[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 02 : 0[160] -> 1[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 03 : 0[160] -> 1[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO Ring 03 : 1[160] -> 0[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 03 : 1[160] -> 0[160] [receive] via NET/AWS Libfabric/0
nccl-test-debug-worker-0:14:20 [0] NCCL INFO Ring 03 : 0[160] -> 1[160] [send] via NET/AWS Libfabric/0
nccl-test-debug-worker-1:14:19 [0] NCCL INFO comm 0x7fb27c000dc0 rank 1 nranks 2 cudaDev 0 busId 160 - Init COMPLETE
nccl-test-debug-worker-0:14:20 [0] NCCL INFO comm 0x7f7108000dc0 rank 0 nranks 2 cudaDev 0 busId 160 - Init COMPLETE
#
#                                                     out-of-place                       in-place
#       size         count    type   redop     time   algbw   busbw  error     time   algbw   busbw  error
#        (B)    (elements)                     (us)  (GB/s)  (GB/s)            (us)  (GB/s)  (GB/s)
nccl-test-debug-worker-0:14:14 [0] NCCL INFO Launch mode Parallel
           8             2   float     sum    53.45    0.00    0.00  0e+00    52.83    0.00    0.00  0e+00
          16             4   float     sum    51.32    0.00    0.00  0e+00    52.92    0.00    0.00  0e+00
          32             8   float     sum    52.32    0.00    0.00  0e+00    51.87    0.00    0.00  0e+00
          64            16   float     sum    52.69    0.00    0.00  0e+00    51.81    0.00    0.00  0e+00
         128            32   float     sum    52.19    0.00    0.00  0e+00    52.20    0.00    0.00  0e+00
         256            64   float     sum    53.97    0.00    0.00  0e+00    53.23    0.00    0.00  0e+00
         512           128   float     sum    54.60    0.01    0.01  0e+00    54.56    0.01    0.01  0e+00
        1024           256   float     sum    60.01    0.02    0.02  0e+00    59.00    0.02    0.02  0e+00
        2048           512   float     sum    60.56    0.03    0.03  0e+00    61.16    0.03    0.03  0e+00
        4096          1024   float     sum    64.32    0.06    0.06  0e+00    65.09    0.06    0.06  0e+00
        8192          2048   float     sum    71.07    0.12    0.12  0e+00    70.22    0.12    0.12  0e+00
       16384          4096   float     sum    77.29    0.21    0.21  0e+00    77.38    0.21    0.21  0e+00
       32768          8192   float     sum    96.55    0.34    0.34  0e+00    98.55    0.33    0.33  0e+00
       65536         16384   float     sum    146.8    0.45    0.45  0e+00    145.7    0.45    0.45  0e+00
      131072         32768   float     sum    189.9    0.69    0.69  0e+00    182.5    0.72    0.72  0e+00
      262144         65536   float     sum    255.9    1.02    1.02  0e+00    255.5    1.03    1.03  0e+00
      524288        131072   float     sum    354.9    1.48    1.48  0e+00    353.8    1.48    1.48  0e+00
     1048576        262144   float     sum    625.6    1.68    1.68  0e+00    622.1    1.69    1.69  0e+00
     2097152        524288   float     sum   1092.0    1.92    1.92  0e+00   1085.8    1.93    1.93  0e+00
     4194304       1048576   float     sum   1961.0    2.14    2.14  0e+00   1948.0    2.15    2.15  0e+00
     8388608       2097152   float     sum   3657.2    2.29    2.29  0e+00   3647.5    2.30    2.30  0e+00
    16777216       4194304   float     sum   7044.7    2.38    2.38  0e+00   7080.4    2.37    2.37  0e+00
    33554432       8388608   float     sum    13742    2.44    2.44  0e+00    13950    2.41    2.41  0e+00
    67108864      16777216   float     sum    26935    2.49    2.49  0e+00    27179    2.47    2.47  0e+00
   134217728      33554432   float     sum    53375    2.51    2.51  0e+00    53287    2.52    2.52  0e+00
   268435456      67108864   float     sum   106254    2.53    2.53  0e+00   105937    2.53    2.53  0e+00
   536870912     134217728   float     sum   211701    2.54    2.54  0e+00   211068    2.54    2.54  0e+00
  1073741824     268435456   float     sum   421915    2.54    2.54  0e+00   421391    2.55    2.55  0e+00
  2147483648     536870912   float     sum   842028    2.55    2.55  0e+00   842996    2.55    2.55  0e+00
# Out of bounds values : 0 OK
# Avg bus bandwidth    : 1.11948