#--------------------------------------------------- # PALLAS MPI Benchmark Suite V2.2, MPI-1 part #--------------------------------------------------- # Date : Wed Sep 17 09:14:30 2003 # Machine : i686# System : Linux # Release : 2.4.21 # Version : #2 SMP Sun Jul 13 19:21:19 EDT 2003 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 1.08 0.00 1 1000 1.09 0.88 2 1000 1.09 1.76 4 1000 1.15 3.32 8 1000 1.14 6.69 16 1000 1.22 12.47 32 1000 1.20 25.37 64 1000 1.30 46.97 128 1000 1.49 81.87 256 1000 1.73 141.45 512 1000 1.95 251.04 1024 1000 2.46 397.06 2048 1000 3.39 575.38 4096 1000 5.19 751.99 8192 1000 8.77 890.87 16384 1000 18.49 844.87 32768 1000 33.99 919.28 65536 640 66.20 944.08 131072 320 218.58 571.88 262144 160 544.97 458.74 524288 80 1089.19 459.06 1048576 40 2197.82 455.00 2097152 20 4404.50 454.08 4194304 10 8927.35 448.06 #--------------------------------------------------- # Benchmarking PingPing # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 2.00 0.00 1 1000 2.32 0.41 2 1000 2.35 0.81 4 1000 2.42 1.58 8 1000 2.42 3.15 16 1000 2.48 6.17 32 1000 2.51 12.18 64 1000 2.63 23.24 128 1000 2.88 42.40 256 1000 3.41 71.55 512 1000 3.80 128.36 1024 1000 4.76 205.20 2048 1000 6.48 301.22 4096 1000 10.01 390.31 8192 1000 17.02 459.15 16384 1000 31.45 496.79 32768 1000 58.19 537.04 65536 640 115.60 540.68 131072 320 422.01 296.20 262144 160 1092.52 228.83 524288 80 2230.65 224.15 1048576 40 4470.12 223.71 2097152 20 8936.60 223.80 4194304 10 17977.40 222.50 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.00 2.00 2.00 0.00 1 1000 2.13 2.13 2.13 0.89 2 1000 2.18 2.19 2.19 1.75 4 1000 2.23 2.24 2.23 3.41 8 1000 2.31 2.31 2.31 6.61 16 1000 2.30 2.30 2.30 13.28 32 1000 2.29 2.29 2.29 26.68 64 1000 2.43 2.43 2.43 50.15 128 1000 2.66 2.66 2.66 91.71 256 1000 3.05 3.05 3.05 159.94 512 1000 3.61 3.61 3.61 270.82 1024 1000 4.44 4.44 4.44 439.50 2048 1000 6.18 6.18 6.18 632.28 4096 1000 9.60 9.60 9.60 813.64 8192 1000 16.08 16.08 16.08 971.46 16384 1000 31.81 31.81 31.81 982.49 32768 1000 58.42 58.42 58.42 1069.86 65536 640 115.23 115.24 115.23 1084.73 131072 320 422.09 422.14 422.12 592.22 262144 160 1096.93 1097.02 1096.98 455.78 524288 80 2238.36 2238.75 2238.56 446.68 1048576 40 4478.10 4478.10 4478.10 446.62 2097152 20 8951.05 8951.15 8951.10 446.87 4194304 10 17976.39 17977.09 17976.74 445.01 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 10.22 10.23 10.23 0.00 1 1000 10.26 10.28 10.27 0.19 2 1000 10.27 10.29 10.28 0.37 4 1000 10.32 10.34 10.33 0.74 8 1000 10.26 10.28 10.27 1.48 16 1000 10.19 10.21 10.20 2.99 32 1000 10.12 10.15 10.14 6.02 64 1000 10.21 10.24 10.23 11.92 128 1000 10.71 10.74 10.72 22.74 256 1000 11.21 11.23 11.22 43.50 512 1000 11.91 11.93 11.92 81.84 1024 1000 13.32 13.35 13.33 146.32 2048 1000 17.55 17.61 17.58 221.87 4096 1000 28.47 28.49 28.48 274.18 8192 1000 44.54 44.60 44.58 350.31 16384 1000 162.63 162.72 162.68 192.05 32768 1000 233.08 233.25 233.16 267.96 65536 640 379.04 379.49 379.27 329.39 131072 320 719.45 721.18 720.33 346.65 262144 160 1583.29 1590.36 1586.86 314.39 524288 80 3160.43 3188.05 3174.56 313.67 1048576 40 6225.23 6334.80 6281.29 315.72 2097152 20 11408.10 11838.10 11627.37 337.89 4194304 10 24166.00 25889.90 25043.60 309.00 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 10.08 10.11 10.09 0.00 1 1000 10.12 10.14 10.13 0.19 2 1000 10.10 10.15 10.13 0.38 4 1000 10.11 10.13 10.12 0.75 8 1000 10.10 10.12 10.10 1.51 16 1000 10.16 10.19 10.18 3.00 32 1000 10.26 10.28 10.27 5.94 64 1000 10.33 10.35 10.34 11.79 128 1000 10.92 10.96 10.94 22.28 256 1000 11.62 11.64 11.63 41.93 512 1000 12.53 12.56 12.54 77.76 1024 1000 14.18 14.23 14.20 137.30 2048 1000 18.98 19.00 18.99 205.55 4096 1000 30.17 30.22 30.20 258.56 8192 1000 47.61 47.68 47.64 327.73 16384 1000 160.32 160.49 160.39 194.71 32768 1000 259.50 259.89 259.71 240.48 65536 640 450.34 451.42 450.92 276.90 131072 320 820.03 823.18 821.63 303.70 262144 160 1727.30 1742.48 1735.42 286.95 524288 80 3243.18 3310.76 3279.63 302.05 1048576 40 6410.10 6543.57 6477.07 305.64 2097152 20 12029.10 13115.40 12590.93 304.98 4194304 10 22042.80 26430.20 24397.69 302.68 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 10.05 10.08 10.06 0.00 1 1000 10.15 10.18 10.17 0.19 2 1000 10.10 10.12 10.11 0.38 4 1000 10.10 10.13 10.12 0.75 8 1000 10.12 10.15 10.13 1.50 16 1000 10.25 10.29 10.27 2.97 32 1000 10.27 10.30 10.28 5.92 64 1000 10.38 10.41 10.40 11.72 128 1000 10.96 11.00 10.99 22.20 256 1000 11.87 11.91 11.90 40.98 512 1000 12.76 12.81 12.78 76.24 1024 1000 14.59 14.64 14.61 133.45 2048 1000 19.39 19.48 19.44 200.52 4096 1000 30.12 30.19 30.15 258.81 8192 1000 48.63 48.73 48.69 320.62 16384 1000 164.84 165.25 165.02 189.11 32768 1000 260.05 260.90 260.46 239.55 65536 640 461.73 463.95 462.87 269.43 131072 320 822.53 830.41 827.31 301.06 262144 160 1704.20 1744.24 1725.13 286.66 524288 80 3347.37 3496.25 3423.37 286.02 1048576 40 6553.45 6815.25 6690.46 293.46 2097152 20 12798.35 13729.95 13284.27 291.33 4194304 10 23229.49 27606.50 26074.74 289.79 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 32 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 10.07 10.10 10.08 0.00 1 1000 10.09 10.15 10.12 0.19 2 1000 10.11 10.14 10.13 0.38 4 1000 10.10 10.14 10.11 0.75 8 1000 10.13 10.17 10.15 1.50 16 1000 10.23 10.26 10.24 2.97 32 1000 10.28 10.32 10.30 5.92 64 1000 10.37 10.41 10.39 11.72 128 1000 11.07 11.11 11.09 21.97 256 1000 11.85 11.90 11.87 41.04 512 1000 12.96 13.01 12.98 75.08 1024 1000 14.54 14.59 14.56 133.87 2048 1000 19.04 19.15 19.10 203.96 4096 1000 30.16 30.24 30.21 258.38 8192 1000 48.18 48.32 48.26 323.39 16384 1000 164.48 165.24 164.93 189.12 32768 1000 264.11 265.90 264.97 235.05 65536 640 438.79 442.45 440.77 282.51 131072 320 822.51 833.43 829.29 299.96 262144 160 1713.40 1773.08 1742.60 282.00 524288 80 3306.91 3544.20 3422.22 282.15 1048576 40 6404.73 7139.25 6798.39 280.14 2097152 20 12471.65 14344.25 13347.59 278.86 4194304 10 21373.59 29716.80 25431.53 269.21 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 3.58 3.58 3.58 0.00 1 1000 4.18 4.18 4.18 0.91 2 1000 4.19 4.19 4.19 1.82 4 1000 4.32 4.32 4.32 3.53 8 1000 4.51 4.51 4.51 6.77 16 1000 4.48 4.48 4.48 13.63 32 1000 4.59 4.59 4.59 26.62 64 1000 4.75 4.75 4.75 51.38 128 1000 5.37 5.37 5.37 90.96 256 1000 6.06 6.06 6.06 161.07 512 1000 6.95 6.95 6.95 280.90 1024 1000 8.76 8.76 8.76 445.97 2048 1000 12.25 12.25 12.25 637.75 4096 1000 19.60 19.60 19.60 797.07 8192 1000 33.47 33.47 33.47 933.64 16384 1000 64.08 64.08 64.08 975.28 32768 1000 117.21 117.21 117.21 1066.49 65536 640 232.40 232.41 232.41 1075.69 131072 320 875.52 875.56 875.54 571.06 262144 160 2196.89 2196.99 2196.94 455.17 524288 80 4483.08 4483.29 4483.18 446.10 1048576 40 8947.55 8947.75 8947.65 447.04 2097152 20 17939.35 17939.40 17939.37 445.95 4194304 10 36054.00 36055.10 36054.55 443.77 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 20.18 20.20 20.19 0.00 1 1000 20.02 20.05 20.04 0.19 2 1000 20.00 20.02 20.01 0.38 4 1000 19.92 19.94 19.93 0.77 8 1000 19.79 19.81 19.80 1.54 16 1000 20.38 20.41 20.40 2.99 32 1000 20.29 20.31 20.30 6.01 64 1000 20.41 20.43 20.42 11.95 128 1000 21.72 21.74 21.74 22.46 256 1000 22.58 22.61 22.59 43.20 512 1000 23.86 23.88 23.87 81.78 1024 1000 26.43 26.46 26.44 147.62 2048 1000 34.86 34.90 34.88 223.82 4096 1000 55.96 56.03 56.00 278.87 8192 1000 89.66 89.73 89.69 348.28 16384 1000 258.38 258.46 258.42 241.82 32768 1000 383.45 383.46 383.45 325.98 65536 640 728.72 728.91 728.83 342.98 131072 320 1873.48 1874.63 1874.06 266.72 262144 160 3869.21 3876.84 3873.96 257.94 524288 80 8391.30 8416.84 8406.70 237.62 1048576 40 15349.65 15389.20 15369.41 259.92 2097152 20 31108.15 31511.65 31357.68 253.87 4194304 10 61643.70 63278.71 62676.10 252.85 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 20.79 20.83 20.82 0.00 1 1000 20.90 20.93 20.91 0.18 2 1000 21.11 21.13 21.12 0.36 4 1000 20.87 20.89 20.88 0.73 8 1000 20.82 20.85 20.83 1.46 16 1000 21.06 21.07 21.06 2.90 32 1000 21.09 21.13 21.11 5.78 64 1000 21.32 21.35 21.33 11.44 128 1000 22.56 22.59 22.58 21.61 256 1000 23.58 23.62 23.60 41.35 512 1000 25.04 25.08 25.06 77.88 1024 1000 27.88 27.92 27.90 139.91 2048 1000 37.16 37.18 37.17 210.12 4096 1000 60.76 60.83 60.80 256.86 8192 1000 115.55 115.71 115.63 270.08 16384 1000 309.27 309.34 309.30 202.04 32768 1000 562.66 562.91 562.78 222.06 65536 640 1118.44 1119.32 1118.99 223.35 131072 320 2619.91 2628.18 2624.66 190.25 262144 160 5102.24 5135.19 5121.41 194.73 524288 80 9987.34 10112.12 10061.74 197.78 1048576 40 17095.93 17285.83 17236.28 231.40 2097152 20 35280.00 37370.65 36501.73 214.07 4194304 10 62960.10 66134.10 65072.87 241.93 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 21.00 21.04 21.02 0.00 1 1000 21.03 21.06 21.04 0.18 2 1000 21.04 21.09 21.06 0.36 4 1000 21.06 21.10 21.07 0.72 8 1000 21.00 21.04 21.01 1.45 16 1000 21.11 21.16 21.13 2.88 32 1000 21.19 21.22 21.21 5.75 64 1000 21.35 21.40 21.38 11.41 128 1000 22.68 22.72 22.71 21.49 256 1000 23.58 23.64 23.60 41.32 512 1000 25.25 25.31 25.28 77.17 1024 1000 28.52 28.58 28.55 136.66 2048 1000 36.49 36.54 36.52 213.78 4096 1000 60.33 60.47 60.40 258.39 8192 1000 109.89 110.20 110.06 283.57 16384 1000 301.68 301.85 301.80 207.05 32768 1000 552.55 553.30 552.96 225.92 65536 640 1111.99 1113.18 1112.65 224.58 131072 320 2453.15 2470.50 2462.54 202.39 262144 160 4738.46 4761.17 4752.60 210.03 524288 80 8912.60 9061.75 8998.92 220.71 1048576 40 16665.27 17539.95 17216.08 228.05 2097152 20 34392.30 38967.50 36920.74 205.30 4194304 10 65161.61 73221.90 68981.46 218.51 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 32 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 21.05 21.24 21.15 0.00 1 1000 21.27 21.35 21.30 0.18 2 1000 21.03 21.09 21.06 0.36 4 1000 21.15 21.20 21.18 0.72 8 1000 21.06 21.13 21.09 1.44 16 1000 21.32 21.42 21.37 2.85 32 1000 21.22 21.27 21.24 5.74 64 1000 21.42 21.48 21.45 11.36 128 1000 22.71 22.75 22.73 21.46 256 1000 23.60 23.66 23.63 41.27 512 1000 25.07 25.12 25.10 77.74 1024 1000 28.44 28.50 28.47 137.05 2048 1000 36.59 36.74 36.66 212.64 4096 1000 60.24 60.40 60.32 258.67 8192 1000 103.84 104.30 104.06 299.62 16384 1000 301.61 302.27 301.99 206.77 32768 1000 552.72 553.75 553.19 225.73 65536 640 1093.87 1095.97 1094.64 228.11 131072 320 2240.44 2253.10 2245.13 221.92 262144 160 4536.44 4578.14 4552.91 218.43 524288 80 8739.10 8968.03 8867.49 223.01 1048576 40 17021.10 18171.37 17471.94 220.13 2097152 20 34968.90 38830.00 36876.60 206.03 4194304 10 60281.00 73831.30 67044.06 216.71 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 4 1000 3.27 3.27 3.27 8 1000 3.23 3.23 3.23 16 1000 3.31 3.31 3.31 32 1000 3.41 3.41 3.41 64 1000 3.59 3.59 3.59 128 1000 4.17 4.17 4.17 256 1000 4.87 4.87 4.87 512 1000 5.94 5.94 5.94 1024 1000 8.03 8.03 8.03 2048 1000 12.17 12.17 12.17 4096 1000 20.50 20.51 20.50 8192 1000 38.42 38.42 38.42 16384 1000 78.22 78.23 78.23 32768 1000 151.49 151.49 151.49 65536 640 318.49 318.50 318.49 131072 320 1021.88 1021.90 1021.89 262144 160 2551.91 2551.94 2551.92 524288 80 5276.98 5277.14 5277.06 1048576 40 10477.42 10477.78 10477.60 2097152 20 20923.40 20924.20 20923.80 4194304 10 42043.40 42050.60 42047.00 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.04 0.04 0.04 4 1000 22.68 22.70 22.69 8 1000 22.62 22.65 22.64 16 1000 23.01 23.02 23.02 32 1000 23.16 23.17 23.17 64 1000 24.11 24.12 24.11 128 1000 25.17 25.18 25.18 256 1000 27.76 27.79 27.77 512 1000 31.52 31.55 31.53 1024 1000 36.55 36.57 36.56 2048 1000 49.12 49.16 49.14 4096 1000 79.62 79.66 79.64 8192 1000 137.65 137.73 137.69 16384 1000 303.54 303.60 303.57 32768 1000 531.28 531.34 531.32 65536 640 1055.16 1055.21 1055.18 131072 320 2947.76 2950.45 2949.10 262144 160 5908.43 5911.11 5909.75 524288 80 11759.36 11766.23 11762.73 1048576 40 23828.65 23856.58 23842.55 2097152 20 48252.65 48357.70 48304.51 4194304 10 96819.29 97182.61 97002.68 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.04 0.04 0.04 4 1000 44.26 44.29 44.28 8 1000 44.30 44.33 44.32 16 1000 44.64 44.68 44.67 32 1000 45.00 45.01 45.00 64 1000 45.42 45.49 45.47 128 1000 48.78 48.81 48.80 256 1000 52.38 52.41 52.39 512 1000 57.19 57.24 57.22 1024 1000 65.62 65.64 65.63 2048 1000 86.76 86.78 86.77 4096 1000 143.60 143.66 143.63 8192 1000 251.41 251.50 251.46 16384 1000 534.74 534.87 534.81 32768 1000 922.44 922.61 922.53 65536 640 1910.99 1912.46 1911.77 131072 320 4818.08 4819.71 4818.89 262144 160 8847.58 8857.77 8852.73 524288 80 17055.94 17070.22 17062.55 1048576 40 34183.25 34236.27 34209.20 2097152 20 69845.70 69944.00 69894.06 4194304 10 140397.50 140786.10 140601.59 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.04 0.06 0.05 4 1000 65.52 65.56 65.53 8 1000 65.70 65.73 65.72 16 1000 66.36 66.40 66.38 32 1000 66.49 66.52 66.51 64 1000 67.27 67.31 67.29 128 1000 71.70 71.74 71.72 256 1000 76.89 76.92 76.91 512 1000 83.12 83.18 83.16 1024 1000 94.65 94.69 94.67 2048 1000 123.83 123.88 123.86 4096 1000 202.50 202.58 202.53 8192 1000 352.21 352.34 352.28 16384 1000 861.21 861.50 861.37 32768 1000 1524.31 1524.64 1524.50 65536 640 3084.98 3086.07 3085.43 131072 320 7002.66 7005.61 7004.07 262144 160 13402.27 13407.62 13405.44 524288 80 25004.40 25018.21 25010.80 1048576 40 49470.28 49537.38 49502.80 2097152 20 91351.10 91449.85 91401.39 4194304 10 184656.69 185085.90 184855.48 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 32 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.04 0.06 0.05 4 1000 87.08 87.12 87.10 8 1000 87.13 87.17 87.15 16 1000 87.83 87.86 87.85 32 1000 88.32 88.36 88.34 64 1000 89.24 89.28 89.26 128 1000 95.76 95.79 95.77 256 1000 101.54 101.57 101.55 512 1000 110.71 110.75 110.73 1024 1000 127.64 127.69 127.66 2048 1000 180.52 180.61 180.56 4096 1000 320.27 320.40 320.34 8192 1000 609.98 610.24 610.10 16384 1000 1490.84 1491.31 1491.08 32768 1000 2803.26 2804.18 2803.68 65536 640 5596.11 5598.26 5596.97 131072 320 12014.12 12026.31 12019.78 262144 160 24779.65 24826.61 24804.73 524288 80 50184.01 50355.89 50258.84 1048576 40 101477.42 102127.27 101786.91 2097152 20 203300.40 205450.15 204233.00 4194304 10 403343.20 411704.90 406930.73 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.08 0.08 4 1000 1.69 1.69 1.69 8 1000 1.78 1.78 1.78 16 1000 1.79 1.79 1.79 32 1000 1.84 1.84 1.84 64 1000 1.96 1.96 1.96 128 1000 2.27 2.27 2.27 256 1000 2.74 2.74 2.74 512 1000 3.18 3.19 3.19 1024 1000 4.11 4.11 4.11 2048 1000 5.69 5.69 5.69 4096 1000 8.92 8.93 8.93 8192 1000 15.17 15.18 15.18 16384 1000 30.48 30.51 30.50 32768 1000 57.47 57.50 57.49 65536 640 275.79 275.97 275.88 131072 320 685.36 686.20 685.78 262144 160 1707.17 1710.96 1709.06 524288 80 3455.65 3469.99 3462.82 1048576 40 6807.85 6861.18 6834.51 2097152 20 13478.85 13689.60 13584.23 4194304 10 27345.50 27966.50 27656.00 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.08 0.08 4 1000 12.75 12.78 12.77 8 1000 12.74 12.76 12.75 16 1000 13.04 13.05 13.04 32 1000 13.32 13.34 13.33 64 1000 13.55 13.57 13.56 128 1000 15.53 15.56 15.55 256 1000 19.23 19.28 19.26 512 1000 22.63 22.67 22.64 1024 1000 28.08 28.12 28.09 2048 1000 40.31 40.39 40.35 4096 1000 58.81 58.92 58.87 8192 1000 96.53 96.70 96.61 16384 1000 218.20 218.38 218.29 32768 1000 351.14 351.42 351.29 65536 640 1175.45 1176.35 1175.82 131072 320 2247.26 2250.57 2248.95 262144 160 4566.37 4580.56 4573.64 524288 80 8928.51 8981.03 8954.57 1048576 40 17509.00 17716.45 17611.43 2097152 20 31620.90 32387.55 31997.45 4194304 10 57214.19 60656.70 59064.84 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.08 0.08 4 1000 19.33 19.38 19.35 8 1000 19.40 19.45 19.42 16 1000 19.56 19.61 19.59 32 1000 19.79 19.85 19.82 64 1000 20.64 20.68 20.65 128 1000 24.22 24.29 24.26 256 1000 30.73 30.80 30.76 512 1000 36.39 36.48 36.43 1024 1000 45.17 45.28 45.22 2048 1000 64.86 65.01 64.93 4096 1000 93.62 93.83 93.72 8192 1000 154.17 154.51 154.35 16384 1000 325.49 325.87 325.68 32768 1000 565.86 566.53 566.20 65536 640 1572.55 1574.72 1573.61 131072 320 3061.10 3069.25 3064.94 262144 160 6289.97 6327.84 6309.59 524288 80 12278.95 12425.76 12354.68 1048576 40 24113.55 24701.97 24415.38 2097152 20 43966.85 46281.25 45182.91 4194304 10 76801.80 85778.90 81692.49 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.08 0.08 4 1000 25.56 25.63 25.60 8 1000 25.60 25.68 25.64 16 1000 26.03 26.09 26.06 32 1000 26.41 26.50 26.46 64 1000 27.71 27.78 27.74 128 1000 33.99 34.07 34.03 256 1000 42.99 43.11 43.04 512 1000 49.94 50.07 50.00 1024 1000 61.61 61.76 61.68 2048 1000 88.50 88.72 88.61 4096 1000 130.99 131.31 131.15 8192 1000 221.64 222.19 221.91 16384 1000 428.97 429.74 429.36 32768 1000 738.75 740.09 739.45 65536 640 2073.26 2077.83 2075.60 131072 320 3956.50 3973.20 3965.17 262144 160 7852.07 7924.29 7889.10 524288 80 15290.11 15572.75 15433.44 1048576 40 29823.35 30968.45 30408.43 2097152 20 52910.00 57194.65 55144.49 4194304 10 89716.99 104875.49 97610.29 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 32 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.08 0.08 4 1000 34.20 34.30 34.24 8 1000 34.14 34.25 34.20 16 1000 34.49 34.59 34.53 32 1000 34.86 34.96 34.91 64 1000 35.98 36.11 36.04 128 1000 41.51 41.63 41.57 256 1000 53.42 53.59 53.50 512 1000 63.86 64.08 63.97 1024 1000 79.40 79.63 79.52 2048 1000 113.63 113.96 113.79 4096 1000 164.67 165.17 164.93 8192 1000 270.20 271.00 270.60 16384 1000 559.20 560.45 559.85 32768 1000 977.43 979.60 978.55 65536 640 2511.03 2518.58 2515.01 131072 320 4870.97 4900.53 4886.23 262144 160 9593.46 9711.34 9652.87 524288 80 18685.51 19152.70 18923.00 1048576 40 36465.50 38341.63 37409.27 2097152 20 65432.20 72512.95 69059.48 4194304 10 106665.29 133991.60 121057.03 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 4.83 4.83 4.83 4 1000 5.77 5.77 5.77 8 1000 7.00 7.00 7.00 16 1000 7.40 7.40 7.40 32 1000 7.39 7.39 7.39 64 1000 7.56 7.56 7.56 128 1000 8.32 8.32 8.32 256 1000 10.01 10.01 10.01 512 1000 11.09 11.09 11.09 1024 1000 6.40 6.40 6.40 2048 1000 8.54 8.54 8.54 4096 1000 12.64 12.64 12.64 8192 1000 48.84 48.84 48.84 16384 1000 38.39 38.39 38.39 32768 1000 78.17 78.17 78.17 65536 640 155.00 155.00 155.00 131072 320 353.71 353.71 353.71 262144 160 1170.66 1170.75 1170.71 524288 80 2591.66 2591.70 2591.68 1048576 40 5246.90 5246.98 5246.94 2097152 20 10455.85 10456.00 10455.92 4194304 10 20818.40 20820.61 20819.50 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 23.56 23.57 23.57 4 1000 26.67 26.68 26.67 8 1000 27.76 27.79 27.77 16 1000 27.66 27.68 27.66 32 1000 27.72 27.73 27.72 64 1000 28.22 28.24 28.23 128 1000 29.59 29.60 29.60 256 1000 31.44 31.47 31.46 512 1000 31.97 31.99 31.98 1024 1000 50.10 50.11 50.11 2048 1000 55.35 55.37 55.36 4096 1000 66.26 66.28 66.27 8192 1000 94.10 94.12 94.11 16384 1000 162.82 162.87 162.85 32768 1000 233.56 233.62 233.59 65536 640 502.16 502.17 502.16 131072 320 862.31 862.32 862.32 262144 160 1773.30 1774.13 1773.69 524288 80 4296.96 4300.41 4298.71 1048576 40 8574.83 8583.78 8579.17 2097152 20 15785.10 15808.95 15797.11 4194304 10 32756.01 32849.00 32802.98 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 45.52 45.55 45.54 4 1000 49.99 50.01 50.00 8 1000 50.72 50.76 50.74 16 1000 51.28 51.31 51.29 32 1000 52.06 52.09 52.07 64 1000 52.67 52.70 52.68 128 1000 54.46 54.49 54.47 256 1000 58.49 58.52 58.51 512 1000 64.06 64.09 64.07 1024 1000 140.17 140.20 140.18 2048 1000 149.90 149.91 149.91 4096 1000 161.19 161.22 161.21 8192 1000 183.15 183.20 183.17 16384 1000 241.46 241.51 241.48 32768 1000 410.97 411.03 411.00 65536 640 657.11 657.25 657.18 131072 320 1773.17 1773.36 1773.26 262144 160 2988.23 2988.42 2988.33 524288 80 5304.06 5307.60 5305.91 1048576 40 10747.18 10757.85 10752.35 2097152 20 21345.95 21380.80 21360.64 4194304 10 42098.80 42264.81 42198.58 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 67.11 67.13 67.12 4 1000 72.10 72.13 72.11 8 1000 72.54 72.58 72.56 16 1000 73.16 73.19 73.18 32 1000 74.15 74.19 74.17 64 1000 75.26 75.30 75.28 128 1000 78.95 78.97 78.96 256 1000 83.32 83.36 83.34 512 1000 91.93 91.98 91.96 1024 1000 306.46 306.49 306.47 2048 1000 326.40 326.43 326.42 4096 1000 347.34 347.38 347.36 8192 1000 370.21 370.26 370.23 16384 1000 419.67 419.72 419.69 32768 1000 546.47 546.52 546.50 65536 640 938.76 938.87 938.81 131072 320 1674.77 1675.14 1674.96 262144 160 4262.07 4263.13 4262.61 524288 80 7171.55 7172.85 7172.44 1048576 40 12603.23 12613.75 12609.44 2097152 20 24088.55 24125.00 24107.56 4194304 10 48502.60 48632.60 48566.43 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 32 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 89.36 89.40 89.38 4 1000 94.55 94.59 94.57 8 1000 95.09 95.14 95.12 16 1000 95.39 95.43 95.41 32 1000 96.11 96.14 96.13 64 1000 97.50 97.54 97.52 128 1000 102.69 102.74 102.72 256 1000 109.51 109.55 109.53 512 1000 119.90 119.95 119.93 1024 1000 653.26 653.31 653.28 2048 1000 659.96 659.99 659.98 4096 1000 704.59 704.64 704.62 8192 1000 746.87 746.92 746.90 16384 1000 797.48 797.53 797.51 32768 1000 936.04 936.09 936.06 65536 640 1399.23 1399.35 1399.28 131072 320 2688.00 2688.32 2688.17 262144 160 5256.52 5258.65 5257.56 524288 80 12064.11 12067.21 12065.71 1048576 40 21715.70 21723.25 21719.58 2097152 20 40247.25 40268.90 40256.72 4194304 10 78055.19 78229.89 78136.33 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 3.05 3.05 3.05 2 1000 3.08 3.08 3.08 4 1000 3.12 3.12 3.12 8 1000 3.11 3.11 3.11 16 1000 3.14 3.14 3.14 32 1000 3.21 3.21 3.21 64 1000 3.41 3.41 3.41 128 1000 3.76 3.76 3.76 256 1000 4.40 4.40 4.40 512 1000 5.45 5.45 5.45 1024 1000 7.32 7.32 7.32 2048 1000 11.05 11.05 11.05 4096 1000 18.48 18.48 18.48 8192 1000 33.12 33.12 33.12 16384 1000 68.65 68.65 68.65 32768 1000 135.83 135.84 135.83 65536 640 331.64 331.68 331.66 131072 320 789.39 789.47 789.43 262144 160 1796.31 1796.32 1796.32 524288 80 3672.14 3673.30 3672.72 1048576 40 7302.40 7303.18 7302.79 2097152 20 14544.50 14546.10 14545.30 4194304 10 28886.20 28887.30 28886.75 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 22.39 22.40 22.39 2 1000 22.50 22.53 22.51 4 1000 22.62 22.63 22.62 8 1000 22.93 22.96 22.94 16 1000 23.20 23.21 23.21 32 1000 23.60 23.62 23.61 64 1000 25.54 25.57 25.55 128 1000 26.98 26.99 26.99 256 1000 30.60 30.61 30.61 512 1000 35.50 35.54 35.52 1024 1000 43.44 43.46 43.45 2048 1000 73.07 73.12 73.09 4096 1000 117.99 118.05 118.02 8192 1000 240.31 240.38 240.35 16384 1000 426.45 426.54 426.50 32768 1000 805.67 805.81 805.74 65536 640 1900.99 1902.12 1901.57 131072 320 4389.35 4393.45 4391.40 262144 160 10444.01 10445.02 10444.48 524288 80 16417.88 16476.39 16447.10 1048576 40 33814.78 34029.12 33922.01 2097152 20 74047.45 74053.90 74050.60 4194304 10 164788.50 164796.40 164792.35 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 43.99 44.02 44.00 2 1000 44.26 44.28 44.26 4 1000 44.52 44.55 44.54 8 1000 44.93 44.96 44.95 16 1000 44.99 45.03 45.01 32 1000 46.99 47.01 47.00 64 1000 49.67 49.70 49.68 128 1000 53.79 53.82 53.80 256 1000 60.07 60.10 60.08 512 1000 72.47 72.48 72.47 1024 1000 107.15 107.19 107.18 2048 1000 181.91 182.01 181.96 4096 1000 334.49 334.59 334.54 8192 1000 711.26 711.48 711.39 16384 1000 1340.99 1341.22 1341.13 32768 1000 2605.97 2606.35 2606.17 65536 640 5628.85 5632.72 5630.99 131072 320 12028.50 12035.54 12032.03 262144 160 24165.61 24218.99 24198.92 524288 80 45706.74 45921.32 45825.54 1048576 40 94486.12 95286.15 94895.82 2097152 20 197646.55 197666.00 197657.31 4194304 10 406784.00 419246.90 414683.05 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 65.41 65.43 65.41 2 1000 65.77 65.81 65.79 4 1000 66.15 66.18 66.16 8 1000 66.60 66.64 66.62 16 1000 68.63 68.67 68.65 32 1000 71.59 71.62 71.61 64 1000 75.88 75.92 75.90 128 1000 84.19 84.22 84.20 256 1000 101.71 101.75 101.73 512 1000 134.07 134.13 134.09 1024 1000 210.17 210.27 210.21 2048 1000 427.71 427.83 427.78 4096 1000 848.52 848.77 848.63 8192 1000 1779.40 1779.81 1779.63 16384 1000 3451.88 3452.73 3452.40 32768 1000 7109.96 7110.49 7110.25 65536 640 14411.13 14412.69 14412.02 131072 320 28566.01 28568.27 28567.16 262144 160 58652.21 58656.44 58654.46 524288 80 116648.83 116654.27 116651.42 1048576 40 236605.20 236641.57 236627.97 2097152 20 474505.90 481195.15 478650.15 4194304 10 937362.99 950889.10 944152.71 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 32 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.09 1 1000 87.00 87.03 87.02 2 1000 87.32 87.35 87.34 4 1000 88.34 88.37 88.36 8 1000 90.45 90.50 90.47 16 1000 93.38 93.42 93.40 32 1000 98.32 98.38 98.35 64 1000 107.71 107.76 107.74 128 1000 137.31 137.39 137.35 256 1000 205.97 206.11 206.03 512 1000 369.81 370.03 369.90 1024 1000 809.66 810.05 809.85 2048 1000 1598.82 1599.67 1599.26 4096 1000 3201.85 3203.74 3202.72 8192 1000 6248.79 6252.35 6250.74 16384 1000 12747.99 12756.46 12751.81 32768 1000 26708.36 26724.36 26715.54 65536 640 53877.54 53915.80 53890.97 131072 320 109425.37 109609.78 109531.63 262144 160 224429.89 225064.41 224722.77 524288 80 447561.38 450071.78 448641.17 1048576 40 909352.35 922482.75 916811.30 2097152 20 1806077.00 1846855.20 1826631.48 4194304 10 3235542.30 3393720.40 3315048.63 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.10 0.10 0.10 1 1000 4.46 4.46 4.46 2 1000 4.42 4.42 4.42 4 1000 4.58 4.58 4.58 8 1000 4.59 4.59 4.59 16 1000 4.82 4.82 4.82 32 1000 4.96 4.96 4.96 64 1000 5.21 5.21 5.21 128 1000 5.92 5.92 5.92 256 1000 7.12 7.12 7.12 512 1000 8.93 8.93 8.93 1024 1000 12.68 12.68 12.68 2048 1000 20.43 20.43 20.43 4096 1000 35.75 35.75 35.75 8192 1000 65.81 65.81 65.81 16384 1000 131.47 131.47 131.47 32768 1000 263.18 263.20 263.19 65536 640 846.12 846.16 846.14 131072 320 1743.27 1743.35 1743.31 262144 160 1843.36 1843.43 1843.39 524288 80 3656.36 3656.86 3656.61 1048576 40 7270.03 7270.70 7270.36 2097152 20 14494.30 14494.35 14494.32 4194304 10 28855.10 28855.60 28855.35 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.10 0.10 0.10 1 1000 23.89 23.91 23.90 2 1000 24.04 24.06 24.05 4 1000 24.01 24.03 24.02 8 1000 24.33 24.35 24.35 16 1000 24.94 24.97 24.96 32 1000 25.48 25.49 25.48 64 1000 27.84 27.86 27.85 128 1000 29.97 29.99 29.99 256 1000 33.37 33.39 33.38 512 1000 39.30 39.33 39.31 1024 1000 53.36 53.39 53.37 2048 1000 84.69 84.72 84.71 4096 1000 148.49 148.57 148.52 8192 1000 311.55 311.55 311.55 16384 1000 591.30 591.32 591.31 32768 1000 2010.49 2010.84 2010.67 65536 640 3822.48 3823.15 3822.80 131072 320 2753.42 2755.23 2754.31 262144 160 5268.58 5275.58 5272.16 524288 80 10253.36 10281.15 10267.64 1048576 40 22045.87 22155.35 22101.51 2097152 20 42672.85 43104.00 42891.42 4194304 10 85896.69 87609.29 86765.49 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.10 0.11 0.10 1 1000 48.13 48.15 48.14 2 1000 48.38 48.41 48.39 4 1000 48.65 48.68 48.67 8 1000 49.41 49.45 49.44 16 1000 49.84 49.87 49.86 32 1000 52.16 52.18 52.17 64 1000 56.37 56.40 56.38 128 1000 61.18 61.21 61.19 256 1000 69.15 69.18 69.17 512 1000 89.82 89.86 89.84 1024 1000 132.75 132.79 132.77 2048 1000 227.34 227.43 227.39 4096 1000 407.09 407.16 407.13 8192 1000 856.46 856.49 856.48 16384 1000 2366.52 2367.16 2366.91 32768 1000 4327.57 4328.01 4327.80 65536 640 3549.70 3550.88 3550.31 131072 320 6390.95 6393.36 6392.19 262144 160 12109.39 12118.40 12113.98 524288 80 24207.71 24274.25 24243.28 1048576 40 48539.57 48809.65 48685.30 2097152 20 96185.95 96713.55 96448.57 4194304 10 209326.00 214059.90 211952.89 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.12 0.12 1 1000 75.08 75.11 75.10 2 1000 75.38 75.44 75.41 4 1000 75.88 75.92 75.90 8 1000 76.87 76.90 76.89 16 1000 79.65 79.69 79.67 32 1000 83.36 83.40 83.38 64 1000 89.72 89.76 89.75 128 1000 98.68 98.72 98.70 256 1000 121.31 121.37 121.34 512 1000 166.74 166.79 166.77 1024 1000 270.54 270.69 270.61 2048 1000 493.60 493.68 493.65 4096 1000 955.54 955.86 955.68 8192 1000 2618.15 2618.87 2618.58 16384 1000 4739.98 4740.45 4740.22 32768 1000 4157.27 4158.30 4157.83 65536 640 7387.37 7390.06 7388.76 131072 320 13828.84 13838.45 13833.60 262144 160 26952.58 26992.08 26973.15 524288 80 53289.29 53444.83 53369.87 1048576 40 107473.03 108070.08 107783.77 2097152 20 220551.95 223105.70 221863.69 4194304 10 453611.40 462627.80 458281.48 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 32 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.15 0.17 0.15 1 1000 107.79 107.83 107.81 2 1000 107.70 107.74 107.72 4 1000 108.54 108.58 108.56 8 1000 112.05 112.10 112.08 16 1000 115.87 115.92 115.90 32 1000 122.64 122.68 122.66 64 1000 135.71 135.77 135.74 128 1000 165.79 165.86 165.83 256 1000 235.27 235.38 235.32 512 1000 392.29 392.52 392.41 1024 1000 875.06 875.49 875.25 2048 1000 1720.69 1721.44 1721.03 4096 1000 3928.24 3930.38 3929.36 8192 1000 7956.25 7960.26 7958.25 16384 1000 5199.08 5199.83 5199.43 32768 1000 8602.99 8605.33 8604.27 65536 640 15362.12 15366.54 15364.12 131072 320 29182.10 29200.02 29190.25 262144 160 56792.29 56880.91 56837.79 524288 80 111406.38 111769.65 111594.12 1048576 40 237313.57 238580.78 237988.59 2097152 20 464709.90 469585.05 467227.88 4194304 10 960429.41 974209.90 966326.96 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 4.36 4.36 4.36 1 1000 4.71 4.71 4.71 2 1000 4.86 4.87 4.86 4 1000 4.84 4.84 4.84 8 1000 4.98 4.98 4.98 16 1000 4.91 4.91 4.91 32 1000 5.31 5.31 5.31 64 1000 5.89 5.89 5.89 128 1000 4.19 4.19 4.19 256 1000 4.80 4.80 4.80 512 1000 5.82 5.82 5.82 1024 1000 7.66 7.66 7.66 2048 1000 11.43 11.43 11.43 4096 1000 18.82 18.82 18.82 8192 1000 32.86 32.86 32.86 16384 1000 65.18 65.18 65.18 32768 1000 130.16 130.16 130.16 65536 640 414.10 414.11 414.11 131072 320 918.10 918.15 918.13 262144 160 1797.09 1797.10 1797.09 524288 80 3568.90 3568.99 3568.94 1048576 40 7131.25 7132.00 7131.62 2097152 20 14223.05 14224.45 14223.75 4194304 10 28238.30 28242.80 28240.55 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 23.58 23.60 23.59 1 1000 24.22 24.25 24.23 2 1000 24.54 24.57 24.56 4 1000 24.84 24.86 24.85 8 1000 25.36 25.37 25.36 16 1000 27.45 27.46 27.45 32 1000 29.76 29.78 29.77 64 1000 40.98 41.01 41.00 128 1000 43.37 43.40 43.39 256 1000 44.83 44.85 44.84 512 1000 47.68 47.71 47.70 1024 1000 54.51 54.54 54.53 2048 1000 68.54 68.58 68.56 4096 1000 118.54 118.60 118.58 8192 1000 188.45 188.54 188.51 16384 1000 412.84 412.85 412.84 32768 1000 724.95 725.17 725.07 65536 640 1624.70 1624.71 1624.71 131072 320 3249.37 3249.46 3249.42 262144 160 6222.57 6222.75 6222.69 524288 80 11972.07 11972.30 11972.16 1048576 40 23365.55 23366.35 23365.91 2097152 20 45716.10 45717.50 45716.70 4194304 10 106076.11 106252.71 106163.88 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 47.70 47.73 47.72 1 1000 49.19 49.23 49.21 2 1000 49.64 49.66 49.65 4 1000 52.03 52.06 52.05 8 1000 55.89 55.92 55.91 16 1000 60.01 60.08 60.06 32 1000 118.00 118.04 118.02 64 1000 118.84 118.87 118.85 128 1000 126.16 126.23 126.21 256 1000 127.46 127.51 127.49 512 1000 133.53 133.57 133.55 1024 1000 145.89 145.94 145.92 2048 1000 176.64 176.71 176.69 4096 1000 332.58 332.75 332.66 8192 1000 650.57 650.79 650.70 16384 1000 1610.30 1610.62 1610.49 32768 1000 2860.00 2860.13 2860.07 65536 640 5051.67 5051.77 5051.73 131072 320 9448.30 9449.16 9448.77 262144 160 18180.10 18184.67 18182.05 524288 80 36267.68 36282.60 36273.79 1048576 40 71825.20 71888.35 71843.26 2097152 20 165900.35 166192.15 166104.11 4194304 10 323287.50 323837.10 323459.42 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 74.19 74.24 74.22 1 1000 79.37 79.40 79.39 2 1000 82.84 82.88 82.86 4 1000 87.90 87.94 87.91 8 1000 95.17 95.21 95.19 16 1000 270.30 270.36 270.33 32 1000 271.28 271.36 271.32 64 1000 273.98 274.05 274.02 128 1000 294.51 294.63 294.57 256 1000 297.05 297.13 297.10 512 1000 309.22 309.34 309.29 1024 1000 335.61 335.70 335.65 2048 1000 479.67 479.91 479.80 4096 1000 933.25 933.55 933.42 8192 1000 1879.22 1879.84 1879.57 16384 1000 4181.09 4181.27 4181.19 32768 1000 6991.56 6991.88 6991.74 65536 640 12234.92 12235.61 12235.18 131072 320 22543.39 22545.54 22544.47 262144 160 42951.44 42959.74 42955.13 524288 80 83235.79 83259.29 83249.22 1048576 40 190541.77 190627.15 190601.98 2097152 20 377570.50 377966.15 377782.34 4194304 10 754574.50 756121.91 755344.26 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 32 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 105.71 105.75 105.73 1 1000 120.45 120.51 120.48 2 1000 130.85 130.90 130.87 4 1000 156.62 156.71 156.67 8 1000 579.60 579.79 579.69 16 1000 581.75 581.92 581.83 32 1000 582.79 582.98 582.88 64 1000 587.91 588.12 588.02 128 1000 630.27 630.50 630.39 256 1000 642.23 642.56 642.41 512 1000 661.09 661.37 661.24 1024 1000 730.38 730.66 730.51 2048 1000 1242.03 1242.47 1242.27 4096 1000 2482.71 2483.57 2483.18 8192 1000 5296.33 5296.58 5296.45 16384 1000 12028.58 12029.06 12028.86 32768 1000 21530.00 21530.84 21530.55 65536 640 40013.43 40014.89 40014.18 131072 320 76532.94 76536.78 76535.19 262144 160 150619.46 150638.59 150627.87 524288 80 318247.96 318309.43 318269.83 1048576 40 637773.37 638155.03 637958.58 2097152 20 1261555.65 1262622.10 1262040.29 4194304 10 2502436.60 2509852.90 2507282.31 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 1.26 1.27 1.27 2 1000 1.25 1.25 1.25 4 1000 1.29 1.29 1.29 8 1000 1.31 1.31 1.31 16 1000 1.37 1.37 1.37 32 1000 1.37 1.37 1.37 64 1000 1.47 1.47 1.47 128 1000 1.63 1.63 1.63 256 1000 1.86 1.86 1.86 512 1000 2.23 2.23 2.23 1024 1000 2.76 2.76 2.76 2048 1000 3.75 3.76 3.76 4096 1000 5.60 5.60 5.60 8192 1000 9.16 9.16 9.16 16384 1000 26.79 26.79 26.79 32768 1000 52.04 52.04 52.04 65536 640 111.48 111.50 111.49 131072 320 327.31 327.45 327.38 262144 160 786.20 786.46 786.33 524288 80 1670.51 1670.86 1670.69 1048576 40 3367.85 3368.75 3368.30 2097152 20 6751.95 6752.05 6752.00 4194304 10 13531.59 13535.20 13533.40 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 12.82 12.85 12.84 2 1000 12.79 12.83 12.81 4 1000 12.88 12.92 12.91 8 1000 12.86 12.89 12.88 16 1000 12.93 12.97 12.95 32 1000 12.89 12.92 12.91 64 1000 13.03 13.07 13.06 128 1000 13.96 13.99 13.98 256 1000 14.32 14.35 14.34 512 1000 15.20 15.22 15.21 1024 1000 16.93 16.97 16.96 2048 1000 23.01 23.07 23.04 4096 1000 36.72 36.80 36.77 8192 1000 64.47 64.61 64.55 16384 1000 189.16 189.21 189.19 32768 1000 395.94 396.05 396.00 65536 640 748.48 748.52 748.50 131072 320 1424.72 1424.76 1424.73 262144 160 2642.29 2642.36 2642.32 524288 80 5486.34 5486.91 5486.66 1048576 40 11371.60 11372.48 11372.11 2097152 20 22426.10 22642.55 22534.64 4194304 10 39903.01 39907.90 39905.47 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.06 0.05 1 1000 18.87 18.92 18.90 2 1000 18.76 18.82 18.80 4 1000 18.98 19.03 19.01 8 1000 18.80 18.85 18.83 16 1000 18.87 18.91 18.90 32 1000 19.12 19.17 19.16 64 1000 19.38 19.44 19.42 128 1000 19.92 19.99 19.97 256 1000 19.97 20.04 20.00 512 1000 21.29 21.36 21.33 1024 1000 23.84 23.91 23.88 2048 1000 32.46 32.56 32.52 4096 1000 57.33 57.47 57.42 8192 1000 103.56 103.79 103.71 16384 1000 279.29 279.38 279.34 32768 1000 527.50 527.66 527.57 65536 640 950.35 950.80 950.59 131072 320 1972.22 1974.00 1973.31 262144 160 3900.52 3906.18 3903.18 524288 80 7881.53 7893.15 7886.03 1048576 40 16080.82 16182.48 16131.10 2097152 20 32672.40 33081.15 32876.18 4194304 10 61698.60 63406.71 62767.10 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 75.50 75.53 75.51 2 1000 76.55 76.58 76.56 4 1000 85.20 85.25 85.23 8 1000 94.51 94.55 94.53 16 1000 99.74 99.77 99.75 32 1000 100.14 100.18 100.16 64 1000 100.58 100.62 100.60 128 1000 101.14 101.17 101.16 256 1000 103.84 103.87 103.85 512 1000 109.85 109.88 109.87 1024 1000 116.97 117.02 116.99 2048 1000 131.45 131.50 131.49 4096 1000 161.33 161.37 161.35 8192 1000 215.79 215.86 215.83 16384 1000 350.07 350.21 350.14 32768 1000 617.57 617.69 617.63 65536 640 1104.07 1104.75 1104.46 131072 320 2070.48 2073.07 2071.89 262144 160 4328.79 4333.14 4331.09 524288 80 8910.69 8945.54 8922.02 1048576 40 18260.17 18329.40 18291.44 2097152 20 35454.00 35839.85 35690.23 4194304 10 69728.10 71094.00 70594.82 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 32 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 97.27 97.32 97.30 2 1000 97.72 97.74 97.73 4 1000 105.55 105.61 105.58 8 1000 114.01 114.06 114.04 16 1000 122.90 122.94 122.92 32 1000 128.51 128.55 128.53 64 1000 128.73 128.77 128.76 128 1000 129.46 129.49 129.48 256 1000 132.29 132.33 132.31 512 1000 138.35 138.39 138.37 1024 1000 147.97 148.03 148.00 2048 1000 162.54 162.59 162.56 4096 1000 198.78 198.85 198.82 8192 1000 286.45 286.58 286.51 16384 1000 467.46 467.72 467.58 32768 1000 934.19 934.63 934.38 65536 640 1781.02 1782.42 1781.61 131072 320 3520.30 3524.47 3522.04 262144 160 6846.72 6868.49 6858.11 524288 80 14079.70 14170.89 14120.05 1048576 40 28239.67 28557.35 28402.22 2097152 20 57058.80 58181.70 57593.27 4194304 10 114070.11 118208.79 116856.20 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 2 ) # ( 30 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 2.17 2.17 2.17 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 4 ) # ( 28 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 21.49 21.50 21.49 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 8 ) # ( 24 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 43.31 43.34 43.32 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 16 ) # ( 16 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 64.36 64.39 64.38 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 32 ) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 85.88 85.91 85.89 #===================================================== # # Thanks for using PMB2.2 # # The Pallas team kindly requests that you # give us as much feedback for PMB as possible. # # It would be very helpful when you sent the # output tables of your run(s) of PMB to # # ####################### # # # # # pmb@pallas.com # # # # # ####################### # # You might also add # # - personal information (institution, motivation # for using PMB) # - basic information about the machine you used # (number of CPUs, processor type e.t.c.) # #=====================================================