0
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2026-01-18 17:31:19 +01:00

Enable generic clang backend tests.

<!-- 
Thanks for contributing a merge request!

We recommend that first-time contributors read our [contribution guidelines](https://eigen.tuxfamily.org/index.php?title=Contributing_to_Eigen).

Before submitting the MR, please complete the following checks:
- Create one MR per feature or bugfix,
- Run the test suite to verify your changes.
  See our [test guidelines](https://eigen.tuxfamily.org/index.php?title=Tests).
- Add tests to cover the bug addressed or any new feature.
- Document new features.  If it is a substantial change, add it to the [Changelog](https://gitlab.com/libeigen/eigen/-/blob/master/CHANGELOG.md).
- Leave the following box checked when submitting: `Allow commits from members who can merge to the target branch`.
  This allows us to rebase and merge your change.

Note that we are a team of volunteers; we appreciate your patience during the review process.
-->

### Description
<!--Please explain your changes.-->

Enable generic clang backend tests.

Added an AVX512 job using the generic clang backend.

Also fixed up the preprocessor guards in the custom AVX512 gemm/trsm kernels so
that their macros and helper classes are not defined when the kernels are not in use.

See merge request libeigen/eigen!2063
This commit is contained in:
Antonio Sánchez
2025-11-07 01:37:12 +00:00
committed by Rasmus Munk Larsen
parent 3368ac6c69
commit ed989c7504
5 changed files with 44 additions and 5 deletions

View File

@@ -35,6 +35,8 @@
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
#if EIGEN_USE_AVX512_GEMM_KERNELS
template <typename Scalar, bool is_unit_inc> template <typename Scalar, bool is_unit_inc>
class gemm_class { class gemm_class {
using vec = typename packet_traits<Scalar>::type; using vec = typename packet_traits<Scalar>::type;
@@ -947,7 +949,6 @@ EIGEN_DONT_INLINE void gemm_kern_avx512(Index m, Index n, Index k, Scalar *alpha
} }
// Template specializations of GEBP kernels with nr = 8. // Template specializations of GEBP kernels with nr = 8.
#if EIGEN_USE_AVX512_GEMM_KERNELS
template <bool ConjLhs_, bool ConjRhs_, int PacketSize_> template <bool ConjLhs_, bool ConjRhs_, int PacketSize_>
class gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Target, PacketSize_> class gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Target, PacketSize_>
: public gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Generic, PacketSize_> { : public gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Generic, PacketSize_> {

View File

@@ -44,6 +44,8 @@
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
#if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
#define EIGEN_AVX_MAX_NUM_ACC (int64_t(24)) #define EIGEN_AVX_MAX_NUM_ACC (int64_t(24))
#define EIGEN_AVX_MAX_NUM_ROW (int64_t(8)) // Denoted L in code. #define EIGEN_AVX_MAX_NUM_ROW (int64_t(8)) // Denoted L in code.
#define EIGEN_AVX_MAX_K_UNROL (int64_t(4)) #define EIGEN_AVX_MAX_K_UNROL (int64_t(4))
@@ -58,7 +60,6 @@ typedef Packet4d vecHalfDouble;
// Note: this depends on macros and typedefs above. // Note: this depends on macros and typedefs above.
#include "TrsmUnrolls.inc" #include "TrsmUnrolls.inc"
#if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
/** /**
* For smaller problem sizes, and certain compilers, using the optimized kernels trsmKernelL/R directly * For smaller problem sizes, and certain compilers, using the optimized kernels trsmKernelL/R directly
* is faster than the packed versions in TriangularSolverMatrix.h. * is faster than the packed versions in TriangularSolverMatrix.h.

View File

@@ -141,7 +141,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, C
std::ptrdiff_t l1, l2, l3; std::ptrdiff_t l1, l2, l3;
manage_caching_sizes(GetAction, &l1, &l2, &l3); manage_caching_sizes(GetAction, &l1, &l2, &l3);
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS #if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_L_KERNELS) && EIGEN_USE_AVX512_TRSM_L_KERNELS && \
EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
EIGEN_IF_CONSTEXPR( EIGEN_IF_CONSTEXPR(
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) { (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
// Very rough cutoffs to determine when to call trsm w/o packing // Very rough cutoffs to determine when to call trsm w/o packing
@@ -209,7 +210,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, C
// tr solve // tr solve
{ {
Index i = IsLower ? k2 + k1 : k2 - k1; Index i = IsLower ? k2 + k1 : k2 - k1;
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS #if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_L_KERNELS) && EIGEN_USE_AVX512_TRSM_L_KERNELS
EIGEN_IF_CONSTEXPR( EIGEN_IF_CONSTEXPR(
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) { (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth; i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
@@ -273,7 +274,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheRight, Mode,
level3_blocking<Scalar, Scalar>& blocking) { level3_blocking<Scalar, Scalar>& blocking) {
Index rows = otherSize; Index rows = otherSize;
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS #if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_R_KERNELS) && EIGEN_USE_AVX512_TRSM_R_KERNELS && \
EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
EIGEN_IF_CONSTEXPR( EIGEN_IF_CONSTEXPR(
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) { (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
// TODO: Investigate better heuristics for cutoffs. // TODO: Investigate better heuristics for cutoffs.

View File

@@ -97,6 +97,18 @@ build:linux:cross:x86-64:clang-12:avx512dq:
variables: variables:
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on" EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
# Generic vector extension backend.
# Cross-build job for x86-64 that extends the clang-12 default build job, but
# uses an Ubuntu 24.04 image and installs/selects clang-19 as the C and C++ compiler.
build:linux:cross:x86-64:clang-19:generic:avx512dq:
image: ubuntu:24.04
extends: build:linux:cross:x86-64:clang-12:default
variables:
EIGEN_CI_INSTALL: clang-19
EIGEN_CI_C_COMPILER: clang-19
EIGEN_CI_CXX_COMPILER: clang++-19
EIGEN_CI_CROSS_INSTALL: g++-14-x86-64-linux-gnu clang-19
# Custom test C++ flags: -mfma, -mavx512dq, and the EIGEN_VECTORIZE_GENERIC=1 define.
EIGEN_CI_ADDITIONAL_ARGS: >
-DEIGEN_TEST_CUSTOM_CXX_FLAGS=-mfma;-mavx512dq;-DEIGEN_VECTORIZE_GENERIC=1
build:linux:docs: build:linux:docs:
extends: .build:linux:cross extends: .build:linux:cross
variables: variables:

View File

@@ -197,6 +197,29 @@ test:linux:x86-64:clang-12:avx512dq:unsupported:
variables: variables:
EIGEN_CI_CTEST_LABEL: Unsupported EIGEN_CI_CTEST_LABEL: Unsupported
# Generic vector extension backend.
# Shared base for the clang-19 generic-backend test jobs (extended by the
# :official and :unsupported jobs). It needs the matching clang-19 generic
# build job and carries the eigen-runner, linux, x86-64 and avx512 runner tags.
.test:linux:x86-64:clang-19:generic:avx512dq:
image: ubuntu:24.04
extends: .test:linux:x86-64
needs: [ build:linux:cross:x86-64:clang-19:generic:avx512dq ]
variables:
EIGEN_CI_INSTALL: clang-19
tags:
- eigen-runner
- linux
- x86-64
- avx512
# Extends the clang-19 generic-backend base test job with EIGEN_CI_CTEST_LABEL set to Official.
test:linux:x86-64:clang-19:generic:avx512dq:official:
extends: .test:linux:x86-64:clang-19:generic:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Official
# Extends the clang-19 generic-backend base test job with EIGEN_CI_CTEST_LABEL set to Unsupported.
test:linux:x86-64:clang-19:generic:avx512dq:unsupported:
extends: .test:linux:x86-64:clang-19:generic:avx512dq
variables:
EIGEN_CI_CTEST_LABEL: Unsupported
##### CUDA ##################################################################### ##### CUDA #####################################################################
.test:linux:cuda: .test:linux:cuda:
extends: .test:linux extends: .test:linux