Enable generic clang backend tests.

### Description

Enable generic clang backend tests. Added an AVX512 job using the generic clang backend. Also fixed up some guards in the custom AVX512 gemm/trsm kernels so they don't start defining things if they aren't used.

See merge request libeigen/eigen!2063
2026-01-18 17:31:19 +01:00 · 2025-11-07 01:37:12 +00:00
parent 3368ac6c69
commit ed989c7504
5 changed files with 44 additions and 5 deletions
--- a/Eigen/src/Core/arch/AVX512/GemmKernel.h
+++ b/Eigen/src/Core/arch/AVX512/GemmKernel.h
@@ -35,6 +35,8 @@
 namespace Eigen {
 namespace internal {
 #if EIGEN_USE_AVX512_GEMM_KERNELS
 template <typename Scalar, bool is_unit_inc>
 class gemm_class {
  using vec = typename packet_traits<Scalar>::type;
@@ -947,7 +949,6 @@ EIGEN_DONT_INLINE void gemm_kern_avx512(Index m, Index n, Index k, Scalar *alpha
 }
 // Template specializations of GEBP kernels with nr = 8.
 #if EIGEN_USE_AVX512_GEMM_KERNELS
 template <bool ConjLhs_, bool ConjRhs_, int PacketSize_>
 class gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Target, PacketSize_>
    : public gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Generic, PacketSize_> {
--- a/Eigen/src/Core/arch/AVX512/TrsmKernel.h
+++ b/Eigen/src/Core/arch/AVX512/TrsmKernel.h
@@ -44,6 +44,8 @@
 namespace Eigen {
 namespace internal {
 #if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
 #define EIGEN_AVX_MAX_NUM_ACC (int64_t(24))
 #define EIGEN_AVX_MAX_NUM_ROW (int64_t(8))  // Denoted L in code.
 #define EIGEN_AVX_MAX_K_UNROL (int64_t(4))
@@ -58,7 +60,6 @@ typedef Packet4d vecHalfDouble;
 // Note: this depends on macros and typedefs above.
 #include "TrsmUnrolls.inc"
 #if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
 /**
 * For smaller problem sizes, and certain compilers, using the optimized kernels trsmKernelL/R directly
 * is faster than the packed versions in TriangularSolverMatrix.h.
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -141,7 +141,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, C
  std::ptrdiff_t l1, l2, l3;
  manage_caching_sizes(GetAction, &l1, &l2, &l3);
-#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
+#if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_L_KERNELS) && EIGEN_USE_AVX512_TRSM_L_KERNELS && \
    EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
  EIGEN_IF_CONSTEXPR(
      (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
    // Very rough cutoffs to determine when to call trsm w/o packing
@@ -209,7 +210,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, C
        // tr solve
        {
          Index i = IsLower ? k2 + k1 : k2 - k1;
-#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS
+#if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_L_KERNELS) && EIGEN_USE_AVX512_TRSM_L_KERNELS
          EIGEN_IF_CONSTEXPR(
              (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
            i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
@@ -273,7 +274,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheRight, Mode,
                                                                      level3_blocking<Scalar, Scalar>& blocking) {
  Index rows = otherSize;
-#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
+#if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_R_KERNELS) && EIGEN_USE_AVX512_TRSM_R_KERNELS && \
    EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
  EIGEN_IF_CONSTEXPR(
      (OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
    // TODO: Investigate better heuristics for cutoffs.
--- a/ci/build.linux.gitlab-ci.yml
+++ b/ci/build.linux.gitlab-ci.yml
@@ -97,6 +97,18 @@ build:linux:cross:x86-64:clang-12:avx512dq:
  variables:
    EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
 # Generic vector extension backend.
 # Build job using clang-19 on an ubuntu:24.04 image. Everything not overridden
 # below is inherited from the clang-12 default cross-build job.
 build:linux:cross:x86-64:clang-19:generic:avx512dq:
  image: ubuntu:24.04
  extends: build:linux:cross:x86-64:clang-12:default
  variables:
    EIGEN_CI_INSTALL: clang-19
    EIGEN_CI_C_COMPILER: clang-19
    EIGEN_CI_CXX_COMPILER: clang++-19
    EIGEN_CI_CROSS_INSTALL: g++-14-x86-64-linux-gnu clang-19
    # Custom test flags: enable FMA and AVX512DQ and define
    # EIGEN_VECTORIZE_GENERIC=1 (presumably what selects the generic clang
    # vector backend -- confirm against the Eigen configuration headers).
    EIGEN_CI_ADDITIONAL_ARGS: >
      -DEIGEN_TEST_CUSTOM_CXX_FLAGS=-mfma;-mavx512dq;-DEIGEN_VECTORIZE_GENERIC=1
 build:linux:docs:
  extends: .build:linux:cross
  variables:
--- a/ci/test.linux.gitlab-ci.yml
+++ b/ci/test.linux.gitlab-ci.yml
@@ -197,6 +197,29 @@ test:linux:x86-64:clang-12:avx512dq:unsupported:
  variables:
    EIGEN_CI_CTEST_LABEL: Unsupported
 # Generic vector extension backend.
 # Shared base for the clang-19 generic-backend AVX512DQ test jobs; the
 # concrete :official and :unsupported jobs extend it.
 .test:linux:x86-64:clang-19:generic:avx512dq:
  image: ubuntu:24.04
  extends: .test:linux:x86-64
  # Depends on the matching generic-backend build job.
  needs: [ build:linux:cross:x86-64:clang-19:generic:avx512dq ]
  variables:
    EIGEN_CI_INSTALL: clang-19
  # Runner selection; the avx512 tag is presumably required so the tests run
  # on hardware that supports AVX512 -- confirm against the runner setup.
  tags:
    - eigen-runner
    - linux
    - x86-64
    - avx512
 # clang-19 generic-backend AVX512DQ test job with the ctest label set to
 # "Official" (presumably restricts the run to tests carrying that label).
 test:linux:x86-64:clang-19:generic:avx512dq:official:
  extends: .test:linux:x86-64:clang-19:generic:avx512dq
  variables:
    EIGEN_CI_CTEST_LABEL: Official
 # clang-19 generic-backend AVX512DQ test job with the ctest label set to
 # "Unsupported" (presumably restricts the run to tests carrying that label).
 test:linux:x86-64:clang-19:generic:avx512dq:unsupported:
  extends: .test:linux:x86-64:clang-19:generic:avx512dq
  variables:
    EIGEN_CI_CTEST_LABEL: Unsupported
 ##### CUDA #####################################################################
 .test:linux:cuda:
  extends: .test:linux