mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-01-18 17:31:19 +01:00
Enable generic clang backend tests.
<!--
Thanks for contributing a merge request! We recommend that first-time contributors read our [contribution guidelines](https://eigen.tuxfamily.org/index.php?title=Contributing_to_Eigen).

Before submitting the MR, please complete the following checks:
- Create one MR per feature or bugfix.
- Run the test suite to verify your changes. See our [test guidelines](https://eigen.tuxfamily.org/index.php?title=Tests).
- Add tests to cover the bug addressed or any new feature.
- Document new features. If it is a substantial change, add it to the [Changelog](https://gitlab.com/libeigen/eigen/-/blob/master/CHANGELOG.md).
- Leave the following box checked when submitting: `Allow commits from members who can merge to the target branch`. This allows us to rebase and merge your change.

Note that we are a team of volunteers; we appreciate your patience during the review process.
-->

### Description
<!--Please explain your changes.-->

Enable generic clang backend tests.

Added an AVX512 job using the generic clang backend. Also fixed up some guards in the custom AVX512 gemm/trsm kernels so that they don't define anything when those kernels aren't in use.

See merge request libeigen/eigen!2063
This commit is contained in:
committed by
Rasmus Munk Larsen
parent
3368ac6c69
commit
ed989c7504
@@ -35,6 +35,8 @@
|
|||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
#if EIGEN_USE_AVX512_GEMM_KERNELS
|
||||||
|
|
||||||
template <typename Scalar, bool is_unit_inc>
|
template <typename Scalar, bool is_unit_inc>
|
||||||
class gemm_class {
|
class gemm_class {
|
||||||
using vec = typename packet_traits<Scalar>::type;
|
using vec = typename packet_traits<Scalar>::type;
|
||||||
@@ -947,7 +949,6 @@ EIGEN_DONT_INLINE void gemm_kern_avx512(Index m, Index n, Index k, Scalar *alpha
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Template specializations of GEBP kernels with nr = 8.
|
// Template specializations of GEBP kernels with nr = 8.
|
||||||
#if EIGEN_USE_AVX512_GEMM_KERNELS
|
|
||||||
template <bool ConjLhs_, bool ConjRhs_, int PacketSize_>
|
template <bool ConjLhs_, bool ConjRhs_, int PacketSize_>
|
||||||
class gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Target, PacketSize_>
|
class gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Target, PacketSize_>
|
||||||
: public gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Generic, PacketSize_> {
|
: public gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Generic, PacketSize_> {
|
||||||
|
|||||||
@@ -44,6 +44,8 @@
|
|||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
#if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
|
||||||
|
|
||||||
#define EIGEN_AVX_MAX_NUM_ACC (int64_t(24))
|
#define EIGEN_AVX_MAX_NUM_ACC (int64_t(24))
|
||||||
#define EIGEN_AVX_MAX_NUM_ROW (int64_t(8)) // Denoted L in code.
|
#define EIGEN_AVX_MAX_NUM_ROW (int64_t(8)) // Denoted L in code.
|
||||||
#define EIGEN_AVX_MAX_K_UNROL (int64_t(4))
|
#define EIGEN_AVX_MAX_K_UNROL (int64_t(4))
|
||||||
@@ -58,7 +60,6 @@ typedef Packet4d vecHalfDouble;
|
|||||||
// Note: this depends on macros and typedefs above.
|
// Note: this depends on macros and typedefs above.
|
||||||
#include "TrsmUnrolls.inc"
|
#include "TrsmUnrolls.inc"
|
||||||
|
|
||||||
#if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
|
|
||||||
/**
|
/**
|
||||||
* For smaller problem sizes, and certain compilers, using the optimized kernels trsmKernelL/R directly
|
* For smaller problem sizes, and certain compilers, using the optimized kernels trsmKernelL/R directly
|
||||||
* is faster than the packed versions in TriangularSolverMatrix.h.
|
* is faster than the packed versions in TriangularSolverMatrix.h.
|
||||||
|
|||||||
@@ -141,7 +141,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, C
|
|||||||
std::ptrdiff_t l1, l2, l3;
|
std::ptrdiff_t l1, l2, l3;
|
||||||
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
manage_caching_sizes(GetAction, &l1, &l2, &l3);
|
||||||
|
|
||||||
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
|
#if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_L_KERNELS) && EIGEN_USE_AVX512_TRSM_L_KERNELS && \
|
||||||
|
EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS
|
||||||
EIGEN_IF_CONSTEXPR(
|
EIGEN_IF_CONSTEXPR(
|
||||||
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
||||||
// Very rough cutoffs to determine when to call trsm w/o packing
|
// Very rough cutoffs to determine when to call trsm w/o packing
|
||||||
@@ -209,7 +210,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheLeft, Mode, C
|
|||||||
// tr solve
|
// tr solve
|
||||||
{
|
{
|
||||||
Index i = IsLower ? k2 + k1 : k2 - k1;
|
Index i = IsLower ? k2 + k1 : k2 - k1;
|
||||||
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_L_KERNELS
|
#if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_L_KERNELS) && EIGEN_USE_AVX512_TRSM_L_KERNELS
|
||||||
EIGEN_IF_CONSTEXPR(
|
EIGEN_IF_CONSTEXPR(
|
||||||
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
||||||
i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
|
i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth;
|
||||||
@@ -273,7 +274,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar, Index, OnTheRight, Mode,
|
|||||||
level3_blocking<Scalar, Scalar>& blocking) {
|
level3_blocking<Scalar, Scalar>& blocking) {
|
||||||
Index rows = otherSize;
|
Index rows = otherSize;
|
||||||
|
|
||||||
#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
|
#if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_R_KERNELS) && EIGEN_USE_AVX512_TRSM_R_KERNELS && \
|
||||||
|
EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS
|
||||||
EIGEN_IF_CONSTEXPR(
|
EIGEN_IF_CONSTEXPR(
|
||||||
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
(OtherInnerStride == 1 && (std::is_same<Scalar, float>::value || std::is_same<Scalar, double>::value))) {
|
||||||
// TODO: Investigate better heuristics for cutoffs.
|
// TODO: Investigate better heuristics for cutoffs.
|
||||||
|
|||||||
@@ -97,6 +97,18 @@ build:linux:cross:x86-64:clang-12:avx512dq:
|
|||||||
variables:
|
variables:
|
||||||
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
|
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on"
|
||||||
|
|
||||||
|
# Generic vector extension backend.
|
||||||
|
build:linux:cross:x86-64:clang-19:generic:avx512dq:
|
||||||
|
image: ubuntu:24.04
|
||||||
|
extends: build:linux:cross:x86-64:clang-12:default
|
||||||
|
variables:
|
||||||
|
EIGEN_CI_INSTALL: clang-19
|
||||||
|
EIGEN_CI_C_COMPILER: clang-19
|
||||||
|
EIGEN_CI_CXX_COMPILER: clang++-19
|
||||||
|
EIGEN_CI_CROSS_INSTALL: g++-14-x86-64-linux-gnu clang-19
|
||||||
|
EIGEN_CI_ADDITIONAL_ARGS: >
|
||||||
|
-DEIGEN_TEST_CUSTOM_CXX_FLAGS=-mfma;-mavx512dq;-DEIGEN_VECTORIZE_GENERIC=1
|
||||||
|
|
||||||
build:linux:docs:
|
build:linux:docs:
|
||||||
extends: .build:linux:cross
|
extends: .build:linux:cross
|
||||||
variables:
|
variables:
|
||||||
|
|||||||
@@ -197,6 +197,29 @@ test:linux:x86-64:clang-12:avx512dq:unsupported:
|
|||||||
variables:
|
variables:
|
||||||
EIGEN_CI_CTEST_LABEL: Unsupported
|
EIGEN_CI_CTEST_LABEL: Unsupported
|
||||||
|
|
||||||
|
# Generic vector extension backend.
|
||||||
|
.test:linux:x86-64:clang-19:generic:avx512dq:
|
||||||
|
image: ubuntu:24.04
|
||||||
|
extends: .test:linux:x86-64
|
||||||
|
needs: [ build:linux:cross:x86-64:clang-19:generic:avx512dq ]
|
||||||
|
variables:
|
||||||
|
EIGEN_CI_INSTALL: clang-19
|
||||||
|
tags:
|
||||||
|
- eigen-runner
|
||||||
|
- linux
|
||||||
|
- x86-64
|
||||||
|
- avx512
|
||||||
|
|
||||||
|
test:linux:x86-64:clang-19:generic:avx512dq:official:
|
||||||
|
extends: .test:linux:x86-64:clang-19:generic:avx512dq
|
||||||
|
variables:
|
||||||
|
EIGEN_CI_CTEST_LABEL: Official
|
||||||
|
|
||||||
|
test:linux:x86-64:clang-19:generic:avx512dq:unsupported:
|
||||||
|
extends: .test:linux:x86-64:clang-19:generic:avx512dq
|
||||||
|
variables:
|
||||||
|
EIGEN_CI_CTEST_LABEL: Unsupported
|
||||||
|
|
||||||
##### CUDA #####################################################################
|
##### CUDA #####################################################################
|
||||||
.test:linux:cuda:
|
.test:linux:cuda:
|
||||||
extends: .test:linux
|
extends: .test:linux
|
||||||
|
|||||||
Reference in New Issue
Block a user