From 7f85d5cf5b0008af491f1a536518b7c2eed37a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Wed, 5 Nov 2025 21:26:04 +0000 Subject: [PATCH] Implement assume_aligned using the standard API This implements `Eigen::internal::assume_aligned` to match the C++20 standard API as closely as possible, using either `std::assume_aligned` or `__builtin_assume_aligned`, whichever is available. If neither is available, the function is a no-op. The override macro `EIGEN_ASSUME_ALIGNED` was replaced by `EIGEN_DONT_ASSUME_ALIGNED`; defining it forces the function to be a no-op. See merge request libeigen/eigen!2052 (cherry picked from commit 8716f109e465d4122e61be9087b17a48bf58ca45) f8191848 Fix pcmp_* for HVX to to comply with the new definition of true = Scalar(1). 7cc169d9 Revert "Fix pcmp_* for HVX to to comply with the new definition of true = Scalar(1)." 06999845 Merge branch eigen:master into master b5fde61f Merge branch eigen:master into master 10d10d60 Merge branch eigen:master into master 9398d6ad Merge branch eigen:master into master 7804d5a4 Merge branch eigen:master into master 0068623c Merge branch eigen:master into master b0ffc9cf Merge branch eigen:master into master f3791c80 Merge branch eigen:master into master 74275f0c Merge branch eigen:master into master b095614e Merge branch eigen:master into master 1312a696 Merge branch eigen:master into master e6dd44d2 Merge branch eigen:master into master 8ac67769 Implement assume_aligned using the standard API if available. 97b299fa Format. b31798be Fix typos. 04b3d312 Unformat. 
Co-authored-by: Rasmus Munk Larsen --- Eigen/src/Core/arch/NEON/Complex.h | 46 +++++----- Eigen/src/Core/arch/NEON/PacketMath.h | 127 ++++++++++---------------- Eigen/src/Core/util/Memory.h | 26 +++--- 3 files changed, 84 insertions(+), 115 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index f3f6a1a1b..8b2c5ffe3 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -48,7 +48,7 @@ struct Packet2cf { }; template <> -struct packet_traits > : default_packet_traits { +struct packet_traits> : default_packet_traits { typedef Packet2cf type; typedef Packet1cf half; enum { @@ -280,13 +280,13 @@ EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packe template <> EIGEN_STRONG_INLINE Packet1cf pload(const std::complex* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload((const float*)from)); + EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf( + pload(reinterpret_cast(assume_aligned::alignment>(from)))); } template <> EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload(reinterpret_cast(from))); + EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf( + pload(reinterpret_cast(assume_aligned::alignment>(from)))); } template <> @@ -308,22 +308,22 @@ EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* fro } template <> -EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet1cf& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); +EIGEN_STRONG_INLINE void pstore>(std::complex* to, const Packet1cf& from) { + EIGEN_DEBUG_ALIGNED_STORE pstore( + reinterpret_cast(assume_aligned::alignment>(to), from.v)); } template <> -EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet2cf& from) { - EIGEN_ASSUME_ALIGNED(to, 
unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast(to), from.v); +EIGEN_STRONG_INLINE void pstore>(std::complex* to, const Packet2cf& from) { + EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast(assume_aligned::alignment>(to)), + from.v); } template <> -EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet1cf& from) { +EIGEN_STRONG_INLINE void pstoreu>(std::complex* to, const Packet1cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } template <> -EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet2cf& from) { +EIGEN_STRONG_INLINE void pstoreu>(std::complex* to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), from.v); } @@ -356,7 +356,7 @@ EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf>(std::comp } template <> -EIGEN_STRONG_INLINE void prefetch >(const std::complex* addr) { +EIGEN_STRONG_INLINE void prefetch>(const std::complex* addr) { EIGEN_ARM_PREFETCH(reinterpret_cast(addr)); } @@ -501,7 +501,7 @@ struct Packet1cd { }; template <> -struct packet_traits > : default_packet_traits { +struct packet_traits> : default_packet_traits { typedef Packet1cd type; typedef Packet1cd half; enum { @@ -531,8 +531,8 @@ struct unpacket_traits : neon_unpacket_default EIGEN_STRONG_INLINE Packet1cd pload(const std::complex* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload(reinterpret_cast(from))); + EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd( + pload(reinterpret_cast(assume_aligned::alignment>(from)))); } template <> @@ -644,18 +644,18 @@ EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* fr } template <> -EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet1cd& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast(to), from.v); +EIGEN_STRONG_INLINE void pstore>(std::complex* to, const Packet1cd& from) { + 
EIGEN_DEBUG_ALIGNED_STORE pstore( + reinterpret_cast(assume_aligned::alignment>(to), from.v)); } template <> -EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet1cd& from) { +EIGEN_STRONG_INLINE void pstoreu>(std::complex* to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), from.v); } template <> -EIGEN_STRONG_INLINE void prefetch >(const std::complex* addr) { +EIGEN_STRONG_INLINE void prefetch>(const std::complex* addr) { EIGEN_ARM_PREFETCH(reinterpret_cast(addr)); } @@ -677,7 +677,7 @@ EIGEN_DEVICE_FUNC inline void pscatter, Packet1cd>(std::com template <> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { EIGEN_ALIGN16 std::complex res; - pstore >(&res, a); + pstore>(&res, a); return res; } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index bea50a3ef..8a3fb5be0 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -2268,13 +2268,11 @@ EIGEN_STRONG_INLINE Packet2ul plogical_shift_left(Packet2ul a) { template <> EIGEN_STRONG_INLINE Packet2f pload(const float* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_f32(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1_f32(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet4c pload(const int8_t* from) { @@ -2284,13 +2282,11 @@ EIGEN_STRONG_INLINE Packet4c pload(const int8_t* from) { } template <> EIGEN_STRONG_INLINE Packet8c pload(const int8_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_s8(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1_s8(assume_aligned::alignment>(from)); } template <> 
EIGEN_STRONG_INLINE Packet16c pload(const int8_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s8(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s8(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet4uc pload(const uint8_t* from) { @@ -2300,63 +2296,51 @@ EIGEN_STRONG_INLINE Packet4uc pload(const uint8_t* from) { } template <> EIGEN_STRONG_INLINE Packet8uc pload(const uint8_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_u8(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1_u8(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet16uc pload(const uint8_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u8(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u8(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet4s pload(const int16_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_s16(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1_s16(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet8s pload(const int16_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s16(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s16(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet4us pload(const uint16_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_u16(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1_u16(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet8us pload(const uint16_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u16(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u16(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet2i 
pload(const int32_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_s32(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1_s32(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet4i pload(const int32_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet2ui pload(const uint32_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_u32(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1_u32(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet4ui pload(const uint32_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u32(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u32(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet2l pload(const int64_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s64(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s64(assume_aligned::alignment>(from)); } template <> EIGEN_STRONG_INLINE Packet2ul pload(const uint64_t* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u64(from); + EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u64(assume_aligned::alignment>(from)); } template <> @@ -2580,13 +2564,11 @@ EIGEN_STRONG_INLINE Packet4ui ploadquad(const uint32_t* from) { template <> EIGEN_STRONG_INLINE void pstore(float* to, const Packet2f& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_f32(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1_f32(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { - EIGEN_ASSUME_ALIGNED(to, 
unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(int8_t* to, const Packet4c& from) { @@ -2594,13 +2576,11 @@ EIGEN_STRONG_INLINE void pstore(int8_t* to, const Packet4c& from) { } template <> EIGEN_STRONG_INLINE void pstore(int8_t* to, const Packet8c& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_s8(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1_s8(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(int8_t* to, const Packet16c& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_s8(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_s8(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(uint8_t* to, const Packet4uc& from) { @@ -2608,63 +2588,51 @@ EIGEN_STRONG_INLINE void pstore(uint8_t* to, const Packet4uc& from) { } template <> EIGEN_STRONG_INLINE void pstore(uint8_t* to, const Packet8uc& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_u8(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1_u8(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(uint8_t* to, const Packet16uc& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_u8(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_u8(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(int16_t* to, const Packet4s& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_s16(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1_s16(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(int16_t* to, const Packet8s& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE 
vst1q_s16(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_s16(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(uint16_t* to, const Packet4us& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_u16(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1_u16(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(uint16_t* to, const Packet8us& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_u16(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_u16(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(int32_t* to, const Packet2i& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_s32(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1_s32(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(int32_t* to, const Packet4i& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(uint32_t* to, const Packet2ui& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_u32(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1_u32(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(uint32_t* to, const Packet4ui& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_u32(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_u32(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void pstore(int64_t* to, const Packet2l& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_s64(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_s64(assume_aligned::alignment>(to), from); } template <> EIGEN_STRONG_INLINE void 
pstore(uint64_t* to, const Packet2ul& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_u64(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_u64(assume_aligned::alignment>(to), from); } template <> @@ -4739,8 +4707,8 @@ EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet4bf& from) { template <> EIGEN_STRONG_INLINE Packet4bf pload(const bfloat16* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - return Packet4bf(pload(reinterpret_cast(from))); +unpacket_traits::alignment); +return Packet4bf(pload(reinterpret_cast(from))); } template <> @@ -4750,8 +4718,8 @@ EIGEN_STRONG_INLINE Packet4bf ploadu(const bfloat16* from) { template <> EIGEN_STRONG_INLINE void pstore(bfloat16* to, const Packet4bf& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_u16(reinterpret_cast(to), from); + EIGEN_DEBUG_ALIGNED_STORE vst1_u16( + reinterpret_cast(assume_aligned::alignment>(to)), from); } template <> @@ -5240,8 +5208,8 @@ EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { template <> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); +unpacket_traits::alignment); +EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); } template <> @@ -5255,8 +5223,7 @@ EIGEN_STRONG_INLINE Packet2d ploaddup(const double* from) { } template <> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(assume_aligned::alignment>(to), from); } template <> @@ -5784,14 +5751,14 @@ EIGEN_STRONG_INLINE Packet4hf pandnot(const Packet4hf& a, const Packe template <> EIGEN_STRONG_INLINE Packet8hf pload(const Eigen::half* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return 
vld1q_f16(reinterpret_cast(from)); +unpacket_traits::alignment); +EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f16(reinterpret_cast(from)); } template <> EIGEN_STRONG_INLINE Packet4hf pload(const Eigen::half* from) { - EIGEN_ASSUME_ALIGNED(from, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_LOAD return vld1_f16(reinterpret_cast(from)); +unpacket_traits::alignment); +EIGEN_DEBUG_ALIGNED_LOAD return vld1_f16(reinterpret_cast(from)); } template <> @@ -5866,14 +5833,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4hf pinsertlast(const Packet4hf& a, template <> EIGEN_STRONG_INLINE void pstore(Eigen::half* to, const Packet8hf& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1q_f16(reinterpret_cast(to), from); + EIGEN_DEBUG_ALIGNED_STORE vst1q_f16( + reinterpret_cast(assume_aligned::alignment>(to)), from); } template <> EIGEN_STRONG_INLINE void pstore(Eigen::half* to, const Packet4hf& from) { - EIGEN_ASSUME_ALIGNED(to, unpacket_traits::alignment); - EIGEN_DEBUG_ALIGNED_STORE vst1_f16(reinterpret_cast(to), from); + EIGEN_DEBUG_ALIGNED_STORE vst1_f16( + reinterpret_cast(assume_aligned::alignment>(to)), from); } template <> diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index f8d3c42ec..b1c244c9e 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -1354,19 +1354,21 @@ EIGEN_DEVICE_FUNC void destroy_at(T* p) { } #endif -/** \internal - * This informs the implementation that PTR is aligned to at least ALIGN_BYTES - */ -#ifndef EIGEN_ASSUME_ALIGNED -#if defined(__cpp_lib_assume_aligned) && (__cpp_lib_assume_aligned >= 201811L) -#define EIGEN_ASSUME_ALIGNED(PTR, ALIGN_BYTES) \ - { PTR = std::assume_aligned(PTR); } -#elif EIGEN_HAS_BUILTIN(__builtin_assume_aligned) -#define EIGEN_ASSUME_ALIGNED(PTR, ALIGN_BYTES) \ - { PTR = static_cast(__builtin_assume_aligned(PTR, ALIGN_BYTES)); } +#if !defined(EIGEN_DONT_ASSUME_ALIGNED) && defined(__cpp_lib_assume_aligned) && 
(__cpp_lib_assume_aligned >= 201811L) +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) { + return std::assume_aligned(ptr); +} +#elif !defined(EIGEN_DONT_ASSUME_ALIGNED) && EIGEN_HAS_BUILTIN(__builtin_assume_aligned) +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC T* assume_aligned(T* ptr) { + return static_cast(__builtin_assume_aligned(ptr, N)); +} #else -#define EIGEN_ASSUME_ALIGNED(PTR, ALIGN_BYTES) /* do nothing */ -#endif +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) { + return ptr; +} #endif } // end namespace internal