mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-01-18 17:31:19 +01:00
Implement assume_aligned using the standard API
This implements `Eigen::internal::assume_aligned` to match the C++20 standard API as closely as possible, using either `std::assume_aligned` or `__builtin_assume_aligned` if available. If neither is available, the function is a no-op. The override macro `EIGEN_ASSUME_ALIGNED` was replaced by `EIGEN_DONT_ASSUME_ALIGNED`, which now forces the function to be a no-op.
See merge request libeigen/eigen!2052
(cherry picked from commit 8716f109e4)
f8191848 Fix pcmp_* for HVX to to comply with the new definition of true = Scalar(1).
7cc169d9 Revert "Fix pcmp_* for HVX to to comply with the new definition of true = Scalar(1)."
06999845 Merge branch eigen:master into master
b5fde61f Merge branch eigen:master into master
10d10d60 Merge branch eigen:master into master
9398d6ad Merge branch eigen:master into master
7804d5a4 Merge branch eigen:master into master
0068623c Merge branch eigen:master into master
b0ffc9cf Merge branch eigen:master into master
f3791c80 Merge branch eigen:master into master
74275f0c Merge branch eigen:master into master
b095614e Merge branch eigen:master into master
1312a696 Merge branch eigen:master into master
e6dd44d2 Merge branch eigen:master into master
8ac67769 Implement assume_aligned using the standard API if available.
97b299fa Format.
b31798be Fix typos.
04b3d312 Unformat.
Co-authored-by: Rasmus Munk Larsen <rmlarsen@google.com>
This commit is contained in:
@@ -48,7 +48,7 @@ struct Packet2cf {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<float> > : default_packet_traits {
|
||||
struct packet_traits<std::complex<float>> : default_packet_traits {
|
||||
typedef Packet2cf type;
|
||||
typedef Packet1cf half;
|
||||
enum {
|
||||
@@ -280,13 +280,13 @@ EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packe
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet1cf>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from));
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(
|
||||
pload<Packet2f>(reinterpret_cast<const float*>(assume_aligned<unpacket_traits<Packet1cf>::alignment>(from))));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet2cf>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from)));
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(
|
||||
pload<Packet4f>(reinterpret_cast<const float*>(assume_aligned<unpacket_traits<Packet2cf>::alignment>(from))));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -308,22 +308,22 @@ EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* fro
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet1cf& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet1cf>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v);
|
||||
// Aligned store of a Packet1cf: the destination is reinterpreted as a float
// array and forwarded, together with the packet's `v` member, to the float
// pstore overload.
EIGEN_STRONG_INLINE void pstore<std::complex<float>>(std::complex<float>* to, const Packet1cf& from) {
  // The alignment hint and the cast apply to `to` only; `from.v` is the second
  // argument of pstore and must stay outside the reinterpret_cast operand.
  EIGEN_DEBUG_ALIGNED_STORE pstore(
      reinterpret_cast<float*>(assume_aligned<unpacket_traits<Packet1cf>::alignment>(to)), from.v);
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet2cf>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v);
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<float>>(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(assume_aligned<unpacket_traits<Packet2cf>::alignment>(to)),
|
||||
from.v);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet1cf& from) {
|
||||
// Unaligned store of a Packet1cf: the destination is reinterpreted as a float
// array and forwarded, together with the packet's `v` member, to the float
// pstoreu overload. No alignment hint is applied on this path.
EIGEN_STRONG_INLINE void pstoreu<std::complex<float>>(std::complex<float>* to, const Packet1cf& from) {
  // Named cast instead of a C-style cast, matching the Packet2cf overload.
  EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v);
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<float>>(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v);
|
||||
}
|
||||
|
||||
@@ -356,7 +356,7 @@ EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::comp
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float>* addr) {
|
||||
EIGEN_STRONG_INLINE void prefetch<std::complex<float>>(const std::complex<float>* addr) {
|
||||
EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr));
|
||||
}
|
||||
|
||||
@@ -501,7 +501,7 @@ struct Packet1cd {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<double> > : default_packet_traits {
|
||||
struct packet_traits<std::complex<double>> : default_packet_traits {
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
@@ -531,8 +531,8 @@ struct unpacket_traits<Packet1cd> : neon_unpacket_default<Packet1cd, std::comple
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet1cd>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from)));
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(
|
||||
pload<Packet2d>(reinterpret_cast<const double*>(assume_aligned<unpacket_traits<Packet1cd>::alignment>(from))));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -644,18 +644,18 @@ EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* fr
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet1cd>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v);
|
||||
// Aligned store of a Packet1cd: the destination is reinterpreted as a double
// array and forwarded, together with the packet's `v` member, to the double
// pstore overload.
EIGEN_STRONG_INLINE void pstore<std::complex<double>>(std::complex<double>* to, const Packet1cd& from) {
  // The alignment hint and the cast apply to `to` only; `from.v` is the second
  // argument of pstore and must stay outside the reinterpret_cast operand.
  EIGEN_DEBUG_ALIGNED_STORE pstore(
      reinterpret_cast<double*>(assume_aligned<unpacket_traits<Packet1cd>::alignment>(to)), from.v);
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<double>>(std::complex<double>* to, const Packet1cd& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double>* addr) {
|
||||
EIGEN_STRONG_INLINE void prefetch<std::complex<double>>(const std::complex<double>* addr) {
|
||||
EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr));
|
||||
}
|
||||
|
||||
@@ -677,7 +677,7 @@ EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::com
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) {
|
||||
EIGEN_ALIGN16 std::complex<double> res;
|
||||
pstore<std::complex<double> >(&res, a);
|
||||
pstore<std::complex<double>>(&res, a);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@@ -2268,13 +2268,11 @@ EIGEN_STRONG_INLINE Packet2ul plogical_shift_left(Packet2ul a) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2f pload<Packet2f>(const float* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet2f>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_f32(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_f32(assume_aligned<unpacket_traits<Packet2f>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet4f>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(assume_aligned<unpacket_traits<Packet4f>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4c pload<Packet4c>(const int8_t* from) {
|
||||
@@ -2284,13 +2282,11 @@ EIGEN_STRONG_INLINE Packet4c pload<Packet4c>(const int8_t* from) {
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8c pload<Packet8c>(const int8_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet8c>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_s8(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_s8(assume_aligned<unpacket_traits<Packet8c>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pload<Packet16c>(const int8_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet16c>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s8(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s8(assume_aligned<unpacket_traits<Packet16c>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4uc pload<Packet4uc>(const uint8_t* from) {
|
||||
@@ -2300,63 +2296,51 @@ EIGEN_STRONG_INLINE Packet4uc pload<Packet4uc>(const uint8_t* from) {
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8uc pload<Packet8uc>(const uint8_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet8uc>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_u8(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_u8(assume_aligned<unpacket_traits<Packet8uc>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pload<Packet16uc>(const uint8_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet16uc>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u8(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u8(assume_aligned<unpacket_traits<Packet16uc>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4s pload<Packet4s>(const int16_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet4s>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_s16(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_s16(assume_aligned<unpacket_traits<Packet4s>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pload<Packet8s>(const int16_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet8s>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s16(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s16(assume_aligned<unpacket_traits<Packet8s>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4us pload<Packet4us>(const uint16_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet4us>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_u16(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_u16(assume_aligned<unpacket_traits<Packet4us>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pload<Packet8us>(const uint16_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet8us>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u16(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u16(assume_aligned<unpacket_traits<Packet8us>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2i pload<Packet2i>(const int32_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet2i>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_s32(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_s32(assume_aligned<unpacket_traits<Packet2i>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet4i>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(assume_aligned<unpacket_traits<Packet4i>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ui pload<Packet2ui>(const uint32_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet2ui>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_u32(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1_u32(assume_aligned<unpacket_traits<Packet2ui>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pload<Packet4ui>(const uint32_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet4ui>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u32(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u32(assume_aligned<unpacket_traits<Packet4ui>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pload<Packet2l>(const int64_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet2l>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s64(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s64(assume_aligned<unpacket_traits<Packet2l>::alignment>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pload<Packet2ul>(const uint64_t* from) {
|
||||
EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet2ul>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u64(from);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return vld1q_u64(assume_aligned<unpacket_traits<Packet2ul>::alignment>(from));
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -2580,13 +2564,11 @@ EIGEN_STRONG_INLINE Packet4ui ploadquad<Packet4ui>(const uint32_t* from) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet2f& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet2f>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_f32(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_f32(assume_aligned<unpacket_traits<Packet2f>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet4f>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(assume_aligned<unpacket_traits<Packet4f>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet4c& from) {
|
||||
@@ -2594,13 +2576,11 @@ EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet4c& from) {
|
||||
}
|
||||
template <>
|
||||
// Aligned store of a Packet8c via vst1_s8.
// The alignment hint is applied inline through assume_aligned<>; it replaces
// the former standalone statement
//   EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet8c>::alignment);
// that preceded a plain vst1_s8(to, from).
EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet8c& from) {
  EIGEN_DEBUG_ALIGNED_STORE vst1_s8(assume_aligned<unpacket_traits<Packet8c>::alignment>(to), from);
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int8_t>(int8_t* to, const Packet16c& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet16c>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s8(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s8(assume_aligned<unpacket_traits<Packet16c>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet4uc& from) {
|
||||
@@ -2608,63 +2588,51 @@ EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet4uc& from) {
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet8uc& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet8uc>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u8(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u8(assume_aligned<unpacket_traits<Packet8uc>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint8_t>(uint8_t* to, const Packet16uc& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet16uc>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u8(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u8(assume_aligned<unpacket_traits<Packet16uc>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int16_t>(int16_t* to, const Packet4s& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet4s>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_s16(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_s16(assume_aligned<unpacket_traits<Packet4s>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int16_t>(int16_t* to, const Packet8s& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet8s>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s16(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s16(assume_aligned<unpacket_traits<Packet8s>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint16_t>(uint16_t* to, const Packet4us& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet4us>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u16(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u16(assume_aligned<unpacket_traits<Packet4us>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint16_t>(uint16_t* to, const Packet8us& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet8us>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u16(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u16(assume_aligned<unpacket_traits<Packet8us>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet2i& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet2i>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_s32(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_s32(assume_aligned<unpacket_traits<Packet2i>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet4i>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(assume_aligned<unpacket_traits<Packet4i>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint32_t>(uint32_t* to, const Packet2ui& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet2ui>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u32(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u32(assume_aligned<unpacket_traits<Packet2ui>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint32_t>(uint32_t* to, const Packet4ui& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet4ui>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u32(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u32(assume_aligned<unpacket_traits<Packet4ui>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int64_t>(int64_t* to, const Packet2l& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet2l>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s64(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_s64(assume_aligned<unpacket_traits<Packet2l>::alignment>(to), from);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<uint64_t>(uint64_t* to, const Packet2ul& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet2ul>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u64(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_u64(assume_aligned<unpacket_traits<Packet2ul>::alignment>(to), from);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -4739,8 +4707,8 @@ EIGEN_STRONG_INLINE bfloat16 pfirst<Packet4bf>(const Packet4bf& from) {
|
||||
|
||||
template <>
|
||||
// Aligned load of a Packet4bf: the bfloat16 source is reinterpreted as
// uint16_t and loaded through pload<Packet4us>.
// The alignment hint is applied inline through assume_aligned<>; it replaces
// the former standalone statement
//   EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet4bf>::alignment);
EIGEN_STRONG_INLINE Packet4bf pload<Packet4bf>(const bfloat16* from) {
  return Packet4bf(pload<Packet4us>(
      reinterpret_cast<const uint16_t*>(assume_aligned<unpacket_traits<Packet4bf>::alignment>(from))));
}
|
||||
|
||||
template <>
|
||||
@@ -4750,8 +4718,8 @@ EIGEN_STRONG_INLINE Packet4bf ploadu<Packet4bf>(const bfloat16* from) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to, const Packet4bf& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet4bf>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u16(reinterpret_cast<uint16_t*>(to), from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_u16(
|
||||
reinterpret_cast<uint16_t*>(assume_aligned<unpacket_traits<Packet4bf>::alignment>(to)), from);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -5240,8 +5208,8 @@ EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) {
|
||||
|
||||
template <>
|
||||
// Aligned load of a Packet2d via vld1q_f64.
// The alignment hint is applied inline through assume_aligned<>; it replaces
// the former standalone statement
//   EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet2d>::alignment);
EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
  EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(assume_aligned<unpacket_traits<Packet2d>::alignment>(from));
}
|
||||
|
||||
template <>
|
||||
@@ -5255,8 +5223,7 @@ EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet2d>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(assume_aligned<unpacket_traits<Packet2d>::alignment>(to), from);
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -5784,14 +5751,14 @@ EIGEN_STRONG_INLINE Packet4hf pandnot<Packet4hf>(const Packet4hf& a, const Packe
|
||||
|
||||
template <>
|
||||
// Aligned load of a Packet8hf: the Eigen::half source is reinterpreted as
// float16_t and loaded via vld1q_f16.
// The alignment hint is applied inline through assume_aligned<>; it replaces
// the former standalone statement
//   EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet8hf>::alignment);
EIGEN_STRONG_INLINE Packet8hf pload<Packet8hf>(const Eigen::half* from) {
  EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f16(
      reinterpret_cast<const float16_t*>(assume_aligned<unpacket_traits<Packet8hf>::alignment>(from)));
}
|
||||
|
||||
template <>
|
||||
// Aligned load of a Packet4hf: the Eigen::half source is reinterpreted as
// float16_t and loaded via vld1_f16.
// The alignment hint is applied inline through assume_aligned<>; it replaces
// the former standalone statement
//   EIGEN_ASSUME_ALIGNED(from, unpacket_traits<Packet4hf>::alignment);
EIGEN_STRONG_INLINE Packet4hf pload<Packet4hf>(const Eigen::half* from) {
  EIGEN_DEBUG_ALIGNED_LOAD return vld1_f16(
      reinterpret_cast<const float16_t*>(assume_aligned<unpacket_traits<Packet4hf>::alignment>(from)));
}
|
||||
|
||||
template <>
|
||||
@@ -5866,14 +5833,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4hf pinsertlast(const Packet4hf& a,
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet8hf& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet8hf>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_f16(reinterpret_cast<float16_t*>(to), from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1q_f16(
|
||||
reinterpret_cast<float16_t*>(assume_aligned<unpacket_traits<Packet8hf>::alignment>(to)), from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4hf& from) {
|
||||
EIGEN_ASSUME_ALIGNED(to, unpacket_traits<Packet4hf>::alignment);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_f16(reinterpret_cast<float16_t*>(to), from);
|
||||
EIGEN_DEBUG_ALIGNED_STORE vst1_f16(
|
||||
reinterpret_cast<float16_t*>(assume_aligned<unpacket_traits<Packet4hf>::alignment>(to)), from);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -1354,19 +1354,21 @@ EIGEN_DEVICE_FUNC void destroy_at(T* p) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \internal
|
||||
* This informs the implementation that PTR is aligned to at least ALIGN_BYTES
|
||||
*/
|
||||
#ifndef EIGEN_ASSUME_ALIGNED
|
||||
#if defined(__cpp_lib_assume_aligned) && (__cpp_lib_assume_aligned >= 201811L)
|
||||
#define EIGEN_ASSUME_ALIGNED(PTR, ALIGN_BYTES) \
|
||||
{ PTR = std::assume_aligned<ALIGN_BYTES>(PTR); }
|
||||
#elif EIGEN_HAS_BUILTIN(__builtin_assume_aligned)
|
||||
#define EIGEN_ASSUME_ALIGNED(PTR, ALIGN_BYTES) \
|
||||
{ PTR = static_cast<decltype(PTR)>(__builtin_assume_aligned(PTR, ALIGN_BYTES)); }
|
||||
/** \internal
 * assume_aligned<N>(ptr) returns ptr unchanged; depending on the branch taken
 * below it also tells the compiler that ptr is aligned to at least N bytes.
 *
 * Variant selection happens at preprocessing time:
 *  - std::assume_aligned, when __cpp_lib_assume_aligned >= 201811L;
 *  - __builtin_assume_aligned, when EIGEN_HAS_BUILTIN reports it;
 *  - a plain pass-through otherwise.
 * Defining EIGEN_DONT_ASSUME_ALIGNED disables the first two branches and
 * therefore always selects the pass-through.
 *
 * \tparam N  alignment in bytes promised by the caller
 * \tparam T  pointee type (deduced from ptr)
 * \param ptr pointer the caller promises is N-byte aligned
 * \returns   ptr, with the same type
 */
// Variant 1: forward to the C++20 standard library facility.
#if !defined(EIGEN_DONT_ASSUME_ALIGNED) && defined(__cpp_lib_assume_aligned) && (__cpp_lib_assume_aligned >= 201811L)
|
||||
template <std::size_t N, typename T>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
|
||||
return std::assume_aligned<N, T>(ptr);
|
||||
}
|
||||
// Variant 2: compiler builtin. Unlike the other two variants this one is not
// declared constexpr. The builtin's result is cast back to T*.
#elif !defined(EIGEN_DONT_ASSUME_ALIGNED) && EIGEN_HAS_BUILTIN(__builtin_assume_aligned)
|
||||
template <std::size_t N, typename T>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC T* assume_aligned(T* ptr) {
|
||||
return static_cast<T*>(__builtin_assume_aligned(ptr, N));
|
||||
}
|
||||
// Variant 3: no hint available (or hints disabled) - return the pointer as is.
#else
|
||||
// NOTE(review): the next two directives appear to be the removed side of this
// diff (the old no-op EIGEN_ASSUME_ALIGNED macro and the #endif that closed its
// inner #if); they are not part of the new pass-through implementation below.
#define EIGEN_ASSUME_ALIGNED(PTR, ALIGN_BYTES) /* do nothing */
|
||||
#endif
|
||||
template <std::size_t N, typename T>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr T* assume_aligned(T* ptr) {
|
||||
return ptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
Reference in New Issue
Block a user