0
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2026-01-18 17:31:19 +01:00

Fix undefined behavior in packetmath.

libeigen/eigen!2098

Closes #3009
This commit is contained in:
Antonio Sánchez
2025-12-18 21:08:52 +00:00
parent 748e0a6517
commit 9164d3f16a
6 changed files with 30 additions and 24 deletions

View File

@@ -79,8 +79,8 @@ EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a) {
}
// Complex conjugate for Packet4cf: XORs a.v with a mask whose odd-numbered 32-bit lanes
// hold only the sign bit (_mm256_setr_epi32 lists lanes lowest-first), so the sign of
// every second float is flipped -- presumably the imaginary parts of interleaved
// (re, im) pairs; confirm against the Packet4cf layout.
// NOTE(review): two `mask` declarations appear because this is a diff hunk; the one with
// raw 0x80000000 literals looks like the removed version and the SIGN_MASK_I32 one its
// replacement. 0x80000000 does not fit in int, so the raw literal is unsigned and was
// being converted implicitly when passed to the intrinsic.
template <>
EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) {
const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000,
0x80000000, 0x00000000, 0x80000000));
const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32,
0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32));
return Packet4cf(_mm256_xor_ps(a.v, mask));
}
@@ -282,7 +282,8 @@ EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) {
}
// Complex conjugate for Packet2cd: XORs a.v with a mask built from eight 32-bit lanes.
// _mm256_set_epi32 lists lanes highest-first, so the sign bit lands in lanes 7 and 3,
// i.e. the upper halves of doubles 3 and 1. The XOR therefore negates the odd-indexed
// doubles -- presumably the imaginary parts; confirm against the Packet2cd layout.
// NOTE(review): the first `mask` line (raw 0x80000000) looks like the removed side of
// this diff hunk; the SIGN_MASK_I32 form that follows is its replacement.
template <>
EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) {
const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0));
const __m256d mask =
_mm256_castsi256_pd(_mm256_set_epi32(SIGN_MASK_I32, 0x0, 0x0, 0x0, SIGN_MASK_I32, 0x0, 0x0, 0x0));
return Packet2cd(_mm256_xor_pd(a.v, mask));
}

View File

@@ -46,6 +46,8 @@ typedef eigen_packet_wrapper<__m256i, 3> Packet4l;
typedef eigen_packet_wrapper<__m256i, 5> Packet4ul;
#endif
// 64-bit pattern with only the top (sign) bit set, explicitly converted to int64_t so it
// can be handed to signed 64-bit intrinsic parameters (e.g. _mm256_set1_epi64x) without
// an implicit unsigned-to-signed conversion at the call site.
#define SIGN_MASK_I64 static_cast<int64_t>(0x8000000000000000ULL)
template <>
struct is_arithmetic<__m256> {
enum { value = true };
@@ -875,12 +877,12 @@ EIGEN_STRONG_INLINE Packet8ui psub<Packet8ui>(const Packet8ui& a, const Packet8u
// Negates every float in a Packet8f by XOR-ing with a mask that has only the sign bit
// set in each 32-bit lane (_mm256_set1_epi32 broadcasts one value to all lanes).
// NOTE(review): the first `mask` line (raw 0x80000000, an unsigned literal) looks like
// the removed side of this diff hunk; the SIGN_MASK_I32 line is its replacement.
template <>
EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a) {
const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(SIGN_MASK_I32));
return _mm256_xor_ps(a, mask);
}
// Negates every double in a Packet4d by XOR-ing with a mask that has only bit 63 set in
// each 64-bit lane (_mm256_set1_epi64x broadcasts one value to all lanes).
// NOTE(review): the first `mask` line (raw ...ULL literal) looks like the removed side
// of this diff hunk; the SIGN_MASK_I64 line, which carries an explicit int64_t cast, is
// its replacement.
template <>
EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a) {
const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000ULL));
const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(SIGN_MASK_I64));
return _mm256_xor_pd(a, mask);
}
template <>

View File

@@ -82,8 +82,8 @@ EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a) {
// Complex conjugate for Packet8cf: builds a 16-lane mask (lanes listed lowest-first by
// _mm512_setr_epi32) with the sign bit set in every odd-numbered 32-bit lane, then XORs
// it into a.v via pxor. This flips the sign of every second float -- presumably the
// imaginary parts of interleaved (re, im) pairs; confirm against the Packet8cf layout.
// NOTE(review): the two argument lists are the before/after pair of this diff hunk; the
// raw 0x80000000 literals appear to be the removed form, SIGN_MASK_I32 the replacement.
template <>
EIGEN_STRONG_INLINE Packet8cf pconj(const Packet8cf& a) {
const __m512 mask = _mm512_castsi512_ps(_mm512_setr_epi32(
0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000,
0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000, 0x80000000));
0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32,
0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32, 0x00000000, SIGN_MASK_I32));
return Packet8cf(pxor(a.v, mask));
}
@@ -262,8 +262,9 @@ EIGEN_STRONG_INLINE Packet4cd pnegate(const Packet4cd& a) {
}
// Complex conjugate for Packet4cd: _mm512_set_epi32 lists its 16 lanes highest-first,
// so the sign bit is placed in lanes 15, 11, 7 and 3 -- the upper halves of doubles
// 7, 5, 3 and 1. XOR-ing via pxor negates those odd-indexed doubles -- presumably the
// imaginary parts; confirm against the Packet4cd layout.
// NOTE(review): the first `mask` declaration (raw 0x80000000) looks like the removed
// side of this diff hunk; the SIGN_MASK_I32 declaration is its replacement.
template <>
EIGEN_STRONG_INLINE Packet4cd pconj(const Packet4cd& a) {
const __m512d mask = _mm512_castsi512_pd(_mm512_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0,
0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0));
const __m512d mask =
_mm512_castsi512_pd(_mm512_set_epi32(SIGN_MASK_I32, 0x0, 0x0, 0x0, SIGN_MASK_I32, 0x0, 0x0, 0x0, SIGN_MASK_I32,
0x0, 0x0, 0x0, SIGN_MASK_I32, 0x0, 0x0, 0x0));
return Packet4cd(pxor(a.v, mask));
}

View File

@@ -443,15 +443,15 @@ EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
// The intel docs give it a relatively high latency as well, so we're probably
// better off with using _mm512_set_epi32 directly anyways.
const __m512i mask =
_mm512_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000);
_mm512_set_epi32(SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32,
SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32,
SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32);
return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), mask));
}
// Negates every double in a Packet8d: reinterprets the packet as integers, XORs each
// 64-bit lane with a mask that has only bit 63 set, and reinterprets the result back.
// NOTE(review): the first `mask` declaration (raw ...ULL literals) looks like the
// removed side of this diff hunk; the SIGN_MASK_I64 declaration is its replacement and
// passes the value as an explicitly cast int64_t.
template <>
EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
const __m512i mask =
_mm512_set_epi64(0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL,
0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL);
const __m512i mask = _mm512_set_epi64(SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64,
SIGN_MASK_I64, SIGN_MASK_I64, SIGN_MASK_I64);
return _mm512_castsi512_pd(_mm512_xor_epi64(_mm512_castpd_si512(a), mask));
}
template <>
@@ -770,22 +770,22 @@ EIGEN_STRONG_INLINE Packet8l pcmp_lt(const Packet8l& a, const Packet8l& b) {
// Lane-wise a == b for Packet8d, returned as a packet of all-ones / all-zeros lanes.
// _CMP_EQ_OQ is the ordered, non-signaling predicate, so a lane holding NaN compares
// false. The 8-bit compare mask is expanded by _mm512_mask_set1_epi64, which writes the
// given value into lanes whose mask bit is set and keeps the zero source elsewhere.
// NOTE(review): the two `return` lines are the before/after pair of this diff hunk; the
// unsigned 0xffffffffffffffffu form looks like the removed one, int64_t(-1) the new one.
template <>
EIGEN_STRONG_INLINE Packet8d pcmp_eq(const Packet8d& a, const Packet8d& b) {
__mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ);
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1)));
}
// Lane-wise a <= b for Packet8d, returned as a packet of all-ones / all-zeros lanes.
// _CMP_LE_OQ is the ordered, non-signaling predicate, so a lane holding NaN compares
// false. Lanes whose compare-mask bit is set receive the all-ones value; the rest keep
// the zero source passed to _mm512_mask_set1_epi64.
// NOTE(review): the two `return` lines are the before/after pair of this diff hunk; the
// unsigned 0xffffffffffffffffu form looks like the removed one, int64_t(-1) the new one.
template <>
EIGEN_STRONG_INLINE Packet8d pcmp_le(const Packet8d& a, const Packet8d& b) {
__mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LE_OQ);
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1)));
}
// Lane-wise a < b for Packet8d, returned as a packet of all-ones / all-zeros lanes.
// _CMP_LT_OQ is the ordered, non-signaling predicate, so a lane holding NaN compares
// false. Lanes whose compare-mask bit is set receive the all-ones value; the rest keep
// the zero source passed to _mm512_mask_set1_epi64.
// NOTE(review): the two `return` lines are the before/after pair of this diff hunk; the
// unsigned 0xffffffffffffffffu form looks like the removed one, int64_t(-1) the new one.
template <>
EIGEN_STRONG_INLINE Packet8d pcmp_lt(const Packet8d& a, const Packet8d& b) {
__mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1)));
}
// Lane-wise "a < b, or either operand is NaN" for Packet8d, returned as a packet of
// all-ones / all-zeros lanes. _CMP_NGE_UQ (not-greater-or-equal, unordered,
// non-signaling) is true exactly in those cases. Lanes whose compare-mask bit is set
// receive the all-ones value; the rest keep the zero source.
// NOTE(review): the two `return` lines are the before/after pair of this diff hunk; the
// unsigned 0xffffffffffffffffu form looks like the removed one, int64_t(-1) the new one.
template <>
EIGEN_STRONG_INLINE Packet8d pcmp_lt_or_nan(const Packet8d& a, const Packet8d& b) {
__mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ);
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, 0xffffffffffffffffu));
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_setzero_epi32(), mask, int64_t(-1)));
}
template <>

View File

@@ -277,7 +277,7 @@ EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
}
// Complex conjugate for Packet1cd: _mm_set_epi32 lists lanes highest-first, so the sign
// bit goes into lane 3 -- the upper half of the second double. XOR-ing negates that
// double only -- presumably the imaginary part; confirm against the Packet1cd layout.
// NOTE(review): the first `mask` line (raw 0x80000000) looks like the removed side of
// this diff hunk. Its replacement spells the cast out inline instead of using a
// SIGN_MASK_I32-style macro; whether such a macro is visible from this file cannot be
// determined from this view.
template <>
EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000, 0x0, 0x0, 0x0));
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(static_cast<int32_t>(0x80000000), 0x0, 0x0, 0x0));
return Packet1cd(_mm_xor_pd(a.v, mask));
}

View File

@@ -91,6 +91,8 @@ struct shuffle_mask {
enum { mask = (s) << 6 | (r) << 4 | (q) << 2 | (p) };
};
// 32-bit pattern with only the top (sign) bit set, explicitly converted to int32_t.
// The bare literal 0x80000000 does not fit in int and therefore has unsigned type; the
// cast lets callers pass it to the int parameters of the set/setr intrinsics without an
// implicit out-of-range conversion.
#define SIGN_MASK_I32 static_cast<int32_t>(0x80000000)
// vec4f_swizzle1(v, p, q, r, s): permutes the four 32-bit lanes of v. The packet is
// reinterpreted as integers, shuffled with _mm_shuffle_epi32 using the immediate
// shuffle_mask<p, q, r, s>::mask (p in the low two bits, then q, r, s), and wrapped
// back into a Packet4f. With that encoding result lane 0 takes v[p], lane 1 v[q],
// lane 2 v[r] and lane 3 v[s]. p..s must be compile-time constants because they are
// template arguments.
// TODO: change the implementation of all swizzle* ops from macro to template,
#define vec4f_swizzle1(v, p, q, r, s) \
Packet4f(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v), (shuffle_mask<p, q, r, s>::mask))))
@@ -560,7 +562,7 @@ EIGEN_STRONG_INLINE Packet4f paddsub<Packet4f>(const Packet4f& a, const Packet4f
#ifdef EIGEN_VECTORIZE_SSE3
return _mm_addsub_ps(a, b);
#else
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x0, 0x80000000, 0x0));
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(SIGN_MASK_I32, 0x0, SIGN_MASK_I32, 0x0));
return padd(a, pxor(mask, b));
#endif
}
@@ -572,19 +574,19 @@ EIGEN_STRONG_INLINE Packet2d paddsub<Packet2d>(const Packet2d& a, const Packet2d
#ifdef EIGEN_VECTORIZE_SSE3
return _mm_addsub_pd(a, b);
#else
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, 0x80000000, 0x0, 0x0));
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, SIGN_MASK_I32, 0x0, 0x0));
return padd(a, pxor(mask, b));
#endif
}
// Negates every float in a Packet4f by XOR-ing with a mask that has only the sign bit
// set in each of the four 32-bit lanes.
// NOTE(review): the first `mask` line (raw 0x80000000 literals, which are unsigned)
// looks like the removed side of this diff hunk; the SIGN_MASK_I32 line is its
// replacement.
template <>
EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000));
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32, SIGN_MASK_I32));
return _mm_xor_ps(a, mask);
}
// Negates both doubles in a Packet2d. _mm_setr_epi32 lists lanes lowest-first, so the
// sign bit is placed in 32-bit lanes 1 and 3 -- the upper half of each double -- and
// the XOR flips bit 63 of both elements.
// NOTE(review): the first `mask` line (raw 0x80000000) looks like the removed side of
// this diff hunk; the SIGN_MASK_I32 line is its replacement.
template <>
EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, 0x80000000, 0x0, 0x80000000));
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0, SIGN_MASK_I32, 0x0, SIGN_MASK_I32));
return _mm_xor_pd(a, mask);
}
template <>
@@ -1249,7 +1251,7 @@ EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {
}
// Absolute value for Packet2d: ANDs the packet's bits with a mask that keeps everything
// except bit 63 of each double (low 32-bit half all ones, high half 0x7FFFFFFF),
// clearing the sign bit of both elements.
// NOTE(review): the first `mask` line uses 0xFFFFFFFF, an unsigned literal that does
// not fit in int, and looks like the removed side of this diff hunk; the replacement
// writes the same all-ones bit pattern as -1.
template <>
EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {
const __m128i mask = _mm_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF);
const __m128i mask = _mm_setr_epi32(-1, 0x7FFFFFFF, -1, 0x7FFFFFFF);
return _mm_castsi128_pd(_mm_and_si128(mask, _mm_castpd_si128(a)));
}
template <>