mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-01-18 17:31:19 +01:00
Add support for complex numbers in the generic clang backend
libeigen/eigen!2078 Co-authored-by: Rasmus Munk Larsen <rmlarsen@google.com>
This commit is contained in:
@@ -208,6 +208,7 @@ using std::ptrdiff_t;
|
||||
#if defined(EIGEN_VECTORIZE_GENERIC) && !defined(EIGEN_DONT_VECTORIZE)
|
||||
#include "src/Core/arch/clang/PacketMath.h"
|
||||
#include "src/Core/arch/clang/TypeCasting.h"
|
||||
#include "src/Core/arch/clang/Complex.h"
|
||||
#include "src/Core/arch/clang/Reductions.h"
|
||||
#include "src/Core/arch/clang/MathFunctions.h"
|
||||
#else
|
||||
|
||||
@@ -1010,12 +1010,26 @@ EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
||||
/** \internal \returns \a a with real and imaginary parts flipped (for complex types only) */
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
|
||||
return Packet(numext::imag(a), numext::real(a));
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with real part duplicated (for complex types only) */
|
||||
// TODO(rmlarsen): Define and use in all complex backends.
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC inline Packet pdupreal(const Packet& a) {
|
||||
return Packet(numext::real(a), numext::real(a));
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with imaginary part duplicated (for complex types only) */
|
||||
// TODO(rmlarsen): Define and use in all complex backends.
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC inline Packet pdupimag(const Packet& a) {
|
||||
return Packet(numext::imag(a), numext::imag(a));
|
||||
}
|
||||
|
||||
/**************************
|
||||
* Special math functions
|
||||
***************************/
|
||||
|
||||
@@ -1130,6 +1130,20 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_double(const Pac
|
||||
return psincos_double<false>(x);
|
||||
}
|
||||
|
||||
template <bool ComputeSin, typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
std::enable_if_t<std::is_same<typename unpacket_traits<Packet>::type, float>::value, Packet>
|
||||
psincos_selector(const Packet& x) {
|
||||
return psincos_float<ComputeSin, Packet, true>(x);
|
||||
}
|
||||
|
||||
template <bool ComputeSin, typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
std::enable_if_t<std::is_same<typename unpacket_traits<Packet>::type, double>::value, Packet>
|
||||
psincos_selector(const Packet& x) {
|
||||
return psincos_double<ComputeSin, Packet, true>(x);
|
||||
}
|
||||
|
||||
// Generic implementation of acos(x).
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos_float(const Packet& x_in) {
|
||||
@@ -1469,15 +1483,25 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Pa
|
||||
return Packet(pdiv(result_scaled.v, y_max));
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pmul_complex(const Packet& x, const Packet& y) {
|
||||
// In the following we annotate the code for the case where the inputs
|
||||
// are a pair length-2 SIMD vectors representing a single pair of complex
|
||||
// numbers x = a + i*b, y = c + i*d.
|
||||
Packet x_re = pdupreal(x); // a, a
|
||||
Packet x_im = pdupimag(x); // b, b
|
||||
Packet tmp_re = Packet(pmul(x_re.v, y.v)); // a*c, a*d
|
||||
Packet tmp_im = Packet(pmul(x_im.v, y.v)); // b*c, b*d
|
||||
tmp_im = pcplxflip(pconj(tmp_im)); // -b*d, d*c
|
||||
return padd(tmp_im, tmp_re); // a*c - b*d, a*d + b*c
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Packet& x) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
typedef typename Scalar::value_type RealScalar;
|
||||
typedef typename unpacket_traits<Packet>::as_real RealPacket;
|
||||
|
||||
RealPacket real_mask_rp = peven_mask(x.v);
|
||||
Packet real_mask(real_mask_rp);
|
||||
|
||||
// Real part
|
||||
RealPacket x_flip = pcplxflip(x).v; // b, a
|
||||
Packet x_norm = phypot_complex(x); // sqrt(a^2 + b^2), sqrt(a^2 + b^2)
|
||||
@@ -1493,12 +1517,12 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Pa
|
||||
RealPacket is_any_inf = por(is_x_pos_inf, is_y_pos_inf);
|
||||
RealPacket xreal = pselect(is_any_inf, cst_pos_inf, xlogr);
|
||||
|
||||
Packet xres = pselect(real_mask, Packet(xreal), Packet(ximg)); // log(sqrt(a^2 + b^2)), atan2(b, a)
|
||||
return xres;
|
||||
return Packet(pselect(peven_mask(xreal), xreal, ximg)); // log(sqrt(a^2 + b^2)), atan2(b, a)
|
||||
}
|
||||
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& a) {
|
||||
// FIXME(rmlarsen): This does not work correctly for Packets of std::complex<double>.
|
||||
typedef typename unpacket_traits<Packet>::as_real RealPacket;
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
typedef typename Scalar::value_type RealScalar;
|
||||
@@ -1516,7 +1540,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Pa
|
||||
// cis(y):
|
||||
RealPacket y = pand(odd_mask, a.v);
|
||||
y = por(y, pcplxflip(Packet(y)).v);
|
||||
RealPacket cisy = psincos_float<false, RealPacket, true>(y);
|
||||
RealPacket cisy = psincos_selector<false, RealPacket>(y);
|
||||
cisy = pcplxflip(Packet(cisy)).v; // cos(y) + i * sin(y)
|
||||
|
||||
const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
|
||||
|
||||
@@ -150,6 +150,10 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const P
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Packet& x, const Packet& y);
|
||||
|
||||
/** \internal \returns x * y for complex types */
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pmul_complex(const Packet& x, const Packet& y);
|
||||
|
||||
template <typename Packet, int N>
|
||||
struct ppolevl;
|
||||
|
||||
|
||||
404
Eigen/src/Core/arch/clang/Complex.h
Normal file
404
Eigen/src/Core/arch/clang/Complex.h
Normal file
@@ -0,0 +1,404 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2025 Rasmus Munk Larsen
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_CLANG_H
|
||||
#define EIGEN_COMPLEX_CLANG_H
|
||||
|
||||
// IWYU pragma: private
|
||||
#include "../../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
template <typename RealScalar, int N>
|
||||
struct complex_packet_wrapper {
|
||||
using RealPacketT = detail::VectorType<RealScalar, 2 * N>;
|
||||
EIGEN_STRONG_INLINE complex_packet_wrapper() = default;
|
||||
EIGEN_STRONG_INLINE explicit complex_packet_wrapper(const RealPacketT& a) : v(a) {}
|
||||
EIGEN_STRONG_INLINE constexpr std::complex<RealScalar> operator[](Index i) const {
|
||||
return std::complex<RealScalar>(v[2 * i], v[2 * i + 1]);
|
||||
}
|
||||
RealPacketT v;
|
||||
};
|
||||
|
||||
using Packet8cf = complex_packet_wrapper<float, 8>;
|
||||
using Packet4cf = complex_packet_wrapper<float, 4>;
|
||||
using Packet2cf = complex_packet_wrapper<float, 2>;
|
||||
using Packet4cd = complex_packet_wrapper<double, 4>;
|
||||
using Packet2cd = complex_packet_wrapper<double, 2>;
|
||||
|
||||
struct generic_complex_packet_traits : default_packet_traits {
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasArg = 0,
|
||||
HasSetLinear = 0,
|
||||
HasConj = 1,
|
||||
// Math functions
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<float>> : generic_complex_packet_traits {
|
||||
using type = Packet8cf;
|
||||
using half = Packet8cf;
|
||||
enum {
|
||||
size = 8,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet8cf> : generic_unpacket_traits {
|
||||
using type = std::complex<float>;
|
||||
using half = Packet8cf;
|
||||
using as_real = Packet16f;
|
||||
enum {
|
||||
size = 8,
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<double>> : generic_complex_packet_traits {
|
||||
using type = Packet4cd;
|
||||
using half = Packet4cd;
|
||||
enum {
|
||||
size = 4,
|
||||
HasExp = 0, // FIXME(rmlarsen): pexp_complex is broken for double.
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet4cd> : generic_unpacket_traits {
|
||||
using type = std::complex<double>;
|
||||
using half = Packet4cd;
|
||||
using as_real = Packet8d;
|
||||
enum {
|
||||
size = 4,
|
||||
};
|
||||
};
|
||||
|
||||
// ------------ Load and store ops ----------
|
||||
#define EIGEN_CLANG_COMPLEX_LOAD_STORE(PACKET_TYPE) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE ploadu<PACKET_TYPE>(const unpacket_traits<PACKET_TYPE>::type* from) { \
|
||||
return PACKET_TYPE(ploadu<typename unpacket_traits<PACKET_TYPE>::as_real>(&numext::real_ref(*from))); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pload<PACKET_TYPE>(const unpacket_traits<PACKET_TYPE>::type* from) { \
|
||||
return PACKET_TYPE(pload<typename unpacket_traits<PACKET_TYPE>::as_real>(&numext::real_ref(*from))); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE void pstoreu<typename unpacket_traits<PACKET_TYPE>::type, PACKET_TYPE>( \
|
||||
typename unpacket_traits<PACKET_TYPE>::type * to, const PACKET_TYPE& from) { \
|
||||
pstoreu(&numext::real_ref(*to), from.v); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE void pstore<typename unpacket_traits<PACKET_TYPE>::type, PACKET_TYPE>( \
|
||||
typename unpacket_traits<PACKET_TYPE>::type * to, const PACKET_TYPE& from) { \
|
||||
pstore(&numext::real_ref(*to), from.v); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet4cd);
|
||||
#undef EIGEN_CLANG_COMPLEX_LOAD_STORE
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from) {
|
||||
const float re = numext::real(from);
|
||||
const float im = numext::imag(from);
|
||||
return Packet8cf(Packet16f{re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im});
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pset1<Packet4cd>(const std::complex<double>& from) {
|
||||
const double re = numext::real(from);
|
||||
const double im = numext::imag(from);
|
||||
return Packet4cd(Packet8d{re, im, re, im, re, im, re, im});
|
||||
}
|
||||
|
||||
// ----------- Unary ops ------------------
|
||||
#define DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, OP) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE OP<PACKET_TYPE>(const PACKET_TYPE& a) { \
|
||||
return PACKET_TYPE(OP(a.v)); \
|
||||
}
|
||||
|
||||
#define EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(PACKET_TYPE) \
|
||||
DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, pnegate) \
|
||||
DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, pzero) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE unpacket_traits<PACKET_TYPE>::type pfirst<PACKET_TYPE>(const PACKET_TYPE& a) { \
|
||||
return a[0]; \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pexp<PACKET_TYPE>(const PACKET_TYPE& a) { \
|
||||
return pexp_complex(a); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE plog<PACKET_TYPE>(const PACKET_TYPE& a) { \
|
||||
return plog_complex(a); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE psqrt<PACKET_TYPE>(const PACKET_TYPE& a) { \
|
||||
return psqrt_complex(a); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet4cd);
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf pconj<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, -a.v, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pconj<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pconj<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pconj<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pconj<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
|
||||
}
|
||||
|
||||
#undef DELEGATE_UNARY_TO_REAL_OP
|
||||
#undef EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS
|
||||
|
||||
// Flip real and imaginary parts, i.e. {re(a), im(a)} -> {im(a), re(a)}.
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf pcplxflip<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
|
||||
}
|
||||
|
||||
// Copy real to imaginary part, i.e. {re(a), im(a)} -> {re(a), re(a)}.
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf pdupreal<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pdupreal<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pdupreal<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pdupreal<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pdupreal<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
|
||||
}
|
||||
|
||||
// Copy imaginary to real part, i.e. {re(a), im(a)} -> {im(a), im(a)}.
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf pdupimag<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cf pdupimag<Packet4cf>(const Packet4cf& a) {
|
||||
return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pdupimag<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd pdupimag<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cd pdupimag<Packet2cd>(const Packet2cd& a) {
|
||||
return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from) {
|
||||
return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
|
||||
std::real(from[2]), std::imag(from[2]), std::real(from[2]), std::imag(from[2]),
|
||||
std::real(from[3]), std::imag(from[3]), std::real(from[3]), std::imag(from[3])});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd ploaddup<Packet4cd>(const std::complex<double>* from) {
|
||||
return Packet4cd(Packet8d{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf ploadquad<Packet8cf>(const std::complex<float>* from) {
|
||||
return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
|
||||
std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd ploadquad<Packet4cd>(const std::complex<double>* from) {
|
||||
return pset1<Packet4cd>(*from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf preverse<Packet8cf>(const Packet8cf& a) {
|
||||
return Packet8cf(reinterpret_cast<Packet16f>(preverse(reinterpret_cast<Packet8d>(a.v))));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4cd preverse<Packet4cd>(const Packet4cd& a) {
|
||||
return Packet4cd(__builtin_shufflevector(a.v, a.v, 6, 7, 4, 5, 2, 3, 0, 1));
|
||||
}
|
||||
|
||||
// ----------- Binary ops ------------------
|
||||
#define DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, OP) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE OP<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
|
||||
return PACKET_TYPE(OP(a.v, b.v)); \
|
||||
}
|
||||
|
||||
#define EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PACKET_TYPE) \
|
||||
DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, psub) \
|
||||
DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, pand) \
|
||||
DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, por) \
|
||||
DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, pxor) \
|
||||
DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, pandnot) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pdiv<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
|
||||
return pdiv_complex(a, b); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pcmp_eq<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
|
||||
const PACKET_TYPE t = PACKET_TYPE(pcmp_eq(a.v, b.v)); \
|
||||
return PACKET_TYPE(pand(pdupreal(t).v, pdupimag(t).v)); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet4cd);
|
||||
|
||||
// Binary ops that are needed on sub-packets for predux and predux_mul.
|
||||
#define EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PACKET_TYPE) \
|
||||
DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, padd) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pmul<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
|
||||
return pmul_complex(a, b); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet8cf);
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cf);
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cf);
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cd);
|
||||
EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cd);
|
||||
|
||||
#define EIGEN_CLANG_PACKET_SCATTER_GATHER(PACKET_TYPE) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE void pscatter(unpacket_traits<PACKET_TYPE>::type* to, const PACKET_TYPE& from, Index stride) { \
|
||||
constexpr int size = unpacket_traits<PACKET_TYPE>::size; \
|
||||
for (int i = 0; i < size; ++i) { \
|
||||
to[i * stride] = from[i]; \
|
||||
} \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pgather<typename unpacket_traits<PACKET_TYPE>::type, PACKET_TYPE>( \
|
||||
const unpacket_traits<PACKET_TYPE>::type* from, Index stride) { \
|
||||
constexpr int size = unpacket_traits<PACKET_TYPE>::size; \
|
||||
PACKET_TYPE result; \
|
||||
for (int i = 0; i < size; ++i) { \
|
||||
const unpacket_traits<PACKET_TYPE>::type from_i = from[i * stride]; \
|
||||
result.v[2 * i] = numext::real(from_i); \
|
||||
result.v[2 * i + 1] = numext::imag(from_i); \
|
||||
} \
|
||||
return result; \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8cf);
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet4cd);
|
||||
#undef EIGEN_CLANG_PACKET_SCATTER_GATHER
|
||||
|
||||
#undef DELEGATE_BINARY_TO_REAL_OP
|
||||
#undef EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS
|
||||
#undef EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS
|
||||
|
||||
// ------------ ternary ops -------------
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8cf pselect<Packet8cf>(const Packet8cf& mask, const Packet8cf& a, const Packet8cf& b) {
|
||||
return Packet8cf(reinterpret_cast<Packet16f>(
|
||||
pselect(reinterpret_cast<Packet8d>(mask.v), reinterpret_cast<Packet8d>(a.v), reinterpret_cast<Packet8d>(b.v))));
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet8cf>(Packet8cf& p1, Packet8cf& p2) {
|
||||
Packet16f tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23);
|
||||
p2.v = __builtin_shufflevector(p1.v, p2.v, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31);
|
||||
p1.v = tmp;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_ALWAYS_INLINE void zip_in_place<Packet4cd>(Packet4cd& p1, Packet4cd& p2) {
|
||||
Packet8d tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11);
|
||||
p2.v = __builtin_shufflevector(p1.v, p2.v, 4, 5, 12, 13, 6, 7, 14, 15);
|
||||
p1.v = tmp;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 8>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4cd, 4>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4cd, 2>& kernel) {
|
||||
detail::ptranspose_impl(kernel);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_CLANG_H
|
||||
@@ -245,28 +245,30 @@ EIGEN_STRONG_INLINE void store_vector_aligned(scalar_type_of_vector_t<VectorT>*
|
||||
// --- Intrinsic-like specializations ---
|
||||
|
||||
// --- Load/Store operations ---
|
||||
#define EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PACKET_TYPE, SCALAR_TYPE) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE ploadu<PACKET_TYPE>(const SCALAR_TYPE* from) { \
|
||||
return detail::load_vector_unaligned<PACKET_TYPE>(from); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pload<PACKET_TYPE>(const SCALAR_TYPE* from) { \
|
||||
return detail::load_vector_aligned<PACKET_TYPE>(from); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE void pstoreu<SCALAR_TYPE, PACKET_TYPE>(SCALAR_TYPE * to, const PACKET_TYPE& from) { \
|
||||
detail::store_vector_unaligned<PACKET_TYPE>(to, from); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE void pstore<SCALAR_TYPE, PACKET_TYPE>(SCALAR_TYPE * to, const PACKET_TYPE& from) { \
|
||||
detail::store_vector_aligned<PACKET_TYPE>(to, from); \
|
||||
#define EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PACKET_TYPE) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE ploadu<PACKET_TYPE>(const detail::scalar_type_of_vector_t<PACKET_TYPE>* from) { \
|
||||
return detail::load_vector_unaligned<PACKET_TYPE>(from); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pload<PACKET_TYPE>(const detail::scalar_type_of_vector_t<PACKET_TYPE>* from) { \
|
||||
return detail::load_vector_aligned<PACKET_TYPE>(from); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE void pstoreu<detail::scalar_type_of_vector_t<PACKET_TYPE>, PACKET_TYPE>( \
|
||||
detail::scalar_type_of_vector_t<PACKET_TYPE> * to, const PACKET_TYPE& from) { \
|
||||
detail::store_vector_unaligned<PACKET_TYPE>(to, from); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE void pstore<detail::scalar_type_of_vector_t<PACKET_TYPE>, PACKET_TYPE>( \
|
||||
detail::scalar_type_of_vector_t<PACKET_TYPE> * to, const PACKET_TYPE& from) { \
|
||||
detail::store_vector_aligned<PACKET_TYPE>(to, from); \
|
||||
}
|
||||
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16f, float)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8d, double)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16i, int32_t)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8l, int64_t)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16f)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8d)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16i)
|
||||
EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8l)
|
||||
#undef EIGEN_CLANG_PACKET_LOAD_STORE_PACKET
|
||||
|
||||
// --- Broadcast operation ---
|
||||
@@ -329,6 +331,10 @@ EIGEN_STRONG_INLINE Packet8d pcast_long_to_double(const Packet8l& a) { return re
|
||||
|
||||
// Bitwise ops for integer packets
|
||||
#define EIGEN_CLANG_PACKET_BITWISE_INT(PACKET_TYPE) \
|
||||
template <> \
|
||||
constexpr EIGEN_STRONG_INLINE PACKET_TYPE pzero<PACKET_TYPE>(const PACKET_TYPE& /*unused*/) { \
|
||||
return PACKET_TYPE(0); \
|
||||
} \
|
||||
template <> \
|
||||
constexpr EIGEN_STRONG_INLINE PACKET_TYPE ptrue<PACKET_TYPE>(const PACKET_TYPE& /*unused*/) { \
|
||||
return PACKET_TYPE(0) == PACKET_TYPE(0); \
|
||||
@@ -370,8 +376,9 @@ EIGEN_CLANG_PACKET_BITWISE_INT(Packet8l)
|
||||
// Bitwise ops for floating point packets
|
||||
#define EIGEN_CLANG_PACKET_BITWISE_FLOAT(PACKET_TYPE, CAST_TO_INT, CAST_FROM_INT) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE ptrue<PACKET_TYPE>(const PACKET_TYPE& a) { \
|
||||
return CAST_FROM_INT(CAST_TO_INT(a) == CAST_TO_INT(a)); \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE ptrue<PACKET_TYPE>(const PACKET_TYPE& /* unused */) { \
|
||||
using Scalar = detail::scalar_type_of_vector_t<PACKET_TYPE>; \
|
||||
return CAST_FROM_INT(PACKET_TYPE(Scalar(0)) == PACKET_TYPE(Scalar(0))); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE PACKET_TYPE pand<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
|
||||
@@ -537,6 +544,7 @@ EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet16f)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8d)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet16i)
|
||||
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8l)
|
||||
|
||||
#undef EIGEN_CLANG_PACKET_SCATTER_GATHER
|
||||
|
||||
// ---- Various operations that depend on __builtin_shufflevector.
|
||||
@@ -653,6 +661,21 @@ EIGEN_STRONG_INLINE Packet8l plset<Packet8l>(const int64_t& a) {
|
||||
return Packet8l{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7};
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f peven_mask(const Packet16f& /* unused */) {
|
||||
float kTrue = numext::bit_cast<float>(int32_t(-1));
|
||||
float kFalse = 0.0f;
|
||||
return Packet16f{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse,
|
||||
kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /* unused */) {
|
||||
double kTrue = numext::bit_cast<double>(int64_t(-1l));
|
||||
double kFalse = 0.0;
|
||||
return Packet8d{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
|
||||
}
|
||||
|
||||
// Helpers for ptranspose.
|
||||
namespace detail {
|
||||
|
||||
|
||||
@@ -57,54 +57,100 @@ EIGEN_CLANG_PACKET_REDUX_INT(Packet8l)
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_shufflevector)
|
||||
namespace detail {
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceAdd16(const VectorT& a) {
|
||||
auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6, 8, 10, 12, 14) +
|
||||
__builtin_shufflevector(a, a, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
auto t2 = __builtin_shufflevector(t1, t1, 0, 2, 4, 6) + __builtin_shufflevector(t1, t1, 1, 3, 5, 7);
|
||||
auto t3 = __builtin_shufflevector(t2, t2, 0, 2) + __builtin_shufflevector(t2, t2, 1, 3);
|
||||
return t3[0] + t3[1];
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd16(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7) +
|
||||
__builtin_shufflevector(a, a, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1, 2, 3) + __builtin_shufflevector(t1, t1, 4, 5, 6, 7);
|
||||
const auto t3 = __builtin_shufflevector(t2, t2, 0, 1) + __builtin_shufflevector(t2, t2, 2, 3);
|
||||
return {t3[0], t3[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceAdd8(const VectorT& a) {
|
||||
auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6) + __builtin_shufflevector(a, a, 1, 3, 5, 7);
|
||||
auto t2 = __builtin_shufflevector(t1, t1, 0, 2) + __builtin_shufflevector(t1, t1, 1, 3);
|
||||
return t2[0] + t2[1];
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd8(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) + __builtin_shufflevector(a, a, 4, 5, 6, 7);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) + __builtin_shufflevector(t1, t1, 2, 3);
|
||||
return {t2[0], t2[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceMul16(const VectorT& a) {
|
||||
auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6, 8, 10, 12, 14) *
|
||||
__builtin_shufflevector(a, a, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||
auto t2 = __builtin_shufflevector(t1, t1, 0, 2, 4, 6) * __builtin_shufflevector(t1, t1, 1, 3, 5, 7);
|
||||
auto t3 = __builtin_shufflevector(t2, t2, 0, 2) * __builtin_shufflevector(t2, t2, 1, 3);
|
||||
return t3[0] * t3[1];
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul16(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7) *
|
||||
__builtin_shufflevector(a, a, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1, 2, 3) * __builtin_shufflevector(t1, t1, 4, 5, 6, 7);
|
||||
const auto t3 = __builtin_shufflevector(t2, t2, 0, 1) * __builtin_shufflevector(t2, t2, 2, 3);
|
||||
return {t3[0], t3[1]};
|
||||
}
|
||||
|
||||
template <typename VectorT>
|
||||
EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceMul8(const VectorT& a) {
|
||||
auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6) * __builtin_shufflevector(a, a, 1, 3, 5, 7);
|
||||
auto t2 = __builtin_shufflevector(t1, t1, 0, 2) * __builtin_shufflevector(t1, t1, 1, 3);
|
||||
return t2[0] * t2[1];
|
||||
EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul8(
|
||||
const VectorT& a) {
|
||||
const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) * __builtin_shufflevector(a, a, 4, 5, 6, 7);
|
||||
const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) * __builtin_shufflevector(t1, t1, 2, 3);
|
||||
return {t2[0], t2[1]};
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
|
||||
return detail::ReduceAdd16(a);
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd16(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
|
||||
return detail::ReduceAdd8(a);
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceAdd8(a);
|
||||
return even + odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
|
||||
return detail::ReduceMul16(a);
|
||||
float even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul16(a);
|
||||
return even * odd;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
|
||||
return detail::ReduceMul8(a);
|
||||
double even, odd;
|
||||
std::tie(even, odd) = detail::ReduceMul8(a);
|
||||
return even * odd;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux<Packet8cf>(const Packet8cf& a) {
|
||||
float re, im;
|
||||
std::tie(re, im) = detail::ReduceAdd16(a.v);
|
||||
return std::complex<float>(re, im);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux<Packet4cd>(const Packet4cd& a) {
|
||||
double re, im;
|
||||
std::tie(re, im) = detail::ReduceAdd8(a.v);
|
||||
return std::complex<double>(re, im);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet8cf>(const Packet8cf& a) {
|
||||
const Packet4cf lower4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3, 4, 5, 6, 7));
|
||||
const Packet4cf upper4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 8, 9, 10, 11, 12, 13, 14, 15));
|
||||
const Packet4cf prod4 = pmul<Packet4cf>(lower4, upper4);
|
||||
const Packet2cf lower2 = Packet2cf(__builtin_shufflevector(prod4.v, prod4.v, 0, 1, 2, 3));
|
||||
const Packet2cf upper2 = Packet2cf(__builtin_shufflevector(prod4.v, prod4.v, 4, 5, 6, 7));
|
||||
const Packet2cf prod2 = pmul<Packet2cf>(lower2, upper2);
|
||||
return prod2[0] * prod2[1];
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const Packet4cd& a) {
|
||||
const Packet2cd lower2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3));
|
||||
const Packet2cd upper2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 4, 5, 6, 7));
|
||||
const Packet2cd prod2 = pmul<Packet2cd>(lower2, upper2);
|
||||
return prod2[0] * prod2[1];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
Reference in New Issue
Block a user