0
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2026-01-18 17:31:19 +01:00

set EIGEN_RISCV64_DEFAULT_LMUL to 1.

This commit is contained in:
Chip Kerchner
2025-11-07 14:48:42 +00:00
parent ce896ac170
commit 9ee3d62060
4 changed files with 5 additions and 1217 deletions

View File

@@ -1,725 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_COMPLEX_RVV10_H
#define EIGEN_COMPLEX_RVV10_H
// IWYU pragma: private
#include "../../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
/********************************* float32 ************************************/
struct PacketXcf {
EIGEN_STRONG_INLINE PacketXcf() {}
EIGEN_STRONG_INLINE explicit PacketXcf(const PacketMul1Xf& _real, const PacketMul1Xf& _imag) : real(_real), imag(_imag) {}
EIGEN_STRONG_INLINE explicit PacketXcf(const PacketMul2Xf& a)
: real(__riscv_vget_v_f32m2_f32m1(a, 0)), imag(__riscv_vget_v_f32m2_f32m1(a, 1)) {}
PacketMul1Xf real;
PacketMul1Xf imag;
};
template <>
struct packet_traits<std::complex<float>> : default_packet_traits {
typedef PacketXcf type;
typedef PacketXcf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 1>::size,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasSqrt = 1,
HasSign = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasLog = 0,
HasSetLinear = 0
};
};
template <>
struct unpacket_traits<PacketXcf> {
typedef std::complex<float> type;
typedef PacketXcf half;
typedef PacketMul2Xf as_real;
enum {
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 1>::size,
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
vectorizable = true,
masked_load_available = false,
masked_store_available = false
};
};
template <>
EIGEN_STRONG_INLINE PacketXcf pcast<PacketMul2Xf, PacketXcf>(const PacketMul2Xf& a) {
return PacketXcf(a);
}
template <>
EIGEN_STRONG_INLINE PacketMul2Xf pcast<PacketXcf, PacketMul2Xf>(const PacketXcf& a) {
return __riscv_vcreate_v_f32m1_f32m2(a.real, a.imag);
}
template <>
EIGEN_STRONG_INLINE PacketXcf pset1<PacketXcf>(const std::complex<float>& from) {
PacketMul1Xf real = pset1<PacketMul1Xf>(from.real());
PacketMul1Xf imag = pset1<PacketMul1Xf>(from.imag());
return PacketXcf(real, imag);
}
template <>
EIGEN_STRONG_INLINE PacketXcf padd<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(padd<PacketMul1Xf>(a.real, b.real), padd<PacketMul1Xf>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf psub<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(psub<PacketMul1Xf>(a.real, b.real), psub<PacketMul1Xf>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pnegate(const PacketXcf& a) {
return PacketXcf(pnegate<PacketMul1Xf>(a.real), pnegate<PacketMul1Xf>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pconj(const PacketXcf& a) {
return PacketXcf(
a.real, __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vxor_vx_u32m1(__riscv_vreinterpret_v_f32m1_u32m1(a.imag),
0x80000000, unpacket_traits<PacketMul1Xf>::size)));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pmul<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
PacketMul1Xf v1 = pmul<PacketMul1Xf>(a.real, b.real);
PacketMul1Xf v2 = pmul<PacketMul1Xf>(a.imag, b.imag);
PacketMul1Xf v3 = pmul<PacketMul1Xf>(a.real, b.imag);
PacketMul1Xf v4 = pmul<PacketMul1Xf>(a.imag, b.real);
return PacketXcf(psub<PacketMul1Xf>(v1, v2), padd<PacketMul1Xf>(v3, v4));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pmadd<PacketXcf>(const PacketXcf& a, const PacketXcf& b, const PacketXcf& c) {
PacketMul1Xf v1 = pmadd<PacketMul1Xf>(a.real, b.real, c.real);
PacketMul1Xf v2 = pmul<PacketMul1Xf>(a.imag, b.imag);
PacketMul1Xf v3 = pmadd<PacketMul1Xf>(a.real, b.imag, c.imag);
PacketMul1Xf v4 = pmul<PacketMul1Xf>(a.imag, b.real);
return PacketXcf(psub<PacketMul1Xf>(v1, v2), padd<PacketMul1Xf>(v3, v4));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pcmp_eq(const PacketXcf& a, const PacketXcf& b) {
PacketMask32 eq_both = pand<PacketMask32>(pcmp_eq_mask(a.real, b.real), pcmp_eq_mask(a.imag, b.imag));
PacketMul1Xf res = pselect(eq_both, ptrue<PacketMul1Xf>(a.real), pzero<PacketMul1Xf>(a.real));
return PacketXcf(res, res);
}
template <>
EIGEN_STRONG_INLINE PacketXcf pand<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(pand<PacketMul1Xf>(a.real, b.real), pand<PacketMul1Xf>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf por<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(por<PacketMul1Xf>(a.real, b.real), por<PacketMul1Xf>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pxor<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(pxor<PacketMul1Xf>(a.real, b.real), pxor<PacketMul1Xf>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pandnot<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(pandnot<PacketMul1Xf>(a.real, b.real), pandnot<PacketMul1Xf>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pload<PacketXcf>(const std::complex<float>* from) {
vfloat32m1x2_t res = __riscv_vlseg2e32_v_f32m1x2((const float*)from, unpacket_traits<PacketMul1Xf>::size);
EIGEN_DEBUG_ALIGNED_LOAD return PacketXcf(__riscv_vget_v_f32m1x2_f32m1(res, 0), __riscv_vget_v_f32m1x2_f32m1(res, 1));
}
template <>
EIGEN_STRONG_INLINE PacketXcf ploadu<PacketXcf>(const std::complex<float>* from) {
vfloat32m1x2_t res = __riscv_vlseg2e32_v_f32m1x2((const float*)from, unpacket_traits<PacketMul1Xf>::size);
EIGEN_DEBUG_UNALIGNED_LOAD return PacketXcf(__riscv_vget_v_f32m1x2_f32m1(res, 0),
__riscv_vget_v_f32m1x2_f32m1(res, 1));
}
template <>
EIGEN_STRONG_INLINE PacketXcf ploaddup<PacketXcf>(const std::complex<float>* from) {
PacketMul1Xu real_idx = __riscv_vid_v_u32m1(unpacket_traits<PacketMul1Xf>::size);
real_idx = __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(real_idx, 0xfffffffeu, unpacket_traits<PacketMul1Xf>::size), 2,
unpacket_traits<PacketMul1Xf>::size);
PacketMul1Xu imag_idx = __riscv_vadd_vx_u32m1(real_idx, sizeof(float), unpacket_traits<PacketMul1Xf>::size);
// real_idx = 0 0 2*sizeof(float) 2*sizeof(float) 4*sizeof(float) 4*sizeof(float) ...
return PacketXcf(__riscv_vloxei32_v_f32m1((const float*)from, real_idx, unpacket_traits<PacketMul1Xf>::size),
__riscv_vloxei32_v_f32m1((const float*)from, imag_idx, unpacket_traits<PacketMul1Xf>::size));
}
template <>
EIGEN_STRONG_INLINE PacketXcf ploadquad<PacketXcf>(const std::complex<float>* from) {
PacketMul1Xu real_idx = __riscv_vid_v_u32m1(unpacket_traits<PacketMul1Xf>::size);
real_idx = __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(real_idx, 0xfffffffcu, unpacket_traits<PacketMul1Xf>::size), 1,
unpacket_traits<PacketMul1Xf>::size);
PacketMul1Xu imag_idx = __riscv_vadd_vx_u32m1(real_idx, sizeof(float), unpacket_traits<PacketMul1Xf>::size);
// real_idx = 0 0 2*sizeof(float) 2*sizeof(float) 4*sizeof(float) 4*sizeof(float) ...
return PacketXcf(__riscv_vloxei32_v_f32m1((const float*)from, real_idx, unpacket_traits<PacketMul1Xf>::size),
__riscv_vloxei32_v_f32m1((const float*)from, imag_idx, unpacket_traits<PacketMul1Xf>::size));
}
template <>
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const PacketXcf& from) {
vfloat32m1x2_t vx2 = __riscv_vundefined_f32m1x2();
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_ALIGNED_STORE __riscv_vsseg2e32_v_f32m1x2((float*)to, vx2, unpacket_traits<PacketXcf>::size);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const PacketXcf& from) {
vfloat32m1x2_t vx2 = __riscv_vundefined_f32m1x2();
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vsseg2e32_v_f32m1x2((float*)to, vx2, unpacket_traits<PacketMul1Xf>::size);
}
template <>
EIGEN_DEVICE_FUNC inline PacketXcf pgather<std::complex<float>, PacketXcf>(const std::complex<float>* from,
Index stride) {
vfloat32m1x2_t res =
__riscv_vlsseg2e32_v_f32m1x2((const float*)from, 2 * stride * sizeof(float), unpacket_traits<PacketMul1Xf>::size);
return PacketXcf(__riscv_vget_v_f32m1x2_f32m1(res, 0), __riscv_vget_v_f32m1x2_f32m1(res, 1));
}
template <>
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, PacketXcf>(std::complex<float>* to, const PacketXcf& from,
Index stride) {
vfloat32m1x2_t from_rvv_type = __riscv_vundefined_f32m1x2();
from_rvv_type = __riscv_vset_v_f32m1_f32m1x2(from_rvv_type, 0, from.real);
from_rvv_type = __riscv_vset_v_f32m1_f32m1x2(from_rvv_type, 1, from.imag);
__riscv_vssseg2e32_v_f32m1x2((float*)to, 2 * stride * sizeof(float), from_rvv_type, unpacket_traits<PacketMul1Xf>::size);
}
template <>
EIGEN_STRONG_INLINE std::complex<float> pfirst<PacketXcf>(const PacketXcf& a) {
return std::complex<float>(pfirst<PacketMul1Xf>(a.real), pfirst<PacketMul1Xf>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf preverse(const PacketXcf& a) {
return PacketXcf(preverse<PacketMul1Xf>(a.real), preverse<PacketMul1Xf>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pcplxflip<PacketXcf>(const PacketXcf& a) {
return PacketXcf(a.imag, a.real);
}
template <>
EIGEN_STRONG_INLINE std::complex<float> predux<PacketXcf>(const PacketXcf& a) {
return std::complex<float>(predux<PacketMul1Xf>(a.real), predux<PacketMul1Xf>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcf pdiv<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
PacketXcf b_conj = pconj<PacketXcf>(b);
PacketXcf dividend = pmul<PacketXcf>(a, b_conj);
PacketMul1Xf divider = psub<PacketMul1Xf>(pmul<PacketMul1Xf>(b.real, b_conj.real), pmul<PacketMul1Xf>(b.imag, b_conj.imag));
return PacketXcf(pdiv<PacketMul1Xf>(dividend.real, divider), pdiv<PacketMul1Xf>(dividend.imag, divider));
}
template <int N>
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXcf, N>& kernel) {
float buffer_real[unpacket_traits<PacketMul1Xf>::size * N];
float buffer_imag[unpacket_traits<PacketMul1Xf>::size * N];
int i = 0;
for (i = 0; i < N; i++) {
__riscv_vsse32(&buffer_real[i], N * sizeof(float), kernel.packet[i].real, unpacket_traits<PacketMul1Xf>::size);
__riscv_vsse32(&buffer_imag[i], N * sizeof(float), kernel.packet[i].imag, unpacket_traits<PacketMul1Xf>::size);
}
for (i = 0; i < N; i++) {
kernel.packet[i].real =
__riscv_vle32_v_f32m1(&buffer_real[i * unpacket_traits<PacketMul1Xf>::size], unpacket_traits<PacketMul1Xf>::size);
kernel.packet[i].imag =
__riscv_vle32_v_f32m1(&buffer_imag[i * unpacket_traits<PacketMul1Xf>::size], unpacket_traits<PacketMul1Xf>::size);
}
}
template <typename Packet>
EIGEN_STRONG_INLINE Packet psqrt_complex_rvv(const Packet& a) {
typedef typename unpacket_traits<Packet>::type Scalar;
typedef typename Scalar::value_type RealScalar;
typedef typename packet_traits<RealScalar>::type RealPacket;
typedef typename unpacket_traits<RealPacket>::packet_mask PacketMask;
// Computes the principal sqrt of the complex numbers in the input.
//
// For example, for packets containing 2 complex numbers stored in
// [real0, real1, imag0, imag1] format
// a = [a0, a1] = [x0, x1, y0, y1],
// where x0 = real(a0), y0 = imag(a0) etc., this function returns
// b = [b0, b1] = [u0, u1, v0, v1],
// such that b0^2 = a0, b1^2 = a1.
//
// To derive the formula for the complex square roots, let's consider the equation for
// a single complex square root of the number x + i*y. We want to find real numbers
// u and v such that
// (u + i*v)^2 = x + i*y <=>
// u^2 - v^2 + i*2*u*v = x + i*v.
// By equating the real and imaginary parts we get:
// u^2 - v^2 = x
// 2*u*v = y.
//
// For x >= 0, this has the numerically stable solution
// u = sqrt(0.5 * (x + sqrt(x^2 + y^2)))
// v = 0.5 * (y / u)
// and for x < 0,
// v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))
// u = 0.5 * (y / v)
//
// To avoid unnecessary over- and underflow, we compute sqrt(x^2 + y^2) as
// l = max(|x|, |y|) * sqrt(1 + (min(|x|, |y|) / max(|x|, |y|))^2) ,
// In the following, without lack of generality, we have annotated the code, assuming
// that the input is a packet of 2 complex numbers.
//
// Step 1. Compute l = [l0, l1], where
// l0 = sqrt(x0^2 + y0^2), l1 = sqrt(x1^2 + y1^2)
// To avoid over- and underflow, we use the stable formula for each hypotenuse
// l0 = (min0 == 0 ? max0 : max0 * sqrt(1 + (min0/max0)**2)),
// where max0 = max(|x0|, |y0|), min0 = min(|x0|, |y0|), and similarly for l1.
Packet a_abs = Packet(pabs(a.real), pabs(a.imag));
RealPacket a_max = pmax(a_abs.real, a_abs.imag);
RealPacket a_min = pmin(a_abs.real, a_abs.imag);
PacketMask a_min_zero_mask = pcmp_eq_mask(a_min, pzero(a_min));
PacketMask a_max_zero_mask = pcmp_eq_mask(a_max, pzero(a_max));
RealPacket r = pdiv(a_min, a_max);
const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
const RealPacket cst_true = ptrue<RealPacket>(cst_one);
RealPacket l = pmul(a_max, psqrt(padd(cst_one, pmul(r, r))));
// Set l to a_max if a_min is zero.
l = pselect(a_min_zero_mask, a_max, l);
// Step 2. Compute [rho0, rho1], where
// rho0 = sqrt(0.5 * (l0 + |x0|)), rho1 = sqrt(0.5 * (l1 + |x1|))
// We don't care about the imaginary parts computed here. They will be overwritten later.
const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));
RealPacket rho = psqrt(pmul(cst_half, padd(a_abs.real, l)));
// Step 3. Compute [rho0, rho1, eta0, eta1], where
// eta0 = (y0 / rho0) / 2, and eta1 = (y1 / rho1) / 2.
// set eta = 0 of input is 0 + i0.
RealPacket eta = pselect(a_max_zero_mask, pzero<RealPacket>(cst_one), pmul(cst_half, pdiv(a.imag, rho)));
// Compute result for inputs with positive real part.
Packet positive_real_result = Packet(rho, eta);
// Step 4. Compute solution for inputs with negative real part:
// [|eta0| |eta1|, sign(y0)*rho0, sign(y1)*rho1]
const RealPacket cst_imag_sign_mask = pset1<RealPacket>(RealScalar(-0.0));
RealPacket imag_signs = pand(a.imag, cst_imag_sign_mask);
Packet negative_real_result = Packet(pabs(eta), por(rho, imag_signs));
// Step 5. Select solution branch based on the sign of the real parts.
PacketMask negative_real_mask_half = pcmp_lt_mask(a.real, pzero(a.real));
Packet result = Packet(pselect(negative_real_mask_half, negative_real_result.real, positive_real_result.real),
pselect(negative_real_mask_half, negative_real_result.imag, positive_real_result.imag));
// Step 6. Handle special cases for infinities:
// * If z is (x,+∞), the result is (+∞,+∞) even if x is NaN
// * If z is (x,-∞), the result is (+∞,-∞) even if x is NaN
// * If z is (-∞,y), the result is (0*|y|,+∞) for finite or NaN y
// * If z is (+∞,y), the result is (+∞,0*|y|) for finite or NaN y
const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
PacketMask is_real_inf = pcmp_eq_mask(a_abs.real, cst_pos_inf);
// prepare packet of (+∞,0*|y|) or (0*|y|,+∞), depending on the sign of the infinite real part.
const Packet cst_one_zero = pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0)));
Packet real_inf_result = Packet(pmul(a_abs.real, cst_one_zero.real), pmul(a_abs.imag, cst_one_zero.imag));
real_inf_result = Packet(pselect(negative_real_mask_half, real_inf_result.imag, real_inf_result.real),
pselect(negative_real_mask_half, real_inf_result.real, real_inf_result.imag));
// prepare packet of (+∞,+∞) or (+∞,-∞), depending on the sign of the infinite imaginary part.
PacketMask is_imag_inf = pcmp_eq_mask(a_abs.imag, cst_pos_inf);
// unless otherwise specified, if either the real or imaginary component is nan, the entire result is nan
result = Packet(pselect(pcmp_eq_mask(result.real, result.real), result.real, cst_true),
pselect(pcmp_eq_mask(result.imag, result.imag), result.imag, cst_true));
result = Packet(pselect(is_real_inf, real_inf_result.real, result.real),
pselect(is_real_inf, real_inf_result.imag, result.imag));
return Packet(pselect(is_imag_inf, cst_pos_inf, result.real), pselect(is_imag_inf, a.imag, result.imag));
}
template <typename Packet>
EIGEN_STRONG_INLINE Packet plog_complex_rvv(const Packet& x) {
typedef typename unpacket_traits<Packet>::type Scalar;
typedef typename Scalar::value_type RealScalar;
typedef typename packet_traits<RealScalar>::type RealPacket;
typedef typename unpacket_traits<RealPacket>::packet_mask PacketMask;
// log(sqrt(a^2 + b^2)), atan2(b, a)
RealPacket xlogr = plog(psqrt(padd(pmul<RealPacket>(x.real, x.real), pmul<RealPacket>(x.imag, x.imag))));
RealPacket ximg = patan2(x.imag, x.real);
const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
RealPacket r_abs = pabs(x.real);
RealPacket i_abs = pabs(x.imag);
PacketMask is_r_pos_inf = pcmp_eq_mask(r_abs, cst_pos_inf);
PacketMask is_i_pos_inf = pcmp_eq_mask(i_abs, cst_pos_inf);
PacketMask is_any_inf = por(is_r_pos_inf, is_i_pos_inf);
RealPacket xreal = pselect(is_any_inf, cst_pos_inf, xlogr);
return Packet(xreal, ximg);
}
template <>
EIGEN_STRONG_INLINE PacketXcf psqrt<PacketXcf>(const PacketXcf& a) {
return psqrt_complex_rvv<PacketXcf>(a);
}
template <>
EIGEN_STRONG_INLINE PacketXcf plog<PacketXcf>(const PacketXcf& a) {
return plog_complex_rvv<PacketXcf>(a);
}
template <>
struct conj_helper<PacketMul2Xf, PacketXcf, false, false> {
EIGEN_STRONG_INLINE PacketXcf pmadd(const PacketMul2Xf& x, const PacketXcf& y, const PacketXcf& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcf pmsub(const PacketMul2Xf& x, const PacketXcf& y, const PacketXcf& c) const {
return psub(this->pmul(x, y), c);
}
EIGEN_STRONG_INLINE PacketXcf pmul(const PacketMul2Xf& x, const PacketXcf& y) const {
return PacketXcf(Eigen::internal::pmul<PacketMul2Xf>(x, pcast<PacketXcf, PacketMul2Xf>(y)));
}
};
template <>
struct conj_helper<PacketXcf, PacketMul2Xf, false, false> {
EIGEN_STRONG_INLINE PacketXcf pmadd(const PacketXcf& x, const PacketMul2Xf& y, const PacketXcf& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcf pmsub(const PacketXcf& x, const PacketMul2Xf& y, const PacketXcf& c) const {
return psub(this->pmul(x, y), c);
}
EIGEN_STRONG_INLINE PacketXcf pmul(const PacketXcf& x, const PacketMul2Xf& y) const {
return PacketXcf(Eigen::internal::pmul<PacketMul2Xf>(pcast<PacketXcf, PacketMul2Xf>(x), y));
}
};
/********************************* double ************************************/
struct PacketXcd {
EIGEN_STRONG_INLINE PacketXcd() {}
EIGEN_STRONG_INLINE explicit PacketXcd(const PacketMul1Xd& _real, const PacketMul1Xd& _imag) : real(_real), imag(_imag) {}
EIGEN_STRONG_INLINE explicit PacketXcd(const PacketMul2Xd& a)
: real(__riscv_vget_v_f64m2_f64m1(a, 0)), imag(__riscv_vget_v_f64m2_f64m1(a, 1)) {}
PacketMul1Xd real;
PacketMul1Xd imag;
};
template <>
struct packet_traits<std::complex<double>> : default_packet_traits {
typedef PacketXcd type;
typedef PacketXcd half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 1>::size,
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasSqrt = 1,
HasSign = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasLog = 0,
HasSetLinear = 0
};
};
template <>
struct unpacket_traits<PacketXcd> {
typedef std::complex<double> type;
typedef PacketXcd half;
typedef PacketMul2Xd as_real;
enum {
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 1>::size,
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
vectorizable = true,
masked_load_available = false,
masked_store_available = false
};
};
template <>
EIGEN_STRONG_INLINE PacketXcd pcast<PacketMul2Xd, PacketXcd>(const PacketMul2Xd& a) {
return PacketXcd(a);
}
template <>
EIGEN_STRONG_INLINE PacketMul2Xd pcast<PacketXcd, PacketMul2Xd>(const PacketXcd& a) {
return __riscv_vcreate_v_f64m1_f64m2(a.real, a.imag);
}
template <>
EIGEN_STRONG_INLINE PacketXcd pset1<PacketXcd>(const std::complex<double>& from) {
PacketMul1Xd real = pset1<PacketMul1Xd>(from.real());
PacketMul1Xd imag = pset1<PacketMul1Xd>(from.imag());
return PacketXcd(real, imag);
}
template <>
EIGEN_STRONG_INLINE PacketXcd padd<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(padd<PacketMul1Xd>(a.real, b.real), padd<PacketMul1Xd>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd psub<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(psub<PacketMul1Xd>(a.real, b.real), psub<PacketMul1Xd>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pnegate(const PacketXcd& a) {
return PacketXcd(pnegate<PacketMul1Xd>(a.real), pnegate<PacketMul1Xd>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pconj(const PacketXcd& a) {
return PacketXcd(
a.real, __riscv_vreinterpret_v_u64m1_f64m1(__riscv_vxor_vx_u64m1(
__riscv_vreinterpret_v_f64m1_u64m1(a.imag), 0x8000000000000000, unpacket_traits<PacketMul1Xd>::size)));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pmul<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
PacketMul1Xd v1 = pmul<PacketMul1Xd>(a.real, b.real);
PacketMul1Xd v2 = pmul<PacketMul1Xd>(a.imag, b.imag);
PacketMul1Xd v3 = pmul<PacketMul1Xd>(a.real, b.imag);
PacketMul1Xd v4 = pmul<PacketMul1Xd>(a.imag, b.real);
return PacketXcd(psub<PacketMul1Xd>(v1, v2), padd<PacketMul1Xd>(v3, v4));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pmadd<PacketXcd>(const PacketXcd& a, const PacketXcd& b, const PacketXcd& c) {
PacketMul1Xd v1 = pmadd<PacketMul1Xd>(a.real, b.real, c.real);
PacketMul1Xd v2 = pmul<PacketMul1Xd>(a.imag, b.imag);
PacketMul1Xd v3 = pmadd<PacketMul1Xd>(a.real, b.imag, c.imag);
PacketMul1Xd v4 = pmul<PacketMul1Xd>(a.imag, b.real);
return PacketXcd(psub<PacketMul1Xd>(v1, v2), padd<PacketMul1Xd>(v3, v4));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pcmp_eq(const PacketXcd& a, const PacketXcd& b) {
PacketMask64 eq_both = pand<PacketMask64>(pcmp_eq_mask(a.real, b.real), pcmp_eq_mask(a.imag, b.imag));
PacketMul1Xd res = pselect(eq_both, ptrue<PacketMul1Xd>(a.real), pzero<PacketMul1Xd>(a.real));
return PacketXcd(res, res);
}
template <>
EIGEN_STRONG_INLINE PacketXcd pand<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(pand<PacketMul1Xd>(a.real, b.real), pand<PacketMul1Xd>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd por<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(por<PacketMul1Xd>(a.real, b.real), por<PacketMul1Xd>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pxor<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(pxor<PacketMul1Xd>(a.real, b.real), pxor<PacketMul1Xd>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pandnot<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(pandnot<PacketMul1Xd>(a.real, b.real), pandnot<PacketMul1Xd>(a.imag, b.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pload<PacketXcd>(const std::complex<double>* from) {
vfloat64m1x2_t res = __riscv_vlseg2e64_v_f64m1x2((const double*)from, unpacket_traits<PacketMul1Xd>::size);
EIGEN_DEBUG_ALIGNED_LOAD return PacketXcd(__riscv_vget_v_f64m1x2_f64m1(res, 0), __riscv_vget_v_f64m1x2_f64m1(res, 1));
}
template <>
EIGEN_STRONG_INLINE PacketXcd ploadu<PacketXcd>(const std::complex<double>* from) {
vfloat64m1x2_t res = __riscv_vlseg2e64_v_f64m1x2((const double*)from, unpacket_traits<PacketMul1Xd>::size);
EIGEN_DEBUG_UNALIGNED_LOAD return PacketXcd(__riscv_vget_v_f64m1x2_f64m1(res, 0),
__riscv_vget_v_f64m1x2_f64m1(res, 1));
}
template <>
EIGEN_STRONG_INLINE PacketXcd ploaddup<PacketXcd>(const std::complex<double>* from) {
PacketMul1Xul real_idx = __riscv_vid_v_u64m1(unpacket_traits<PacketMul1Xd>::size);
real_idx =
__riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(real_idx, 0xfffffffffffffffeu, unpacket_traits<PacketMul1Xd>::size), 3,
unpacket_traits<PacketMul1Xd>::size);
PacketMul1Xul imag_idx = __riscv_vadd_vx_u64m1(real_idx, sizeof(double), unpacket_traits<PacketMul1Xd>::size);
// real_idx = 0 0 2*sizeof(double) 2*sizeof(double) 4*sizeof(double) 4*sizeof(double) ...
return PacketXcd(__riscv_vloxei64_v_f64m1((const double*)from, real_idx, unpacket_traits<PacketMul1Xd>::size),
__riscv_vloxei64_v_f64m1((const double*)from, imag_idx, unpacket_traits<PacketMul1Xd>::size));
}
template <>
EIGEN_STRONG_INLINE PacketXcd ploadquad<PacketXcd>(const std::complex<double>* from) {
PacketMul1Xul real_idx = __riscv_vid_v_u64m1(unpacket_traits<PacketMul1Xd>::size);
real_idx =
__riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(real_idx, 0xfffffffffffffffcu, unpacket_traits<PacketMul1Xd>::size), 2,
unpacket_traits<PacketMul1Xd>::size);
PacketMul1Xul imag_idx = __riscv_vadd_vx_u64m1(real_idx, sizeof(double), unpacket_traits<PacketMul1Xd>::size);
// real_idx = 0 0 2*sizeof(double) 2*sizeof(double) 4*sizeof(double) 4*sizeof(double) ...
return PacketXcd(__riscv_vloxei64_v_f64m1((const double*)from, real_idx, unpacket_traits<PacketMul1Xd>::size),
__riscv_vloxei64_v_f64m1((const double*)from, imag_idx, unpacket_traits<PacketMul1Xd>::size));
}
template <>
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const PacketXcd& from) {
vfloat64m1x2_t vx2 = __riscv_vundefined_f64m1x2();
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_ALIGNED_STORE __riscv_vsseg2e64_v_f64m1x2((double*)to, vx2, unpacket_traits<PacketXcd>::size);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const PacketXcd& from) {
vfloat64m1x2_t vx2 = __riscv_vundefined_f64m1x2();
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vsseg2e64_v_f64m1x2((double*)to, vx2, unpacket_traits<PacketMul1Xd>::size);
}
template <>
EIGEN_DEVICE_FUNC inline PacketXcd pgather<std::complex<double>, PacketXcd>(const std::complex<double>* from,
Index stride) {
vfloat64m1x2_t res =
__riscv_vlsseg2e64_v_f64m1x2((const double*)from, 2 * stride * sizeof(double), unpacket_traits<PacketMul1Xd>::size);
return PacketXcd(__riscv_vget_v_f64m1x2_f64m1(res, 0), __riscv_vget_v_f64m1x2_f64m1(res, 1));
}
template <>
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, PacketXcd>(std::complex<double>* to, const PacketXcd& from,
Index stride) {
vfloat64m1x2_t from_rvv_type = __riscv_vundefined_f64m1x2();
from_rvv_type = __riscv_vset_v_f64m1_f64m1x2(from_rvv_type, 0, from.real);
from_rvv_type = __riscv_vset_v_f64m1_f64m1x2(from_rvv_type, 1, from.imag);
__riscv_vssseg2e64_v_f64m1x2((double*)to, 2 * stride * sizeof(double), from_rvv_type,
unpacket_traits<PacketMul1Xd>::size);
}
template <>
EIGEN_STRONG_INLINE std::complex<double> pfirst<PacketXcd>(const PacketXcd& a) {
return std::complex<double>(pfirst<PacketMul1Xd>(a.real), pfirst<PacketMul1Xd>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd preverse(const PacketXcd& a) {
return PacketXcd(preverse<PacketMul1Xd>(a.real), preverse<PacketMul1Xd>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pcplxflip<PacketXcd>(const PacketXcd& a) {
return PacketXcd(a.imag, a.real);
}
template <>
EIGEN_STRONG_INLINE std::complex<double> predux<PacketXcd>(const PacketXcd& a) {
return std::complex<double>(predux<PacketMul1Xd>(a.real), predux<PacketMul1Xd>(a.imag));
}
template <>
EIGEN_STRONG_INLINE PacketXcd pdiv<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
PacketXcd b_conj = pconj<PacketXcd>(b);
PacketXcd dividend = pmul<PacketXcd>(a, b_conj);
PacketMul1Xd divider = psub<PacketMul1Xd>(pmul<PacketMul1Xd>(b.real, b_conj.real), pmul<PacketMul1Xd>(b.imag, b_conj.imag));
return PacketXcd(pdiv<PacketMul1Xd>(dividend.real, divider), pdiv<PacketMul1Xd>(dividend.imag, divider));
}
template <int N>
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXcd, N>& kernel) {
double buffer_real[unpacket_traits<PacketMul1Xd>::size * N];
double buffer_imag[unpacket_traits<PacketMul1Xd>::size * N];
int i = 0;
for (i = 0; i < N; i++) {
__riscv_vsse64(&buffer_real[i], N * sizeof(double), kernel.packet[i].real, unpacket_traits<PacketMul1Xd>::size);
__riscv_vsse64(&buffer_imag[i], N * sizeof(double), kernel.packet[i].imag, unpacket_traits<PacketMul1Xd>::size);
}
for (i = 0; i < N; i++) {
kernel.packet[i].real =
__riscv_vle64_v_f64m1(&buffer_real[i * unpacket_traits<PacketMul1Xd>::size], unpacket_traits<PacketMul1Xd>::size);
kernel.packet[i].imag =
__riscv_vle64_v_f64m1(&buffer_imag[i * unpacket_traits<PacketMul1Xd>::size], unpacket_traits<PacketMul1Xd>::size);
}
}
template <>
EIGEN_STRONG_INLINE PacketXcd psqrt<PacketXcd>(const PacketXcd& a) {
return psqrt_complex_rvv<PacketXcd>(a);
}
template <>
EIGEN_STRONG_INLINE PacketXcd plog<PacketXcd>(const PacketXcd& a) {
return plog_complex_rvv<PacketXcd>(a);
}
template <>
struct conj_helper<PacketMul2Xd, PacketXcd, false, false> {
EIGEN_STRONG_INLINE PacketXcd pmadd(const PacketMul2Xd& x, const PacketXcd& y, const PacketXcd& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcd pmsub(const PacketMul2Xd& x, const PacketXcd& y, const PacketXcd& c) const {
return psub(this->pmul(x, y), c);
}
EIGEN_STRONG_INLINE PacketXcd pmul(const PacketMul2Xd& x, const PacketXcd& y) const {
return PacketXcd(Eigen::internal::pmul<PacketMul2Xd>(x, pcast<PacketXcd, PacketMul2Xd>(y)));
}
};
template <>
struct conj_helper<PacketXcd, PacketMul2Xd, false, false> {
EIGEN_STRONG_INLINE PacketXcd pmadd(const PacketXcd& x, const PacketMul2Xd& y, const PacketXcd& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcd pmsub(const PacketXcd& x, const PacketMul2Xd& y, const PacketXcd& c) const {
return psub(this->pmul(x, y), c);
}
EIGEN_STRONG_INLINE PacketXcd pmul(const PacketXcd& x, const PacketMul2Xd& y) const {
return PacketXcd(Eigen::internal::pmul<PacketMul2Xd>(pcast<PacketXcd, PacketMul2Xd>(x), y));
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_COMPLEX_RVV10_H

View File

@@ -1,491 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H
#define EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H
#include "../../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
/********************************* real ************************************/
template <>
struct gebp_traits<float, float, false, false, Architecture::RVV10, GEBPPacketFull>
: gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
typedef float RhsPacket;
typedef QuadPacket<float> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f32m1(a, b, c, unpacket_traits<AccPacket>::size);
}
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
c = __riscv_vfmadd_vf_f32m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
}
};
template <>
struct gebp_traits<double, double, false, false, Architecture::RVV10, GEBPPacketFull>
: gebp_traits<double, double, false, false, Architecture::Generic, GEBPPacketFull> {
typedef double RhsPacket;
typedef QuadPacket<double> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f64m1(a, b, c, unpacket_traits<AccPacket>::size);
}
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
c = __riscv_vfmadd_vf_f64m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
}
};
#if defined(EIGEN_VECTORIZE_RVV10FP16)
template <>
struct gebp_traits<half, half, false, false, Architecture::RVV10>
: gebp_traits<half, half, false, false, Architecture::Generic> {
typedef half RhsPacket;
typedef PacketXh LhsPacket;
typedef PacketXh AccPacket;
typedef QuadPacket<half> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = pload<RhsPacket>(b); }
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f16m1(a, b, c, unpacket_traits<AccPacket>::size);
}
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
c = __riscv_vfmadd_vf_f16m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
}
};
#endif
/********************************* complex ************************************/
#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<name##Scalar>::type, typename packet_traits<name##Scalar>::half, \
typename unpacket_traits<typename packet_traits<name##Scalar>::half>::half>::type name##Packet##postfix
#define RISCV_COMPLEX_PACKET_DECL_COND_SCALAR(packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<Scalar>::type, typename packet_traits<Scalar>::half, \
typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type ScalarPacket
template <typename RealScalar, bool ConjLhs_, bool ConjRhs_, int PacketSize_>
struct gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, ConjLhs_, ConjRhs_, Architecture::RVV10,
PacketSize_> : gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, ConjLhs_, ConjRhs_,
Architecture::Generic, PacketSize_> {
typedef std::complex<RealScalar> Scalar;
typedef std::complex<RealScalar> LhsScalar;
typedef std::complex<RealScalar> RhsScalar;
typedef std::complex<RealScalar> ResScalar;
typedef typename packet_traits<std::complex<RealScalar>>::type RealPacket;
PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
RISCV_COMPLEX_PACKET_DECL_COND_SCALAR(PacketSize_);
#undef RISCV_COMPLEX_PACKET_DECL_COND_SCALAR
enum {
ConjLhs = ConjLhs_,
ConjRhs = ConjRhs_,
Vectorizable = unpacket_traits<RealPacket>::vectorizable && unpacket_traits<ScalarPacket>::vectorizable,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsScalar>::size : 1,
RealPacketSize = Vectorizable ? unpacket_traits<RealPacket>::size : 1,
nr = 4,
mr = ResPacketSize,
LhsProgress = ResPacketSize,
RhsProgress = 1
};
typedef DoublePacket<RealPacket> DoublePacketType;
typedef std::conditional_t<Vectorizable, ScalarPacket, Scalar> LhsPacket4Packing;
typedef std::conditional_t<Vectorizable, RealPacket, Scalar> LhsPacket;
typedef std::conditional_t<Vectorizable, DoublePacket<RealScalar>, Scalar> RhsPacket;
typedef std::conditional_t<Vectorizable, ScalarPacket, Scalar> ResPacket;
typedef std::conditional_t<Vectorizable, DoublePacketType, Scalar> AccPacket;
typedef QuadPacket<RhsPacket> RhsPacketx4;
EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p) {
p.first = pset1<RealPacket>(RealScalar(0));
p.second = pset1<RealPacket>(RealScalar(0));
}
// Scalar path
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ScalarPacket& dest) const { dest = pset1<ScalarPacket>(*b); }
// Vectorized path
template <typename RealPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const {
dest.first = pset1<RealPacketType>(numext::real(*b));
dest.second = pset1<RealPacketType>(numext::imag(*b));
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
loadRhs(b, dest.B_0);
loadRhs(b + 1, dest.B1);
loadRhs(b + 2, dest.B2);
loadRhs(b + 3, dest.B3);
}
// Scalar path
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, ScalarPacket& dest) const { loadRhs(b, dest); }
// Vectorized path
template <typename RealPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const {
loadRhs(b, dest);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacket<RealScalar>& dest) const {
loadQuadToDoublePacket(b, dest);
}
// nothing special here
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const {
dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
}
template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploadu<LhsPacketType>((const typename unpacket_traits<LhsPacketType>::type*)(a));
}
EIGEN_STRONG_INLINE PacketXcf pmadd_scalar(const PacketXcf& a, float b, const PacketXcf& c) const {
PacketXf v1 = __riscv_vfmadd_vf_f32m1(a.real, b, c.real, unpacket_traits<PacketXf>::size);
PacketXf v4 = __riscv_vfmadd_vf_f32m1(a.imag, b, c.imag, unpacket_traits<PacketXf>::size);
return PacketXcf(v1, v4);
}
EIGEN_STRONG_INLINE PacketXcd pmadd_scalar(const PacketXcd& a, double b, const PacketXcd& c) const {
PacketXd v1 = __riscv_vfmadd_vf_f64m1(a.real, b, c.real, unpacket_traits<PacketXd>::size);
PacketXd v4 = __riscv_vfmadd_vf_f64m1(a.imag, b, c.imag, unpacket_traits<PacketXd>::size);
return PacketXcd(v1, v4);
}
template <typename LhsPacketType, typename RhsPacketType, typename ResPacketType, typename TmpType,
typename LaneIdType>
EIGEN_STRONG_INLINE std::enable_if_t<!is_same<RhsPacketType, RhsPacketx4>::value> madd(const LhsPacketType& a,
const RhsPacketType& b,
DoublePacket<ResPacketType>& c,
TmpType& /*tmp*/,
const LaneIdType&) const {
c.first = pmadd_scalar(a, b.first, c.first);
c.second = pmadd_scalar(a, b.second, c.second);
}
template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/,
const LaneIdType&) const {
c = cj.pmadd(a, b, c);
}
protected:
conj_helper<LhsScalar, RhsScalar, ConjLhs, ConjRhs> cj;
};
#define PACKET_DECL_COND_SCALAR_POSTFIX(postfix, packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<Scalar>::type, typename packet_traits<Scalar>::half, \
typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type ScalarPacket##postfix
template <typename RealScalar, bool ConjRhs_, int PacketSize_>
class gebp_traits<RealScalar, std::complex<RealScalar>, false, ConjRhs_, Architecture::RVV10, PacketSize_>
: public gebp_traits<RealScalar, std::complex<RealScalar>, false, ConjRhs_, Architecture::Generic, PacketSize_> {
public:
typedef std::complex<RealScalar> Scalar;
typedef RealScalar LhsScalar;
typedef Scalar RhsScalar;
typedef Scalar ResScalar;
PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Real, PacketSize_);
PACKET_DECL_COND_SCALAR_POSTFIX(_, PacketSize_);
#undef PACKET_DECL_COND_SCALAR_POSTFIX
enum {
ConjLhs = false,
ConjRhs = ConjRhs_,
Vectorizable = unpacket_traits<RealPacket_>::vectorizable && unpacket_traits<ScalarPacket_>::vectorizable,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
// FIXME: should depend on NumberOfRegisters
nr = 4,
mr = (plain_enum_min(16, NumberOfRegisters) / 2 / nr) * ResPacketSize,
LhsProgress = ResPacketSize,
RhsProgress = 1
};
typedef std::conditional_t<Vectorizable, LhsPacket_, LhsScalar> LhsPacket;
typedef RhsScalar RhsPacket;
typedef std::conditional_t<Vectorizable, ResPacket_, ResScalar> ResPacket;
typedef LhsPacket LhsPacket4Packing;
typedef QuadPacket<RhsPacket> RhsPacketx4;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { p = pset1<ResPacket>(ResScalar(0)); }
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const {
dest = pset1<RhsPacketType>(*b);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const {
loadRhs(b, dest);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = pload<LhsPacket>(a); }
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }
template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploadu<LhsPacketType>((const typename unpacket_traits<LhsPacketType>::type*)a);
}
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp,
const LaneIdType&) const {
madd_impl(a, b, c, tmp, std::conditional_t<Vectorizable, true_type, false_type>());
}
EIGEN_STRONG_INLINE PacketXcf pmadd_scalar(const PacketXf& a, std::complex<float> b, const PacketXcf& c) const {
PacketXf v1 = __riscv_vfmadd_vf_f32m1(a, b.real(), c.real, unpacket_traits<PacketXf>::size);
PacketXf v3 = __riscv_vfmadd_vf_f32m1(a, b.imag(), c.imag, unpacket_traits<PacketXf>::size);
return PacketXcf(v1, v3);
}
EIGEN_STRONG_INLINE PacketXcd pmadd_scalar(const PacketXd& a, std::complex<double> b, const PacketXcd& c) const {
PacketXd v1 = __riscv_vfmadd_vf_f64m1(a, b.real(), c.real, unpacket_traits<PacketXd>::size);
PacketXd v3 = __riscv_vfmadd_vf_f64m1(a, b.imag(), c.imag, unpacket_traits<PacketXd>::size);
return PacketXcd(v1, v3);
}
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c,
RhsPacketType& tmp, const true_type&) const {
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd_scalar(a, b, c);
}
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/,
const false_type&) const {
c += a * b;
}
template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}
template <typename ResPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const {
conj_helper<ResPacketType, ResPacketType, false, ConjRhs> cj;
r = cj.pmadd(alpha, c, r);
}
};
template <typename RealScalar, bool ConjLhs_, int PacketSize_>
class gebp_traits<std::complex<RealScalar>, RealScalar, ConjLhs_, false, Architecture::RVV10, PacketSize_>
: public gebp_traits<RealScalar, std::complex<RealScalar>, ConjLhs_, false, Architecture::Generic, PacketSize_> {
public:
typedef std::complex<RealScalar> LhsScalar;
typedef RealScalar RhsScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
#undef PACKET_DECL_COND_POSTFIX
enum {
ConjLhs = ConjLhs_,
ConjRhs = false,
Vectorizable = unpacket_traits<LhsPacket_>::vectorizable && unpacket_traits<RhsPacket_>::vectorizable,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
nr = 4,
mr = 3 * LhsPacketSize,
LhsProgress = LhsPacketSize,
RhsProgress = 1
};
typedef std::conditional_t<Vectorizable, LhsPacket_, LhsScalar> LhsPacket;
typedef RhsScalar RhsPacket;
typedef std::conditional_t<Vectorizable, ResPacket_, ResScalar> ResPacket;
typedef LhsPacket LhsPacket4Packing;
typedef QuadPacket<RhsPacket> RhsPacketx4;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { p = pset1<ResPacket>(ResScalar(0)); }
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const {
dest = pset1<RhsPacketType>(*b);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const {
loadRhs(b, dest);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const {
loadRhsQuad_impl(b, dest, std::conditional_t<RhsPacketSize == 16, true_type, false_type>());
}
EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const true_type&) const {
// FIXME we can do better!
// what we want here is a ploadheight
RhsScalar tmp[4] = {b[0], b[0], b[1], b[1]};
dest = ploadquad<RhsPacket>(tmp);
}
EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const false_type&) const {
eigen_internal_assert(RhsPacketSize <= 8);
dest = pset1<RhsPacket>(*b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = pload<LhsPacket>(a); }
template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploadu<LhsPacketType>(a);
}
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp,
const LaneIdType&) const {
madd_impl(a, b, c, tmp, std::conditional_t<Vectorizable, true_type, false_type>());
}
EIGEN_STRONG_INLINE PacketXcf pmadd_scalar(const PacketXcf& a, float b, const PacketXcf& c) const {
PacketXf v1 = __riscv_vfmadd_vf_f32m1(a.real, b, c.real, unpacket_traits<PacketXf>::size);
PacketXf v3 = __riscv_vfmadd_vf_f32m1(a.imag, b, c.imag, unpacket_traits<PacketXf>::size);
return PacketXcf(v1, v3);
}
EIGEN_STRONG_INLINE PacketXcd pmadd_scalar(const PacketXcd& a, double b, const PacketXcd& c) const {
PacketXd v1 = __riscv_vfmadd_vf_f64m1(a.real, b, c.real, unpacket_traits<PacketXd>::size);
PacketXd v3 = __riscv_vfmadd_vf_f64m1(a.imag, b, c.imag, unpacket_traits<PacketXd>::size);
return PacketXcd(v1, v3);
}
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c,
RhsPacketType& tmp, const true_type&) const {
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd_scalar(a, b, c);
}
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/,
const false_type&) const {
c += a * b;
}
template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}
template <typename ResPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const {
conj_helper<ResPacketType, ResPacketType, ConjLhs, false> cj;
r = cj.pmadd(c, alpha, r);
}
};
} // namespace internal
} // namespace Eigen
#endif // EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H

View File

@@ -25,6 +25,10 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
// Temporarily make LMUL = 1
#undef EIGEN_RISCV64_DEFAULT_LMUL
#define EIGEN_RISCV64_DEFAULT_LMUL 1
template <typename Scalar, std::size_t VectorLength, std::size_t VectorLMul>
struct rvv_packet_size_selector {
enum { size = VectorLength * VectorLMul / (sizeof(Scalar) * CHAR_BIT) };

View File

@@ -540,7 +540,7 @@ extern "C" {
#if defined(__riscv)
// Defines the default LMUL for RISC-V
#ifndef EIGEN_RISCV64_DEFAULT_LMUL
#define EIGEN_RISCV64_DEFAULT_LMUL 4
#define EIGEN_RISCV64_DEFAULT_LMUL 1
#endif
#endif