0
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2026-01-18 17:31:19 +01:00

Merge remote-tracking branch 'origin2/master'

This commit is contained in:
Chip Kerchner
2025-11-20 17:43:17 +00:00
11 changed files with 6642 additions and 5 deletions

View File

@@ -277,6 +277,15 @@ using std::ptrdiff_t;
#include "src/Core/arch/SVE/PacketMath.h"
#include "src/Core/arch/SVE/TypeCasting.h"
#include "src/Core/arch/SVE/MathFunctions.h"
#elif defined EIGEN_VECTORIZE_RVV10
#include "src/Core/arch/RVV10/PacketMath.h"
#include "src/Core/arch/RVV10/PacketMath4.h"
#include "src/Core/arch/RVV10/PacketMath2.h"
#include "src/Core/arch/RVV10/TypeCasting.h"
#include "src/Core/arch/RVV10/MathFunctions.h"
#if defined EIGEN_VECTORIZE_RVV10FP16
#include "src/Core/arch/RVV10/PacketMathFP16.h"
#endif
#elif defined EIGEN_VECTORIZE_ZVECTOR
#include "src/Core/arch/ZVector/PacketMath.h"
#include "src/Core/arch/ZVector/MathFunctions.h"

View File

@@ -0,0 +1,30 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_MATH_FUNCTIONS_RVV10_H
#define EIGEN_MATH_FUNCTIONS_RVV10_H
// IWYU pragma: private
#include "../../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
// Instantiate Eigen's generic packet-based math functions (exp, log, sin,
// cos, tanh, ...) for every RVV10 float and double packet type, covering
// LMUL 1, 2 and 4 register groupings.
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet1Xf)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet2Xf)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4Xf)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet1Xd)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2Xd)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet4Xd)
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_RVV10_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,922 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PACKET_MATH_FP16_RVV10_H
#define EIGEN_PACKET_MATH_FP16_RVV10_H
// IWYU pragma: private
#include "../../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
// Fixed-size RVV half-precision packet types. The riscv_rvv_vector_bits
// attribute pins the sizeless vfloat16 types to a compile-time width so they
// can be used as regular value types: Packet1Xh is one vector register
// (LMUL=1, EIGEN_RISCV64_RVV_VL bits), Packet2Xh groups two (LMUL=2).
typedef vfloat16m1_t Packet1Xh __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL)));
typedef vfloat16m2_t Packet2Xh __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2)));
// Select the "default" half packet (PacketXh) and its packet_traits based on
// the configured default LMUL. Both branches advertise the same operation
// set; they differ only in the packet type and its element count.
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
typedef Packet1Xh PacketXh;
template <>
struct packet_traits<Eigen::half> : default_packet_traits {
typedef Packet1Xh type;
typedef Packet1Xh half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
// Number of fp16 lanes in one LMUL=1 register of EIGEN_RISCV64_RVV_VL bits.
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 1>::size,
HasAdd = 1,
HasSub = 1,
HasShift = 1,
HasMul = 1,
HasNegate = 1,
HasAbs = 1,
HasArg = 0,
HasAbs2 = 1,
HasMin = 1,
HasMax = 1,
HasConj = 1,
HasSetLinear = 0,
HasBlend = 0,
HasReduxp = 0,
HasCmp = 1,
HasDiv = 1,
HasRound = 1,
// Trig/tanh go through the float-path F16_PACKET_FUNCTION wrappers below and
// are only advertised under fast-math.
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
HasLog = 0,
HasExp = 0,
HasSqrt = 1,
HasTanh = EIGEN_FAST_MATH,
HasErf = 0
};
};
#else
// Default LMUL of 2: full packet is the two-register group, half is m1.
typedef Packet2Xh PacketXh;
template <>
struct packet_traits<Eigen::half> : default_packet_traits {
typedef Packet2Xh type;
typedef Packet1Xh half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 2>::size,
HasAdd = 1,
HasSub = 1,
HasShift = 1,
HasMul = 1,
HasNegate = 1,
HasAbs = 1,
HasArg = 0,
HasAbs2 = 1,
HasMin = 1,
HasMax = 1,
HasConj = 1,
HasSetLinear = 0,
HasBlend = 0,
HasReduxp = 0,
HasCmp = 1,
HasDiv = 1,
HasRound = 1,
HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH,
HasLog = 0,
HasExp = 0,
HasSqrt = 1,
HasTanh = EIGEN_FAST_MATH,
HasErf = 0
};
};
#endif
template <>
struct unpacket_traits<Packet1Xh> {
typedef Eigen::half type;
typedef Packet1Xh half; // Half not yet implemented
// NOTE(review): integer_packet uses the default-LMUL PacketXs rather than an
// explicit Packet1Xs; presumably these coincide when this header is active --
// TODO confirm for EIGEN_RISCV64_DEFAULT_LMUL == 2.
typedef PacketXs integer_packet;
typedef numext::uint8_t mask_t;
enum {
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 1>::size,
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
vectorizable = true,
masked_load_available = false,
masked_store_available = false
};
};
template <>
struct unpacket_traits<Packet2Xh> {
typedef Eigen::half type;
// The half-sized packet is the single-register (LMUL=1) type.
typedef Packet1Xh half;
typedef Packet2Xs integer_packet;
typedef numext::uint8_t mask_t;
enum {
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 2>::size,
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
vectorizable = true,
masked_load_available = false,
masked_store_available = false
};
};
/********************************* PacketXh ************************************/
// All-ones bit pattern, reinterpreted as fp16 (the "true" value for pcmp_*).
template <>
EIGEN_STRONG_INLINE PacketXh ptrue<PacketXh>(const PacketXh& /*a*/) {
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(0xffffu, unpacket_traits<PacketXh>::size));
}
// Packet of +0.0 in every lane.
template <>
EIGEN_STRONG_INLINE PacketXh pzero<PacketXh>(const PacketXh& /*a*/) {
return __riscv_vfmv_v_f_f16m1(static_cast<Eigen::half>(0.0), unpacket_traits<PacketXh>::size);
}
// Lane-wise absolute value.
template <>
EIGEN_STRONG_INLINE PacketXh pabs(const PacketXh& a) {
return __riscv_vfabs_v_f16m1(a, unpacket_traits<PacketXh>::size);
}
// Broadcast a scalar to all lanes.
template <>
EIGEN_STRONG_INLINE PacketXh pset1<PacketXh>(const Eigen::half& from) {
return __riscv_vfmv_v_f_f16m1(static_cast<_Float16>(from), unpacket_traits<PacketXh>::size);
}
// Broadcast a raw 16-bit pattern, reinterpreted as fp16.
template <>
EIGEN_STRONG_INLINE PacketXh pset1frombits<PacketXh>(numext::uint16_t from) {
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(from, unpacket_traits<PacketXh>::size));
}
// {a, a+1, a+2, ...}: lane indices from vid, converted to fp16, plus a.
// NOTE(review): the vid length uses unpacket_traits<PacketXs>::size; presumably
// this equals the PacketXh lane count -- TODO confirm when default LMUL != 1.
template <>
EIGEN_STRONG_INLINE PacketXh plset<PacketXh>(const Eigen::half& a) {
PacketXh idx =
__riscv_vfcvt_f_x_v_f16m1(__riscv_vid_v_i16m1(unpacket_traits<PacketXs>::size), unpacket_traits<PacketXh>::size);
return __riscv_vfadd_vf_f16m1(idx, a, unpacket_traits<PacketXh>::size);
}
// Lane-wise a + b.
template <>
EIGEN_STRONG_INLINE PacketXh padd<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vfadd_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
}
// Lane-wise a - b.
template <>
EIGEN_STRONG_INLINE PacketXh psub<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vfsub_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
}
// Lane-wise negation (sign-bit flip).
template <>
EIGEN_STRONG_INLINE PacketXh pnegate(const PacketXh& a) {
return __riscv_vfneg_v_f16m1(a, unpacket_traits<PacketXh>::size);
}
// Conjugate is the identity for real scalars.
template <>
EIGEN_STRONG_INLINE PacketXh pconj(const PacketXh& a) {
return a;
}
// Lane-wise a * b.
template <>
EIGEN_STRONG_INLINE PacketXh pmul<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vfmul_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
}
// Lane-wise a / b.
template <>
EIGEN_STRONG_INLINE PacketXh pdiv<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vfdiv_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
}
// Fused a*b + c (vfmadd computes vd*vs1 + vs2 with vd = a).
template <>
EIGEN_STRONG_INLINE PacketXh pmadd(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
return __riscv_vfmadd_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
}
// Fused a*b - c.
template <>
EIGEN_STRONG_INLINE PacketXh pmsub(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
return __riscv_vfmsub_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
}
// Fused -(a*b) + c (RVV vfnmsub negates the product, not the addend).
template <>
EIGEN_STRONG_INLINE PacketXh pnmadd(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
return __riscv_vfnmsub_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
}
// Fused -(a*b) - c.
template <>
EIGEN_STRONG_INLINE PacketXh pnmsub(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
return __riscv_vfnmadd_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
}
// NaN-propagating min: IEEE vfmin would return the non-NaN operand, so lanes
// where either input is NaN are masked out (tum = tail/masked-off lanes keep
// the destination, which is pre-filled with quiet NaN).
template <>
EIGEN_STRONG_INLINE PacketXh pmin<PacketXh>(const PacketXh& a, const PacketXh& b) {
PacketXh nans =
__riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<PacketXh>::size);
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<PacketXh>::size);
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<PacketXh>::size);
return __riscv_vfmin_vv_f16m1_tum(mask, nans, a, b, unpacket_traits<PacketXh>::size);
}
// The default pmin already propagates NaN.
template <>
EIGEN_STRONG_INLINE PacketXh pmin<PropagateNaN, PacketXh>(const PacketXh& a, const PacketXh& b) {
return pmin<PacketXh>(a, b);
}
// Number-propagating min: plain vfmin returns the number when one input is NaN.
template <>
EIGEN_STRONG_INLINE PacketXh pmin<PropagateNumbers, PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vfmin_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
}
// NaN-propagating max; same masking scheme as pmin above.
template <>
EIGEN_STRONG_INLINE PacketXh pmax<PacketXh>(const PacketXh& a, const PacketXh& b) {
PacketXh nans =
__riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<PacketXh>::size);
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<PacketXh>::size);
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<PacketXh>::size);
return __riscv_vfmax_vv_f16m1_tum(mask, nans, a, b, unpacket_traits<PacketXh>::size);
}
template <>
EIGEN_STRONG_INLINE PacketXh pmax<PropagateNaN, PacketXh>(const PacketXh& a, const PacketXh& b) {
return pmax<PacketXh>(a, b);
}
template <>
EIGEN_STRONG_INLINE PacketXh pmax<PropagateNumbers, PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vfmax_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
}
// Comparisons return all-ones (ptrue) in lanes where the predicate holds and
// all-zeros elsewhere, per Eigen's packet-comparison convention.
template <>
EIGEN_STRONG_INLINE PacketXh pcmp_le<PacketXh>(const PacketXh& a, const PacketXh& b) {
PacketMask16 mask = __riscv_vmfle_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
}
template <>
EIGEN_STRONG_INLINE PacketXh pcmp_lt<PacketXh>(const PacketXh& a, const PacketXh& b) {
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
}
template <>
EIGEN_STRONG_INLINE PacketXh pcmp_eq<PacketXh>(const PacketXh& a, const PacketXh& b) {
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
}
// True where !(a >= b), i.e. a < b or either operand is NaN (unordered).
template <>
EIGEN_STRONG_INLINE PacketXh pcmp_lt_or_nan<PacketXh>(const PacketXh& a, const PacketXh& b) {
PacketMask16 mask = __riscv_vmfge_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
return __riscv_vfmerge_vfm_f16m1(ptrue<PacketXh>(a), static_cast<Eigen::half>(0.0), mask,
unpacket_traits<PacketXh>::size);
}
// Logical Operations are not supported for half, so reinterpret casts
template <>
EIGEN_STRONG_INLINE PacketXh pand<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vand_vv_u16m1(
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
}
template <>
EIGEN_STRONG_INLINE PacketXh por<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vor_vv_u16m1(
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
}
template <>
EIGEN_STRONG_INLINE PacketXh pxor<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vxor_vv_u16m1(
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
}
// a & ~b on the raw bit patterns.
template <>
EIGEN_STRONG_INLINE PacketXh pandnot<PacketXh>(const PacketXh& a, const PacketXh& b) {
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vand_vv_u16m1(
__riscv_vreinterpret_v_f16m1_u16m1(a),
__riscv_vnot_v_u16m1(__riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size),
unpacket_traits<PacketXh>::size));
}
// Aligned load. RVV vector loads have no alignment requirement, so aligned
// and unaligned paths share the same instruction.
template <>
EIGEN_STRONG_INLINE PacketXh pload<PacketXh>(const Eigen::half* from) {
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_f16m1(reinterpret_cast<const _Float16*>(from),
unpacket_traits<PacketXh>::size);
}
// Unaligned load.
template <>
EIGEN_STRONG_INLINE PacketXh ploadu<PacketXh>(const Eigen::half* from) {
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_f16m1(reinterpret_cast<const _Float16*>(from),
unpacket_traits<PacketXh>::size);
}
// Load with each element duplicated twice: {a0,a0,a1,a1,...}. vloxei16 takes
// byte offsets, and clearing bit 0 of the lane index yields 0,0,2,2,4,4,...
// which addresses from[0],from[0],from[1],from[1],... for 2-byte elements.
template <>
EIGEN_STRONG_INLINE PacketXh ploaddup<PacketXh>(const Eigen::half* from) {
PacketXsu idx = __riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size);
idx = __riscv_vand_vx_u16m1(idx, 0xfffeu, unpacket_traits<PacketXh>::size);
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketXh>::size);
}
// Load with each element duplicated four times: (i & ~3) >> 1 gives byte
// offsets 0,0,0,0,2,2,2,2,... i.e. from[0] x4, from[1] x4, ...
template <>
EIGEN_STRONG_INLINE PacketXh ploadquad<PacketXh>(const Eigen::half* from) {
PacketXsu idx = __riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size);
idx = __riscv_vsrl_vx_u16m1(__riscv_vand_vx_u16m1(idx, 0xfffcu, unpacket_traits<PacketXh>::size), 1,
unpacket_traits<PacketXh>::size);
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketXh>::size);
}
// Aligned / unaligned stores (same instruction, see pload note).
template <>
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const PacketXh& from) {
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_f16m1(reinterpret_cast<_Float16*>(to), from,
unpacket_traits<PacketXh>::size);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const PacketXh& from) {
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_f16m1(reinterpret_cast<_Float16*>(to), from,
unpacket_traits<PacketXh>::size);
}
// Strided gather/scatter; stride is in elements, converted to bytes here.
template <>
EIGEN_DEVICE_FUNC inline PacketXh pgather<Eigen::half, PacketXh>(const Eigen::half* from, Index stride) {
return __riscv_vlse16_v_f16m1(reinterpret_cast<const _Float16*>(from), stride * sizeof(Eigen::half),
unpacket_traits<PacketXh>::size);
}
template <>
EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, PacketXh>(Eigen::half* to, const PacketXh& from, Index stride) {
__riscv_vsse16(reinterpret_cast<_Float16*>(to), stride * sizeof(Eigen::half), from, unpacket_traits<PacketXh>::size);
}
// Extract lane 0.
template <>
EIGEN_STRONG_INLINE Eigen::half pfirst<PacketXh>(const PacketXh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f_s_f16m1_f16(a));
}
// Lane-wise square root.
template <>
EIGEN_STRONG_INLINE PacketXh psqrt(const PacketXh& a) {
return __riscv_vfsqrt_v_f16m1(a, unpacket_traits<PacketXh>::size);
}
// Round to integral value using the current rounding mode.
// - NaN lanes: a+a is computed under a mask so NaNs pass through as NaN.
// - |a| >= 2^10: fp16 values this large are already integral (ulp >= 1), so
//   the original value is kept to avoid the out-of-range int16 conversion.
// - vfsgnj restores the sign of the input so -0.0 rounds to -0.0.
template <>
EIGEN_STRONG_INLINE PacketXh print<PacketXh>(const PacketXh& a) {
const PacketXh limit = pset1<PacketXh>(static_cast<Eigen::half>(1 << 10));
const PacketXh abs_a = pabs(a);
PacketMask16 mask = __riscv_vmfne_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
const PacketXh x = __riscv_vfadd_vv_f16m1_tum(mask, a, a, a, unpacket_traits<PacketXh>::size);
const PacketXh new_x = __riscv_vfcvt_f_x_v_f16m1(__riscv_vfcvt_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
unpacket_traits<PacketXh>::size);
mask = __riscv_vmflt_vv_f16m1_b16(abs_a, limit, unpacket_traits<PacketXh>::size);
PacketXh signed_x = __riscv_vfsgnj_vv_f16m1(new_x, x, unpacket_traits<PacketXh>::size);
return __riscv_vmerge_vvm_f16m1(x, signed_x, mask, unpacket_traits<PacketXh>::size);
}
// Floor = round-to-nearest, then subtract 1 in lanes that rounded up.
template <>
EIGEN_STRONG_INLINE PacketXh pfloor<PacketXh>(const PacketXh& a) {
PacketXh tmp = print<PacketXh>(a);
// If greater, subtract one.
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, tmp, unpacket_traits<PacketXh>::size);
return __riscv_vfsub_vf_f16m1_tum(mask, tmp, tmp, static_cast<Eigen::half>(1.0), unpacket_traits<PacketXh>::size);
}
// Reverse lane order via a gather with indices size-1, size-2, ..., 0.
template <>
EIGEN_STRONG_INLINE PacketXh preverse(const PacketXh& a) {
PacketXsu idx = __riscv_vrsub_vx_u16m1(__riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size),
unpacket_traits<PacketXh>::size - 1, unpacket_traits<PacketXh>::size);
return __riscv_vrgather_vv_f16m1(a, idx, unpacket_traits<PacketXh>::size);
}
// Sum of all lanes (unordered reduction seeded with 0).
template <>
EIGEN_STRONG_INLINE Eigen::half predux<PacketXh>(const PacketXh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredusum_vs_f16m1_f16m1(
a, __riscv_vfmv_v_f_f16m1(static_cast<Eigen::half>(0.0), unpacket_traits<PacketXh>::size),
unpacket_traits<PacketXh>::size)));
}
// Product of all lanes. RVV has no multiply reduction, so this does a
// log2(size) tree: multiply by the reversed vector, then repeatedly fold the
// upper half onto the lower half with slidedown. The EIGEN_RISCV64_RVV_VL
// comparisons are compile-time constants, so untaken branches fold away.
// NOTE(review): assumes the lane count is a power of two -- TODO confirm.
template <>
EIGEN_STRONG_INLINE Eigen::half predux_mul<PacketXh>(const PacketXh& a) {
// Multiply the vector by its reverse
PacketXh prod = __riscv_vfmul_vv_f16m1(preverse(a), a, unpacket_traits<PacketXh>::size);
PacketXh half_prod;
if (EIGEN_RISCV64_RVV_VL >= 1024) {
half_prod = __riscv_vslidedown_vx_f16m1(prod, 16, unpacket_traits<PacketXh>::size);
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
}
if (EIGEN_RISCV64_RVV_VL >= 512) {
half_prod = __riscv_vslidedown_vx_f16m1(prod, 8, unpacket_traits<PacketXh>::size);
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
}
if (EIGEN_RISCV64_RVV_VL >= 256) {
half_prod = __riscv_vslidedown_vx_f16m1(prod, 4, unpacket_traits<PacketXh>::size);
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
}
// Last reduction
half_prod = __riscv_vslidedown_vx_f16m1(prod, 2, unpacket_traits<PacketXh>::size);
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
half_prod = __riscv_vslidedown_vx_f16m1(prod, 1, unpacket_traits<PacketXh>::size);
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
// The reduction is done to the first element.
return pfirst(prod);
}
// Min over all lanes, seeded with +max so the seed never wins.
template <>
EIGEN_STRONG_INLINE Eigen::half predux_min<PacketXh>(const PacketXh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmin_vs_f16m1_f16m1(
a, __riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::max)(), unpacket_traits<PacketXh>::size),
unpacket_traits<PacketXh>::size)));
}
// Max over all lanes, seeded with -max.
template <>
EIGEN_STRONG_INLINE Eigen::half predux_max<PacketXh>(const PacketXh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmax_vs_f16m1_f16m1(
a, __riscv_vfmv_v_f_f16m1(-(std::numeric_limits<Eigen::half>::max)(), unpacket_traits<PacketXh>::size),
unpacket_traits<PacketXh>::size)));
}
// Transpose an N x size block: scatter each packet into a stack buffer with
// stride N (columns become rows), then reload rows contiguously.
template <int N>
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXh, N>& kernel) {
Eigen::half buffer[unpacket_traits<PacketXh>::size * N];
int i = 0;
for (i = 0; i < N; i++) {
__riscv_vsse16(reinterpret_cast<_Float16*>(&buffer[i]), N * sizeof(Eigen::half), kernel.packet[i],
unpacket_traits<PacketXh>::size);
}
for (i = 0; i < N; i++) {
kernel.packet[i] = __riscv_vle16_v_f16m1(reinterpret_cast<_Float16*>(&buffer[i * unpacket_traits<PacketXh>::size]),
unpacket_traits<PacketXh>::size);
}
}
// Widen fp16 (m1) to fp32: the result needs twice the register width (m2).
EIGEN_STRONG_INLINE Packet2Xf half2float(const PacketXh& a) {
return __riscv_vfwcvt_f_f_v_f32m2(a, unpacket_traits<Packet2Xf>::size);
}
// Narrow fp32 (m2) back down to fp16 (m1).
EIGEN_STRONG_INLINE PacketXh float2half(const Packet2Xf& a) {
return __riscv_vfncvt_f_f_w_f16m1(a, unpacket_traits<PacketXh>::size);
}
/********************************* Packet2Xh ************************************/
// LMUL=2 variants of the basic ops; same semantics as the m1 versions above.
template <>
EIGEN_STRONG_INLINE Packet2Xh ptrue<Packet2Xh>(const Packet2Xh& /*a*/) {
return __riscv_vreinterpret_f16m2(__riscv_vmv_v_x_u16m2(0xffffu, unpacket_traits<Packet2Xh>::size));
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pzero<Packet2Xh>(const Packet2Xh& /*a*/) {
return __riscv_vfmv_v_f_f16m2(static_cast<Eigen::half>(0.0), unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pabs(const Packet2Xh& a) {
return __riscv_vfabs_v_f16m2(a, unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pset1<Packet2Xh>(const Eigen::half& from) {
return __riscv_vfmv_v_f_f16m2(static_cast<_Float16>(from), unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pset1frombits<Packet2Xh>(numext::uint16_t from) {
return __riscv_vreinterpret_f16m2(__riscv_vmv_v_x_u16m2(from, unpacket_traits<Packet2Xh>::size));
}
// Returns {a, a+1, a+2, ...}: lane indices from vid, converted to fp16, plus a.
template <>
EIGEN_STRONG_INLINE Packet2Xh plset<Packet2Xh>(const Eigen::half& a) {
// Use the f16m2 packet's own lane count for the index vector. The previous
// code passed unpacket_traits<Packet4Xs>::size (an LMUL-4 element count,
// twice the VLMAX of an e16m2 operation); the AVL is clamped to VLMAX by the
// hardware, but the request was oversized and inconsistent with every other
// vl argument in this function and with the m1 overload above.
Packet2Xh idx = __riscv_vfcvt_f_x_v_f16m2(__riscv_vid_v_i16m2(unpacket_traits<Packet2Xh>::size),
unpacket_traits<Packet2Xh>::size);
return __riscv_vfadd_vf_f16m2(idx, a, unpacket_traits<Packet2Xh>::size);
}
// LMUL=2 arithmetic; see the m1 versions above for FMA operand conventions.
template <>
EIGEN_STRONG_INLINE Packet2Xh padd<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vfadd_vv_f16m2(a, b, unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh psub<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vfsub_vv_f16m2(a, b, unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pnegate(const Packet2Xh& a) {
return __riscv_vfneg_v_f16m2(a, unpacket_traits<Packet2Xh>::size);
}
// Conjugate is the identity for real scalars.
template <>
EIGEN_STRONG_INLINE Packet2Xh pconj(const Packet2Xh& a) {
return a;
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pmul<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vfmul_vv_f16m2(a, b, unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pdiv<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vfdiv_vv_f16m2(a, b, unpacket_traits<Packet2Xh>::size);
}
// Fused a*b + c.
template <>
EIGEN_STRONG_INLINE Packet2Xh pmadd(const Packet2Xh& a, const Packet2Xh& b, const Packet2Xh& c) {
return __riscv_vfmadd_vv_f16m2(a, b, c, unpacket_traits<Packet2Xh>::size);
}
// Fused a*b - c.
template <>
EIGEN_STRONG_INLINE Packet2Xh pmsub(const Packet2Xh& a, const Packet2Xh& b, const Packet2Xh& c) {
return __riscv_vfmsub_vv_f16m2(a, b, c, unpacket_traits<Packet2Xh>::size);
}
// Fused -(a*b) + c.
template <>
EIGEN_STRONG_INLINE Packet2Xh pnmadd(const Packet2Xh& a, const Packet2Xh& b, const Packet2Xh& c) {
return __riscv_vfnmsub_vv_f16m2(a, b, c, unpacket_traits<Packet2Xh>::size);
}
// Fused -(a*b) - c.
template <>
EIGEN_STRONG_INLINE Packet2Xh pnmsub(const Packet2Xh& a, const Packet2Xh& b, const Packet2Xh& c) {
return __riscv_vfnmadd_vv_f16m2(a, b, c, unpacket_traits<Packet2Xh>::size);
}
// NaN-propagating min (see the m1 pmin above for the masking scheme; note the
// mask type is b8 here because LMUL=2 halves the ratio of mask bits to lanes).
template <>
EIGEN_STRONG_INLINE Packet2Xh pmin<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
Packet2Xh nans =
__riscv_vfmv_v_f_f16m2((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<Packet2Xh>::size);
PacketMask8 mask = __riscv_vmfeq_vv_f16m2_b8(a, a, unpacket_traits<Packet2Xh>::size);
PacketMask8 mask2 = __riscv_vmfeq_vv_f16m2_b8(b, b, unpacket_traits<Packet2Xh>::size);
mask = __riscv_vmand_mm_b8(mask, mask2, unpacket_traits<Packet2Xh>::size);
return __riscv_vfmin_vv_f16m2_tum(mask, nans, a, b, unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pmin<PropagateNaN, Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return pmin<Packet2Xh>(a, b);
}
// Number-propagating min: plain vfmin returns the number when one input is NaN.
template <>
EIGEN_STRONG_INLINE Packet2Xh pmin<PropagateNumbers, Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vfmin_vv_f16m2(a, b, unpacket_traits<Packet2Xh>::size);
}
// NaN-propagating max.
template <>
EIGEN_STRONG_INLINE Packet2Xh pmax<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
Packet2Xh nans =
__riscv_vfmv_v_f_f16m2((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<Packet2Xh>::size);
PacketMask8 mask = __riscv_vmfeq_vv_f16m2_b8(a, a, unpacket_traits<Packet2Xh>::size);
PacketMask8 mask2 = __riscv_vmfeq_vv_f16m2_b8(b, b, unpacket_traits<Packet2Xh>::size);
mask = __riscv_vmand_mm_b8(mask, mask2, unpacket_traits<Packet2Xh>::size);
return __riscv_vfmax_vv_f16m2_tum(mask, nans, a, b, unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pmax<PropagateNaN, Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return pmax<Packet2Xh>(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pmax<PropagateNumbers, Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vfmax_vv_f16m2(a, b, unpacket_traits<Packet2Xh>::size);
}
// Comparisons return all-ones where the predicate holds, all-zeros elsewhere.
template <>
EIGEN_STRONG_INLINE Packet2Xh pcmp_le<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
PacketMask8 mask = __riscv_vmfle_vv_f16m2_b8(a, b, unpacket_traits<Packet2Xh>::size);
return __riscv_vmerge_vvm_f16m2(pzero<Packet2Xh>(a), ptrue<Packet2Xh>(a), mask,
unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pcmp_lt<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
PacketMask8 mask = __riscv_vmflt_vv_f16m2_b8(a, b, unpacket_traits<Packet2Xh>::size);
return __riscv_vmerge_vvm_f16m2(pzero<Packet2Xh>(a), ptrue<Packet2Xh>(a), mask,
unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pcmp_eq<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
PacketMask8 mask = __riscv_vmfeq_vv_f16m2_b8(a, b, unpacket_traits<Packet2Xh>::size);
return __riscv_vmerge_vvm_f16m2(pzero<Packet2Xh>(a), ptrue<Packet2Xh>(a), mask,
unpacket_traits<Packet2Xh>::size);
}
// True where !(a >= b), i.e. a < b or either operand is NaN (unordered).
template <>
EIGEN_STRONG_INLINE Packet2Xh pcmp_lt_or_nan<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
PacketMask8 mask = __riscv_vmfge_vv_f16m2_b8(a, b, unpacket_traits<Packet2Xh>::size);
return __riscv_vfmerge_vfm_f16m2(ptrue<Packet2Xh>(a), static_cast<Eigen::half>(0.0), mask,
unpacket_traits<Packet2Xh>::size);
}
// Logical Operations are not supported for half, so reinterpret casts
template <>
EIGEN_STRONG_INLINE Packet2Xh pand<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vand_vv_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(a),
__riscv_vreinterpret_v_f16m2_u16m2(b),
unpacket_traits<Packet2Xh>::size));
}
template <>
EIGEN_STRONG_INLINE Packet2Xh por<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vor_vv_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(a),
__riscv_vreinterpret_v_f16m2_u16m2(b),
unpacket_traits<Packet2Xh>::size));
}
template <>
EIGEN_STRONG_INLINE Packet2Xh pxor<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vxor_vv_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(a),
__riscv_vreinterpret_v_f16m2_u16m2(b),
unpacket_traits<Packet2Xh>::size));
}
// a & ~b on the raw bit patterns.
template <>
EIGEN_STRONG_INLINE Packet2Xh pandnot<Packet2Xh>(const Packet2Xh& a, const Packet2Xh& b) {
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vand_vv_u16m2(
__riscv_vreinterpret_v_f16m2_u16m2(a),
__riscv_vnot_v_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(b), unpacket_traits<Packet2Xh>::size),
unpacket_traits<Packet2Xh>::size));
}
// Aligned / unaligned loads (RVV vector loads carry no alignment requirement).
template <>
EIGEN_STRONG_INLINE Packet2Xh pload<Packet2Xh>(const Eigen::half* from) {
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_f16m2(reinterpret_cast<const _Float16*>(from),
unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xh ploadu<Packet2Xh>(const Eigen::half* from) {
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_f16m2(reinterpret_cast<const _Float16*>(from),
unpacket_traits<Packet2Xh>::size);
}
// Each element duplicated twice; vloxei16 offsets are in bytes (see the m1
// ploaddup above for the index arithmetic).
template <>
EIGEN_STRONG_INLINE Packet2Xh ploaddup<Packet2Xh>(const Eigen::half* from) {
Packet2Xsu idx = __riscv_vid_v_u16m2(unpacket_traits<Packet2Xh>::size);
idx = __riscv_vand_vx_u16m2(idx, 0xfffeu, unpacket_traits<Packet2Xh>::size);
return __riscv_vloxei16_v_f16m2(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<Packet2Xh>::size);
}
// Each element duplicated four times: byte offsets (i & ~3) >> 1.
template <>
EIGEN_STRONG_INLINE Packet2Xh ploadquad<Packet2Xh>(const Eigen::half* from) {
Packet2Xsu idx = __riscv_vid_v_u16m2(unpacket_traits<Packet2Xh>::size);
idx = __riscv_vsrl_vx_u16m2(__riscv_vand_vx_u16m2(idx, 0xfffcu, unpacket_traits<Packet2Xh>::size), 1,
unpacket_traits<Packet2Xs>::size);
return __riscv_vloxei16_v_f16m2(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<Packet2Xh>::size);
}
// Aligned / unaligned stores.
template <>
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet2Xh& from) {
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_f16m2(reinterpret_cast<_Float16*>(to), from,
unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet2Xh& from) {
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_f16m2(reinterpret_cast<_Float16*>(to), from,
unpacket_traits<Packet2Xh>::size);
}
// Strided gather/scatter; stride is in elements, converted to bytes here.
template <>
EIGEN_DEVICE_FUNC inline Packet2Xh pgather<Eigen::half, Packet2Xh>(const Eigen::half* from, Index stride) {
return __riscv_vlse16_v_f16m2(reinterpret_cast<const _Float16*>(from), stride * sizeof(Eigen::half),
unpacket_traits<Packet2Xh>::size);
}
template <>
EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, Packet2Xh>(Eigen::half* to, const Packet2Xh& from,
Index stride) {
__riscv_vsse16(reinterpret_cast<_Float16*>(to), stride * sizeof(Eigen::half), from,
unpacket_traits<Packet2Xh>::size);
}
// Extract lane 0.
template <>
EIGEN_STRONG_INLINE Eigen::half pfirst<Packet2Xh>(const Packet2Xh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f_s_f16m2_f16(a));
}
// Lane-wise square root.
template <>
EIGEN_STRONG_INLINE Packet2Xh psqrt(const Packet2Xh& a) {
return __riscv_vfsqrt_v_f16m2(a, unpacket_traits<Packet2Xh>::size);
}
// Round to integral value; same NaN / large-value / sign handling as the m1
// print above (fp16 values with |a| >= 2^10 are already integral).
template <>
EIGEN_STRONG_INLINE Packet2Xh print<Packet2Xh>(const Packet2Xh& a) {
const Packet2Xh limit = pset1<Packet2Xh>(static_cast<Eigen::half>(1 << 10));
const Packet2Xh abs_a = pabs(a);
PacketMask8 mask = __riscv_vmfne_vv_f16m2_b8(a, a, unpacket_traits<Packet2Xh>::size);
const Packet2Xh x = __riscv_vfadd_vv_f16m2_tum(mask, a, a, a, unpacket_traits<Packet2Xh>::size);
const Packet2Xh new_x = __riscv_vfcvt_f_x_v_f16m2(
__riscv_vfcvt_x_f_v_i16m2(a, unpacket_traits<Packet2Xh>::size), unpacket_traits<Packet2Xh>::size);
mask = __riscv_vmflt_vv_f16m2_b8(abs_a, limit, unpacket_traits<Packet2Xh>::size);
Packet2Xh signed_x = __riscv_vfsgnj_vv_f16m2(new_x, x, unpacket_traits<Packet2Xh>::size);
return __riscv_vmerge_vvm_f16m2(x, signed_x, mask, unpacket_traits<Packet2Xh>::size);
}
// Floor = round-to-nearest, then subtract 1 in lanes that rounded up.
template <>
EIGEN_STRONG_INLINE Packet2Xh pfloor<Packet2Xh>(const Packet2Xh& a) {
Packet2Xh tmp = print<Packet2Xh>(a);
// If greater, subtract one.
PacketMask8 mask = __riscv_vmflt_vv_f16m2_b8(a, tmp, unpacket_traits<Packet2Xh>::size);
return __riscv_vfsub_vf_f16m2_tum(mask, tmp, tmp, static_cast<Eigen::half>(1.0), unpacket_traits<Packet2Xh>::size);
}
// Reverse lane order via a gather with descending indices.
template <>
EIGEN_STRONG_INLINE Packet2Xh preverse(const Packet2Xh& a) {
Packet2Xsu idx =
__riscv_vrsub_vx_u16m2(__riscv_vid_v_u16m2(unpacket_traits<Packet2Xh>::size),
unpacket_traits<Packet2Xh>::size - 1, unpacket_traits<Packet2Xh>::size);
return __riscv_vrgather_vv_f16m2(a, idx, unpacket_traits<Packet2Xh>::size);
}
// Sum of all lanes: m2 -> m1 scalar reduction seeded with 0. Only element 0
// of the m1 seed vector participates in the reduction result.
// NOTE(review): the seed vector is initialized with size/4 lanes (the m1
// vector holds size/2); presumably only element 0 matters here -- verify.
template <>
EIGEN_STRONG_INLINE Eigen::half predux<Packet2Xh>(const Packet2Xh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredusum_vs_f16m2_f16m1(
a, __riscv_vfmv_v_f_f16m1(static_cast<Eigen::half>(0.0), unpacket_traits<Packet2Xh>::size / 4),
unpacket_traits<Packet2Xh>::size)));
}
// Product of all lanes: multiply the two m1 halves together, then reuse the
// m1 tree reduction.
template <>
EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet2Xh>(const Packet2Xh& a) {
return predux_mul<PacketXh>(__riscv_vfmul_vv_f16m1(__riscv_vget_v_f16m2_f16m1(a, 0), __riscv_vget_v_f16m2_f16m1(a, 1),
unpacket_traits<PacketXh>::size));
}
// Min over all lanes, seeded with +max so the seed never wins.
template <>
EIGEN_STRONG_INLINE Eigen::half predux_min<Packet2Xh>(const Packet2Xh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmin_vs_f16m2_f16m1(
a, __riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::max)(), unpacket_traits<Packet2Xh>::size / 4),
unpacket_traits<Packet2Xh>::size)));
}
// Max over all lanes, seeded with -max.
template <>
EIGEN_STRONG_INLINE Eigen::half predux_max<Packet2Xh>(const Packet2Xh& a) {
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmax_vs_f16m2_f16m1(
a, __riscv_vfmv_v_f_f16m1(-(std::numeric_limits<Eigen::half>::max)(), unpacket_traits<Packet2Xh>::size / 4),
unpacket_traits<Packet2Xh>::size)));
}
// Transpose an N x size block: scatter each packet into a stack buffer with
// stride N (columns become rows), then reload rows contiguously.
template <int N>
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2Xh, N>& kernel) {
Eigen::half buffer[unpacket_traits<Packet2Xh>::size * N];
int i = 0;
for (i = 0; i < N; i++) {
__riscv_vsse16(reinterpret_cast<_Float16*>(&buffer[i]), N * sizeof(Eigen::half), kernel.packet[i],
unpacket_traits<Packet2Xh>::size);
}
for (i = 0; i < N; i++) {
kernel.packet[i] =
__riscv_vle16_v_f16m2(reinterpret_cast<_Float16*>(&buffer[i * unpacket_traits<Packet2Xh>::size]),
unpacket_traits<Packet2Xh>::size);
}
}
EIGEN_STRONG_INLINE Packet4Xf half2float(const Packet2Xh& a) {
return __riscv_vfwcvt_f_f_v_f32m4(a, unpacket_traits<Packet4Xf>::size);
}
EIGEN_STRONG_INLINE Packet2Xh float2half(const Packet4Xf& a) {
return __riscv_vfncvt_f_f_w_f16m2(a, unpacket_traits<Packet2Xh>::size);
}
// Halve an m2 half packet to an m1 packet by adding its low and high m1
// halves lane-wise. SFINAE-enabled only when the m2 packet size is a
// multiple of 8, matching the generic predux_half_dowto4 contract.
template <typename Packet = Packet2Xh>
EIGEN_STRONG_INLINE
    typename std::enable_if<std::is_same<Packet, Packet2Xh>::value && (unpacket_traits<Packet2Xh>::size % 8) == 0,
                            PacketXh>::type
    predux_half_dowto4(const Packet2Xh& a) {
  return __riscv_vfadd_vv_f16m1(__riscv_vget_v_f16m2_f16m1(a, 0), __riscv_vget_v_f16m2_f16m1(a, 1),
                                unpacket_traits<PacketXh>::size);
}
// Implement the half-precision transcendental packet kernels in terms of the
// corresponding float packet kernels — presumably via a float round-trip;
// see the F16_PACKET_FUNCTION macro definition for the exact expansion.
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, pcos)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, pexp)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, pexpm1)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, plog)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, plog1p)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, plog2)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, preciprocal)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, prsqrt)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, psin)
F16_PACKET_FUNCTION(Packet2Xf, PacketXh, ptanh)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, pcos)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, pexp)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, pexpm1)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, plog)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, plog1p)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, plog2)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, preciprocal)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, prsqrt)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, psin)
F16_PACKET_FUNCTION(Packet4Xf, Packet2Xh, ptanh)
/********************************* casting ************************************/
// Half <-> int16 casts are vectorized with a 1:1 element ratio.
template <>
struct type_casting_traits<_Float16, numext::int16_t> {
  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
template <>
struct type_casting_traits<numext::int16_t, _Float16> {
  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
// m1-register value conversion: int16 -> half.
template <>
EIGEN_STRONG_INLINE PacketXh pcast<PacketXs, PacketXh>(const PacketXs& a) {
  return __riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<PacketXs>::size);
}
// Truncating (round-toward-zero) half -> int16 conversion.
template <>
EIGEN_STRONG_INLINE PacketXs pcast<PacketXh, PacketXs>(const PacketXh& a) {
  return __riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size);
}
// Bitwise reinterpretations between half and int16 (no value conversion).
template <>
EIGEN_STRONG_INLINE PacketXh preinterpret<PacketXh, PacketXs>(const PacketXs& a) {
  return __riscv_vreinterpret_v_i16m1_f16m1(a);
}
template <>
EIGEN_STRONG_INLINE PacketXs preinterpret<PacketXs, PacketXh>(const PacketXh& a) {
  return __riscv_vreinterpret_v_f16m1_i16m1(a);
}
// m2-register value conversion: int16 -> half.
template <>
EIGEN_STRONG_INLINE Packet2Xh pcast<Packet2Xs, Packet2Xh>(const Packet2Xs& a) {
  return __riscv_vfcvt_f_x_v_f16m2(a, unpacket_traits<Packet2Xs>::size);
}
// Truncating (round-toward-zero) half -> int16 conversion.
template <>
EIGEN_STRONG_INLINE Packet2Xs pcast<Packet2Xh, Packet2Xs>(const Packet2Xh& a) {
  return __riscv_vfcvt_rtz_x_f_v_i16m2(a, unpacket_traits<Packet2Xh>::size);
}
// Bitwise reinterpretations between half and int16 (no value conversion).
template <>
EIGEN_STRONG_INLINE Packet2Xh preinterpret<Packet2Xh, Packet2Xs>(const Packet2Xs& a) {
  return __riscv_vreinterpret_v_i16m2_f16m2(a);
}
template <>
EIGEN_STRONG_INLINE Packet2Xs preinterpret<Packet2Xs, Packet2Xh>(const Packet2Xh& a) {
  return __riscv_vreinterpret_v_f16m2_i16m2(a);
}
// Widening casts that fuse several m1 source packets into one larger register
// group. Half -> int16 conversions truncate (round toward zero).
template <>
EIGEN_STRONG_INLINE Packet4Xs pcast<PacketXh, Packet4Xs>(const PacketXh& a, const PacketXh& b, const PacketXh& c,
                                                         const PacketXh& d) {
  return __riscv_vcreate_v_i16m1_i16m4(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<PacketXh>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i16m1(c, unpacket_traits<PacketXh>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i16m1(d, unpacket_traits<PacketXh>::size));
}
// Two m1 int16 packets -> one m2 half packet.
template <>
EIGEN_STRONG_INLINE Packet2Xh pcast<PacketXs, Packet2Xh>(const PacketXs& a, const PacketXs& b) {
  return __riscv_vcreate_v_f16m1_f16m2(__riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<PacketXs>::size),
                                       __riscv_vfcvt_f_x_v_f16m1(b, unpacket_traits<PacketXs>::size));
}
// Pure concatenation of two m1 half packets (no value conversion).
template <>
EIGEN_STRONG_INLINE Packet2Xh pcast<PacketXh, Packet2Xh>(const PacketXh& a, const PacketXh& b) {
  return __riscv_vcreate_v_f16m1_f16m2(a, b);
}
// Two m1 half packets -> one m2 int16 packet (truncating).
template <>
EIGEN_STRONG_INLINE Packet2Xs pcast<PacketXh, Packet2Xs>(const PacketXh& a, const PacketXh& b) {
  return __riscv_vcreate_v_i16m1_i16m2(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<PacketXh>::size));
}
} // namespace internal
} // namespace Eigen
#endif // EIGEN_PACKET_MATH_FP16_RVV10_H

View File

@@ -0,0 +1,284 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TYPE_CASTING_RVV10_H
#define EIGEN_TYPE_CASTING_RVV10_H
// IWYU pragma: private
#include "../../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
/********************************* 32 bits ************************************/
// Float <-> int32 casts are vectorized with a 1:1 element ratio.
template <>
struct type_casting_traits<float, numext::int32_t> {
  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
template <>
struct type_casting_traits<numext::int32_t, float> {
  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
// m1-register value conversion: int32 -> float.
template <>
EIGEN_STRONG_INLINE Packet1Xf pcast<Packet1Xi, Packet1Xf>(const Packet1Xi& a) {
  return __riscv_vfcvt_f_x_v_f32m1(a, unpacket_traits<Packet1Xi>::size);
}
// Truncating (round-toward-zero) float -> int32 conversion.
template <>
EIGEN_STRONG_INLINE Packet1Xi pcast<Packet1Xf, Packet1Xi>(const Packet1Xf& a) {
  return __riscv_vfcvt_rtz_x_f_v_i32m1(a, unpacket_traits<Packet1Xf>::size);
}
// Bitwise reinterpretations between float and int32 (no value conversion).
template <>
EIGEN_STRONG_INLINE Packet1Xf preinterpret<Packet1Xf, Packet1Xi>(const Packet1Xi& a) {
  return __riscv_vreinterpret_v_i32m1_f32m1(a);
}
template <>
EIGEN_STRONG_INLINE Packet1Xi preinterpret<Packet1Xi, Packet1Xf>(const Packet1Xf& a) {
  return __riscv_vreinterpret_v_f32m1_i32m1(a);
}
// m4-register value conversion: int32 -> float.
template <>
EIGEN_STRONG_INLINE Packet4Xf pcast<Packet4Xi, Packet4Xf>(const Packet4Xi& a) {
  return __riscv_vfcvt_f_x_v_f32m4(a, unpacket_traits<Packet4Xi>::size);
}
// Truncating (round-toward-zero) float -> int32 conversion.
template <>
EIGEN_STRONG_INLINE Packet4Xi pcast<Packet4Xf, Packet4Xi>(const Packet4Xf& a) {
  return __riscv_vfcvt_rtz_x_f_v_i32m4(a, unpacket_traits<Packet4Xf>::size);
}
// Bitwise reinterpretations between float and int32 (no value conversion).
template <>
EIGEN_STRONG_INLINE Packet4Xf preinterpret<Packet4Xf, Packet4Xi>(const Packet4Xi& a) {
  return __riscv_vreinterpret_v_i32m4_f32m4(a);
}
template <>
EIGEN_STRONG_INLINE Packet4Xi preinterpret<Packet4Xi, Packet4Xf>(const Packet4Xf& a) {
  return __riscv_vreinterpret_v_f32m4_i32m4(a);
}
// m2-register value conversion: int32 -> float.
template <>
EIGEN_STRONG_INLINE Packet2Xf pcast<Packet2Xi, Packet2Xf>(const Packet2Xi& a) {
  return __riscv_vfcvt_f_x_v_f32m2(a, unpacket_traits<Packet2Xi>::size);
}
// Truncating (round-toward-zero) float -> int32 conversion.
template <>
EIGEN_STRONG_INLINE Packet2Xi pcast<Packet2Xf, Packet2Xi>(const Packet2Xf& a) {
  return __riscv_vfcvt_rtz_x_f_v_i32m2(a, unpacket_traits<Packet2Xf>::size);
}
// Bitwise reinterpretations between float and int32 (no value conversion).
template <>
EIGEN_STRONG_INLINE Packet2Xf preinterpret<Packet2Xf, Packet2Xi>(const Packet2Xi& a) {
  return __riscv_vreinterpret_v_i32m2_f32m2(a);
}
template <>
EIGEN_STRONG_INLINE Packet2Xi preinterpret<Packet2Xi, Packet2Xf>(const Packet2Xf& a) {
  return __riscv_vreinterpret_v_f32m2_i32m2(a);
}
// Widening casts that fuse several 32-bit m1 source packets into one larger
// register group, converting element values where src/dst types differ.
// Same-type overloads are pure concatenations; float -> int truncates.
template <>
EIGEN_STRONG_INLINE Packet4Xi pcast<Packet1Xi, Packet4Xi>(const Packet1Xi& a, const Packet1Xi& b, const Packet1Xi& c,
                                                          const Packet1Xi& d) {
  return __riscv_vcreate_v_i32m1_i32m4(a, b, c, d);
}
template <>
EIGEN_STRONG_INLINE Packet4Xf pcast<Packet1Xi, Packet4Xf>(const Packet1Xi& a, const Packet1Xi& b, const Packet1Xi& c,
                                                          const Packet1Xi& d) {
  return __riscv_vcreate_v_f32m1_f32m4(__riscv_vfcvt_f_x_v_f32m1(a, unpacket_traits<Packet1Xi>::size),
                                       __riscv_vfcvt_f_x_v_f32m1(b, unpacket_traits<Packet1Xi>::size),
                                       __riscv_vfcvt_f_x_v_f32m1(c, unpacket_traits<Packet1Xi>::size),
                                       __riscv_vfcvt_f_x_v_f32m1(d, unpacket_traits<Packet1Xi>::size));
}
template <>
EIGEN_STRONG_INLINE Packet4Xf pcast<Packet1Xf, Packet4Xf>(const Packet1Xf& a, const Packet1Xf& b, const Packet1Xf& c,
                                                          const Packet1Xf& d) {
  return __riscv_vcreate_v_f32m1_f32m4(a, b, c, d);
}
template <>
EIGEN_STRONG_INLINE Packet4Xi pcast<Packet1Xf, Packet4Xi>(const Packet1Xf& a, const Packet1Xf& b, const Packet1Xf& c,
                                                          const Packet1Xf& d) {
  return __riscv_vcreate_v_i32m1_i32m4(__riscv_vfcvt_rtz_x_f_v_i32m1(a, unpacket_traits<Packet1Xf>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i32m1(b, unpacket_traits<Packet1Xf>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i32m1(c, unpacket_traits<Packet1Xf>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i32m1(d, unpacket_traits<Packet1Xf>::size));
}
template <>
EIGEN_STRONG_INLINE Packet2Xi pcast<Packet1Xi, Packet2Xi>(const Packet1Xi& a, const Packet1Xi& b) {
  return __riscv_vcreate_v_i32m1_i32m2(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2Xf pcast<Packet1Xi, Packet2Xf>(const Packet1Xi& a, const Packet1Xi& b) {
  return __riscv_vcreate_v_f32m1_f32m2(__riscv_vfcvt_f_x_v_f32m1(a, unpacket_traits<Packet1Xi>::size),
                                       __riscv_vfcvt_f_x_v_f32m1(b, unpacket_traits<Packet1Xi>::size));
}
template <>
EIGEN_STRONG_INLINE Packet2Xf pcast<Packet1Xf, Packet2Xf>(const Packet1Xf& a, const Packet1Xf& b) {
  return __riscv_vcreate_v_f32m1_f32m2(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2Xi pcast<Packet1Xf, Packet2Xi>(const Packet1Xf& a, const Packet1Xf& b) {
  return __riscv_vcreate_v_i32m1_i32m2(__riscv_vfcvt_rtz_x_f_v_i32m1(a, unpacket_traits<Packet1Xf>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i32m1(b, unpacket_traits<Packet1Xf>::size));
}
/********************************* 64 bits ************************************/
// Double <-> int64 casts are vectorized with a 1:1 element ratio.
template <>
struct type_casting_traits<double, numext::int64_t> {
  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
template <>
struct type_casting_traits<numext::int64_t, double> {
  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
// Single-register 64-bit conversions, mirroring the 32-bit block above:
// int64 -> double converts values, double -> int64 truncates (round toward
// zero), preinterpret is a pure bit reinterpretation. m1, m4 and m2 variants.
template <>
EIGEN_STRONG_INLINE Packet1Xd pcast<Packet1Xl, Packet1Xd>(const Packet1Xl& a) {
  return __riscv_vfcvt_f_x_v_f64m1(a, unpacket_traits<Packet1Xl>::size);
}
template <>
EIGEN_STRONG_INLINE Packet1Xl pcast<Packet1Xd, Packet1Xl>(const Packet1Xd& a) {
  return __riscv_vfcvt_rtz_x_f_v_i64m1(a, unpacket_traits<Packet1Xd>::size);
}
template <>
EIGEN_STRONG_INLINE Packet1Xd preinterpret<Packet1Xd, Packet1Xl>(const Packet1Xl& a) {
  return __riscv_vreinterpret_v_i64m1_f64m1(a);
}
template <>
EIGEN_STRONG_INLINE Packet1Xl preinterpret<Packet1Xl, Packet1Xd>(const Packet1Xd& a) {
  return __riscv_vreinterpret_v_f64m1_i64m1(a);
}
template <>
EIGEN_STRONG_INLINE Packet4Xd pcast<Packet4Xl, Packet4Xd>(const Packet4Xl& a) {
  return __riscv_vfcvt_f_x_v_f64m4(a, unpacket_traits<Packet4Xl>::size);
}
template <>
EIGEN_STRONG_INLINE Packet4Xl pcast<Packet4Xd, Packet4Xl>(const Packet4Xd& a) {
  return __riscv_vfcvt_rtz_x_f_v_i64m4(a, unpacket_traits<Packet4Xd>::size);
}
template <>
EIGEN_STRONG_INLINE Packet4Xd preinterpret<Packet4Xd, Packet4Xl>(const Packet4Xl& a) {
  return __riscv_vreinterpret_v_i64m4_f64m4(a);
}
template <>
EIGEN_STRONG_INLINE Packet4Xl preinterpret<Packet4Xl, Packet4Xd>(const Packet4Xd& a) {
  return __riscv_vreinterpret_v_f64m4_i64m4(a);
}
template <>
EIGEN_STRONG_INLINE Packet2Xd pcast<Packet2Xl, Packet2Xd>(const Packet2Xl& a) {
  return __riscv_vfcvt_f_x_v_f64m2(a, unpacket_traits<Packet2Xl>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xl pcast<Packet2Xd, Packet2Xl>(const Packet2Xd& a) {
  return __riscv_vfcvt_rtz_x_f_v_i64m2(a, unpacket_traits<Packet2Xd>::size);
}
template <>
EIGEN_STRONG_INLINE Packet2Xd preinterpret<Packet2Xd, Packet2Xl>(const Packet2Xl& a) {
  return __riscv_vreinterpret_v_i64m2_f64m2(a);
}
template <>
EIGEN_STRONG_INLINE Packet2Xl preinterpret<Packet2Xl, Packet2Xd>(const Packet2Xd& a) {
  return __riscv_vreinterpret_v_f64m2_i64m2(a);
}
// Concatenate four m1 int64 packets into one m4 register group; no value
// conversion is needed. (Removed a stray empty statement after the return.)
template <>
EIGEN_STRONG_INLINE Packet4Xl pcast<Packet1Xl, Packet4Xl>(const Packet1Xl& a, const Packet1Xl& b, const Packet1Xl& c,
                                                          const Packet1Xl& d) {
  return __riscv_vcreate_v_i64m1_i64m4(a, b, c, d);
}
// Widening casts that fuse several 64-bit m1 source packets into one larger
// register group. Same-type overloads are pure concatenations; double -> int64
// truncates (round toward zero).
template <>
EIGEN_STRONG_INLINE Packet4Xd pcast<Packet1Xl, Packet4Xd>(const Packet1Xl& a, const Packet1Xl& b, const Packet1Xl& c,
                                                          const Packet1Xl& d) {
  return __riscv_vcreate_v_f64m1_f64m4(__riscv_vfcvt_f_x_v_f64m1(a, unpacket_traits<Packet1Xl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(b, unpacket_traits<Packet1Xl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(c, unpacket_traits<Packet1Xl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(d, unpacket_traits<Packet1Xl>::size));
}
template <>
EIGEN_STRONG_INLINE Packet4Xd pcast<Packet1Xd, Packet4Xd>(const Packet1Xd& a, const Packet1Xd& b, const Packet1Xd& c,
                                                          const Packet1Xd& d) {
  return __riscv_vcreate_v_f64m1_f64m4(a, b, c, d);
}
template <>
EIGEN_STRONG_INLINE Packet4Xl pcast<Packet1Xd, Packet4Xl>(const Packet1Xd& a, const Packet1Xd& b, const Packet1Xd& c,
                                                          const Packet1Xd& d) {
  return __riscv_vcreate_v_i64m1_i64m4(__riscv_vfcvt_rtz_x_f_v_i64m1(a, unpacket_traits<Packet1Xd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(b, unpacket_traits<Packet1Xd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(c, unpacket_traits<Packet1Xd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(d, unpacket_traits<Packet1Xd>::size));
}
template <>
EIGEN_STRONG_INLINE Packet2Xl pcast<Packet1Xl, Packet2Xl>(const Packet1Xl& a, const Packet1Xl& b) {
  return __riscv_vcreate_v_i64m1_i64m2(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2Xd pcast<Packet1Xl, Packet2Xd>(const Packet1Xl& a, const Packet1Xl& b) {
  return __riscv_vcreate_v_f64m1_f64m2(__riscv_vfcvt_f_x_v_f64m1(a, unpacket_traits<Packet1Xl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(b, unpacket_traits<Packet1Xl>::size));
}
template <>
EIGEN_STRONG_INLINE Packet2Xd pcast<Packet1Xd, Packet2Xd>(const Packet1Xd& a, const Packet1Xd& b) {
  return __riscv_vcreate_v_f64m1_f64m2(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2Xl pcast<Packet1Xd, Packet2Xl>(const Packet1Xd& a, const Packet1Xd& b) {
  return __riscv_vcreate_v_i64m1_i64m2(__riscv_vfcvt_rtz_x_f_v_i64m1(a, unpacket_traits<Packet1Xd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(b, unpacket_traits<Packet1Xd>::size));
}
/********************************* 16 bits ************************************/
// Concatenate m1 int16 packets into larger register groups (no conversion).
template <>
EIGEN_STRONG_INLINE Packet2Xs pcast<Packet1Xs, Packet2Xs>(const Packet1Xs& a, const Packet1Xs& b) {
  return __riscv_vcreate_v_i16m1_i16m2(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4Xs pcast<Packet1Xs, Packet4Xs>(const Packet1Xs& a, const Packet1Xs& b, const Packet1Xs& c,
                                                          const Packet1Xs& d) {
  return __riscv_vcreate_v_i16m1_i16m4(a, b, c, d);
}
} // namespace internal
} // namespace Eigen
#endif // EIGEN_TYPE_CASTING_RVV10_H

View File

@@ -80,6 +80,8 @@
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
#elif defined(EIGEN_RISCV64_USE_RVV10)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif
@@ -116,7 +118,7 @@
// Only static alignment is really problematic (relies on nonstandard compiler extensions),
// try to keep heap alignment even when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64 || EIGEN_ARCH_RISCV)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
@@ -418,14 +420,55 @@ extern "C" {
#define EIGEN_VECTORIZE_SVE
#include <arm_sve.h>
// Since we depend on knowing SVE vector lengths at compile-time, we need
// to ensure a fixed lengths is set
// Since we depend on knowing SVE vector length at compile-time, we need
// to ensure a fixed length is set
#if defined __ARM_FEATURE_SVE_BITS
#define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
#else
#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
#endif
#elif defined(EIGEN_ARCH_RISCV)
#if defined(__riscv_zfh)
#define EIGEN_HAS_BUILTIN_FLOAT16
#endif
// We currently require RVV to be enabled explicitly via EIGEN_RISCV64_USE_RVV and
// will not select the backend automatically
#if (defined EIGEN_RISCV64_USE_RVV10)
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_RVV10
#include <riscv_vector.h>
// Since we depend on knowing RVV vector length at compile-time, we need
// to ensure a fixed length is set
#if defined(__riscv_v_fixed_vlen)
#define EIGEN_RISCV64_RVV_VL __riscv_v_fixed_vlen
#if __riscv_v_fixed_vlen >= 256
#undef EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#endif
#else
#error "Eigen requires a fixed RVV vector length but -mrvv-vector-bits=zvl is not set."
#endif
#undef EIGEN_STACK_ALLOCATION_LIMIT
#define EIGEN_STACK_ALLOCATION_LIMIT 196608
#if defined(__riscv_zvfh) && defined(__riscv_zfh)
#define EIGEN_VECTORIZE_RVV10FP16
#elif defined(__riscv_zvfh)
#if defined(__GNUC__) || defined(__clang__)
#warning "The Eigen::Half vectorization requires Zfh and Zvfh extensions."
#elif defined(_MSC_VER)
#pragma message("The Eigen::Half vectorization requires Zfh and Zvfh extensions.")
#endif
#endif
#endif // defined(EIGEN_ARCH_RISCV)
#elif (defined __s390x__ && defined __VEC__)
#define EIGEN_VECTORIZE
@@ -510,6 +553,13 @@ extern "C" {
#include <hip/hip_bfloat16.h>
#endif
#if defined(__riscv)
// Defines the default LMUL for RISC-V
#ifndef EIGEN_RISCV64_DEFAULT_LMUL
#define EIGEN_RISCV64_DEFAULT_LMUL 1
#endif
#endif
/** \brief Namespace containing all symbols from the %Eigen library. */
// IWYU pragma: private
#include "../InternalHeaderCheck.h"

View File

@@ -475,6 +475,7 @@ enum Type {
SVE = 0x6,
HVX = 0x7,
LSX = 0x8,
RVV10 = 0x9,
#if defined EIGEN_VECTORIZE_SSE
Target = SSE
#elif defined EIGEN_VECTORIZE_ALTIVEC
@@ -491,6 +492,8 @@ enum Type {
Target = HVX
#elif defined EIGEN_VECTORIZE_LSX
Target = LSX
#elif defined EIGEN_VECTORIZE_RVV10
Target = RVV10
#else
Target = Generic
#endif

View File

@@ -420,6 +420,13 @@
#define EIGEN_ARCH_PPC 0
#endif
/// \internal EIGEN_ARCH_RISCV set to 1 if the architecture is RISC-V.
#if defined(__riscv)
#define EIGEN_ARCH_RISCV 1
#else
#define EIGEN_ARCH_RISCV 0
#endif
//------------------------------------------------------------------------------------------
// Operating system identification, EIGEN_OS_*
//------------------------------------------------------------------------------------------
@@ -1023,7 +1030,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void ignore_unused_variable(cons
#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
#if !defined(EIGEN_ASM_COMMENT)
#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_RISCV)
#define EIGEN_ASM_COMMENT(X) __asm__("#" X)
#else
#define EIGEN_ASM_COMMENT(X)

View File

@@ -305,7 +305,7 @@ struct apply_rotation_in_the_plane_selector<Scalar, OtherScalar, SizeAtCompileTi
typedef typename packet_traits<OtherScalar>::type OtherPacket;
constexpr int RequiredAlignment =
(std::max)(unpacket_traits<Packet>::alignment, unpacket_traits<OtherPacket>::alignment);
(std::max<int>)(unpacket_traits<Packet>::alignment, unpacket_traits<OtherPacket>::alignment);
constexpr Index PacketSize = packet_traits<Scalar>::size;
/*** dynamic-size vectorized paths ***/