mirror of
https://github.com/openssl/openssl.git
synced 2026-01-18 17:11:31 +01:00
crypto/poly1305: Add SVE2 vector-length agnostic implementation.
Implement Poly1305 using SVE2 VLA instructions for AArch64. This implementation is selected at runtime if SVE2 is present and the vector length is 256, 512, 1024 or 2048 bits. Reviewed-by: Tom Cosgrove <tom.cosgrove@arm.com> Reviewed-by: Paul Dale <paul.dale@oracle.com> (Merged from https://github.com/openssl/openssl/pull/28454)
This commit is contained in:
@@ -120,6 +120,14 @@ _armv8_sve2_probe:
|
||||
ret
|
||||
.size _armv8_sve2_probe,.-_armv8_sve2_probe
|
||||
|
||||
// uint64_t _armv8_sve_get_vl_bytes(void)
//
// Executes CNTB x0, which writes the number of bytes in one SVE vector
// to x0, and returns with that value in x0. Takes no arguments and
// touches no register other than x0.
//
// The instruction is emitted as a raw .inst word instead of a mnemonic
// (presumably so that assemblers without SVE support can still build
// this file -- TODO confirm).
//
// NOTE(review): CNTB is an SVE instruction, so this should only be
// called once SVE/SVE2 availability has been established -- confirm
// against the call sites.
.globl _armv8_sve_get_vl_bytes
|
||||
.type _armv8_sve_get_vl_bytes,%function
|
||||
_armv8_sve_get_vl_bytes:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
.inst 0x0420e3e0 // cntb x0
|
||||
ret
|
||||
.size _armv8_sve_get_vl_bytes,.-_armv8_sve_get_vl_bytes
|
||||
|
||||
.globl _armv8_cpuid_probe
|
||||
.type _armv8_cpuid_probe,%function
|
||||
_armv8_cpuid_probe:
|
||||
|
||||
@@ -86,9 +86,10 @@ extern unsigned int OPENSSL_armv8_rsa_neonized;
|
||||
# define ARMV8_SHA3 (1<<11)
|
||||
# define ARMV8_UNROLL8_EOR3 (1<<12)
|
||||
# define ARMV8_SVE (1<<13)
|
||||
# define ARMV8_SVE2 (1<<14)
|
||||
# define ARMV9_SVE2 (1<<14)
|
||||
# define ARMV8_HAVE_SHA3_AND_WORTH_USING (1<<15)
|
||||
# define ARMV8_UNROLL12_EOR3 (1<<16)
|
||||
# define ARMV9_SVE2_POLY1305 (1<<17)
|
||||
|
||||
/*
|
||||
* MIDR_EL1 system register
|
||||
|
||||
@@ -24,11 +24,18 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include "arm_arch.h"
|
||||
#ifdef __aarch64__
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
unsigned int OPENSSL_armcap_P = 0;
|
||||
unsigned int OPENSSL_arm_midr = 0;
|
||||
unsigned int OPENSSL_armv8_rsa_neonized = 0;
|
||||
|
||||
#ifdef __aarch64__
|
||||
uint64_t _armv8_sve_get_vl_bytes(void);
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
void OPENSSL_cpuid_setup(void)
|
||||
{
|
||||
@@ -346,7 +353,7 @@ void OPENSSL_cpuid_setup(void)
|
||||
OPENSSL_armcap_P |= ARMV8_SVE;
|
||||
|
||||
if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_SVE2)
|
||||
OPENSSL_armcap_P |= ARMV8_SVE2;
|
||||
OPENSSL_armcap_P |= ARMV9_SVE2;
|
||||
|
||||
if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_RNG)
|
||||
OPENSSL_armcap_P |= ARMV8_RNG;
|
||||
@@ -391,7 +398,7 @@ void OPENSSL_cpuid_setup(void)
|
||||
}
|
||||
# ifdef __aarch64__
|
||||
OPENSSL_armcap_P |= arm_probe_for(_armv8_sve_probe, ARMV8_SVE);
|
||||
OPENSSL_armcap_P |= arm_probe_for(_armv8_sve2_probe, ARMV8_SVE2);
|
||||
OPENSSL_armcap_P |= arm_probe_for(_armv8_sve2_probe, ARMV9_SVE2);
|
||||
OPENSSL_armcap_P |= arm_probe_for(_armv8_rng_probe, ARMV8_RNG);
|
||||
# endif
|
||||
|
||||
@@ -450,6 +457,17 @@ void OPENSSL_cpuid_setup(void)
|
||||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_QCOMM, QCOM_CPU_PART_ORYON_X1)) &&
|
||||
(OPENSSL_armcap_P & ARMV8_SHA3))
|
||||
OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
|
||||
if (OPENSSL_armcap_P & ARMV9_SVE2) {
|
||||
uint64_t vl_bytes = _armv8_sve_get_vl_bytes();
|
||||
|
||||
if (vl_bytes > 16 && (vl_bytes & (vl_bytes - 1)) == 0) {
|
||||
/*
|
||||
* This implementation is faster if the vector length is > 128 bits,
|
||||
* but the vector length must be a power of 2 (e.g. 256 or 512 bits).
|
||||
*/
|
||||
OPENSSL_armcap_P |= ARMV9_SVE2_POLY1305;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
}
|
||||
#endif /* _WIN32, __ARM_MAX_ARCH__ >= 7 */
|
||||
|
||||
@@ -756,7 +756,7 @@ ChaCha20_ctr32_sve:
|
||||
mov $sve2flag,0
|
||||
adrp $tmp,OPENSSL_armcap_P
|
||||
ldr $tmpw,[$tmp,#:lo12:OPENSSL_armcap_P]
|
||||
tst $tmpw,#ARMV8_SVE2
|
||||
tst $tmpw,#ARMV9_SVE2
|
||||
b.eq 1f
|
||||
mov $sve2flag,1
|
||||
b 2f
|
||||
|
||||
@@ -69,6 +69,8 @@ $code.=<<___;
|
||||
.globl poly1305_emit
|
||||
.hidden poly1305_emit
|
||||
|
||||
.extern poly1305_blocks_sve2
|
||||
|
||||
.type poly1305_init,%function
|
||||
.align 5
|
||||
poly1305_init:
|
||||
@@ -109,6 +111,13 @@ poly1305_init:
|
||||
csel $d0,$d0,$r0,eq
|
||||
csel $d1,$d1,$r1,eq
|
||||
|
||||
tst w17, #ARMV9_SVE2_POLY1305
|
||||
|
||||
adrp $r0,poly1305_blocks_sve2
|
||||
add $r0,$r0,#:lo12:poly1305_blocks_sve2
|
||||
|
||||
csel $d0,$d0,$r0,eq
|
||||
|
||||
#ifdef __ILP32__
|
||||
stp w12,w13,[$len]
|
||||
#else
|
||||
|
||||
1420
crypto/poly1305/asm/poly1305-armv9-sve2.pl
Executable file
1420
crypto/poly1305/asm/poly1305-armv9-sve2.pl
Executable file
File diff suppressed because it is too large
Load Diff
@@ -14,7 +14,7 @@ IF[{- !$disabled{asm} -}]
|
||||
$POLY1305ASM_s390x=poly1305-s390x.S
|
||||
|
||||
$POLY1305ASM_armv4=poly1305-armv4.S
|
||||
$POLY1305ASM_aarch64=poly1305-armv8.S
|
||||
$POLY1305ASM_aarch64=poly1305-armv8.S poly1305-armv9-sve2.S
|
||||
|
||||
$POLY1305ASM_ppc32=poly1305_ppc.c poly1305-ppc.s poly1305-ppcfp.s
|
||||
$POLY1305ASM_ppc64=$POLY1305ASM_ppc32
|
||||
@@ -45,7 +45,9 @@ GENERATE[poly1305-ppcfp.s]=asm/poly1305-ppcfp.pl
|
||||
GENERATE[poly1305-armv4.S]=asm/poly1305-armv4.pl
|
||||
INCLUDE[poly1305-armv4.o]=..
|
||||
GENERATE[poly1305-armv8.S]=asm/poly1305-armv8.pl
|
||||
GENERATE[poly1305-armv9-sve2.S]=asm/poly1305-armv9-sve2.pl
|
||||
INCLUDE[poly1305-armv8.o]=..
|
||||
INCLUDE[poly1305-armv9-sve2.o]=..
|
||||
GENERATE[poly1305-mips.S]=asm/poly1305-mips.pl
|
||||
INCLUDE[poly1305-mips.o]=..
|
||||
GENERATE[poly1305-c64xplus.S]=asm/poly1305-c64xplus.pl
|
||||
|
||||
Reference in New Issue
Block a user