From f1482a709d5e7c407948f50b155c8c9b086e6566 Mon Sep 17 00:00:00 2001 From: zhoulu Date: Mon, 5 Jan 2026 19:48:56 +0800 Subject: [PATCH] Instruction reordering to further improve SM4-CBC decryption performance on the RISC-V architecture Reviewed-by: Neil Horman Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/29544) --- crypto/sm4/asm/sm4-riscv64-zvksed.pl | 47 ++++++++++++++-------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/crypto/sm4/asm/sm4-riscv64-zvksed.pl b/crypto/sm4/asm/sm4-riscv64-zvksed.pl index c97095ed52..67cca8877a 100644 --- a/crypto/sm4/asm/sm4-riscv64-zvksed.pl +++ b/crypto/sm4/asm/sm4-riscv64-zvksed.pl @@ -451,56 +451,56 @@ rv64i_zvksed_sm4_cbc_decrypt: addi $base, $in, -128 @{[reverse_order_L $vivec, $base]} - # Save the plaintext (in reverse element order) - @{[reverse_order_S $vdata0, $out]} - addi $out, $out, $BLOCK_SIZE - @{[vxor_vv $vdata1, $vdata1, $vivec]} addi $base, $in, -112 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata1, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata2, $vdata2, $vivec]} addi $base, $in, -96 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata2, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata3, $vdata3, $vivec]} addi $base, $in, -80 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata3, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata4, $vdata4, $vivec]} addi $base, $in, -64 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata4, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata5, $vdata5, $vivec]} addi $base, $in, -48 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata5, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata6, $vdata6, $vivec]} addi $base, $in, -32 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata6, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata7, $vdata7, $vivec]} addi $base, $in, -16 @{[reverse_order_L $vivec, $base]} + + # Save the plaintext (in reverse element order) + @{[reverse_order_S $vdata0, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata1, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata2, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata3, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata4, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata5, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata6, $out]} + addi $out, $out, $BLOCK_SIZE @{[reverse_order_S $vdata7, $out]} addi $out, $out, $BLOCK_SIZE @@ -548,28 +548,29 @@ rv64i_zvksed_sm4_cbc_decrypt: # Update ciphertext to IV (in reverse element order) addi $base, $in, -64 @{[reverse_order_L $vivec, $base]} - # Save the plaintext (in reverse element order) - @{[reverse_order_S $vdata0, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata1, $vdata1, $vivec]} addi $base, $in, -48 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata1, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata2, $vdata2, $vivec]} addi $base, $in, -32 @{[reverse_order_L $vivec, $base]} - @{[reverse_order_S $vdata2, $out]} - addi $out, $out, $BLOCK_SIZE @{[vxor_vv $vdata3, $vdata3, $vivec]} addi $base, $in, -16 @{[reverse_order_L $vivec, $base]} + + # Save the plaintext (in reverse element order) + @{[reverse_order_S $vdata0, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata1, $out]} + addi $out, $out, $BLOCK_SIZE + @{[reverse_order_S $vdata2, $out]} + addi $out, $out, $BLOCK_SIZE @{[reverse_order_S $vdata3, $out]} addi $out, $out, $BLOCK_SIZE