Commit f1482a709d for openssl.org

commit f1482a709d5e7c407948f50b155c8c9b086e6566
Author: zhoulu <zhou.lu1@zte.com.cn>
Date:   Mon Jan 5 19:48:56 2026 +0800

    Instruction reordering to further improve SM4-CBC decryption performance on the RISC-V architecture

    Reviewed-by: Neil Horman <nhorman@openssl.org>
    Reviewed-by: Paul Dale <paul.dale@oracle.com>
    (Merged from https://github.com/openssl/openssl/pull/29544)

diff --git a/crypto/sm4/asm/sm4-riscv64-zvksed.pl b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
index c97095ed52..67cca8877a 100644
--- a/crypto/sm4/asm/sm4-riscv64-zvksed.pl
+++ b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
@@ -451,56 +451,56 @@ rv64i_zvksed_sm4_cbc_decrypt:
     addi $base, $in, -128
     @{[reverse_order_L $vivec, $base]}

-    # Save the plaintext (in reverse element order)
-    @{[reverse_order_S $vdata0, $out]}
-    addi $out, $out, $BLOCK_SIZE
-
     @{[vxor_vv $vdata1, $vdata1, $vivec]}

     addi $base, $in, -112
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata1, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata2, $vdata2, $vivec]}

     addi $base, $in, -96
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata2, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata3, $vdata3, $vivec]}

     addi $base, $in, -80
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata3, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata4, $vdata4, $vivec]}

     addi $base, $in, -64
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata4, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata5, $vdata5, $vivec]}

     addi $base, $in, -48
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata5, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata6, $vdata6, $vivec]}

     addi $base, $in, -32
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata6, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata7, $vdata7, $vivec]}

     addi $base, $in, -16
     @{[reverse_order_L $vivec, $base]}
+
+    # Save the plaintext (in reverse element order)
+    @{[reverse_order_S $vdata0, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata1, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata2, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata3, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata4, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata5, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata6, $out]}
+    addi $out, $out, $BLOCK_SIZE
     @{[reverse_order_S $vdata7, $out]}
     addi $out, $out, $BLOCK_SIZE

@@ -548,28 +548,29 @@ rv64i_zvksed_sm4_cbc_decrypt:
     # Update ciphertext to IV (in reverse element order)
     addi $base, $in, -64
     @{[reverse_order_L $vivec, $base]}
-    # Save the plaintext (in reverse element order)
-    @{[reverse_order_S $vdata0, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata1, $vdata1, $vivec]}

     addi $base, $in, -48
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata1, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata2, $vdata2, $vivec]}

     addi $base, $in, -32
     @{[reverse_order_L $vivec, $base]}
-    @{[reverse_order_S $vdata2, $out]}
-    addi $out, $out, $BLOCK_SIZE

     @{[vxor_vv $vdata3, $vdata3, $vivec]}

     addi $base, $in, -16
     @{[reverse_order_L $vivec, $base]}
+
+    # Save the plaintext (in reverse element order)
+    @{[reverse_order_S $vdata0, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata1, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata2, $out]}
+    addi $out, $out, $BLOCK_SIZE
     @{[reverse_order_S $vdata3, $out]}
     addi $out, $out, $BLOCK_SIZE