Commit 2d75c5e383 for openssl.org

commit 2d75c5e383194ec2d0e2306232bdb38c3b343c50
Author: zhoulu <zhou.lu1@zte.com.cn>
Date:   Fri Dec 19 10:08:02 2025 +0800

    SM4-CBC performance improvement on RISC-V

    Modify the IV update method to further improve the performance of
    SM4-CBC encryption on the RISC-V architecture.

    Reviewed-by: Paul Dale <paul.dale@oracle.com>
    Reviewed-by: Neil Horman <nhorman@openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/29451)
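
For context on the diff below: the old code updated the CBC IV by writing each ciphertext block to $out in reversed element order (reverse_order_S) and immediately reloading it with vle32_v; the new code instead permutes the ciphertext register in place with vrgather.vv and a constant index vector (3, 2, 1, 0), which lets the four stores be batched at the end of the loop. A toy Perl cross-check of why the two IV updates agree; the ciphertext words are made up, and element indexing is the only RVV semantics modelled:

    #!/usr/bin/env perl
    use strict;
    use warnings;

    # Toy cross-check: the old IV update (store the ciphertext element-
    # reversed, then reload it) and the new one (vrgather.vv with index
    # (3, 2, 1, 0)) yield the same IV. The ciphertext words are made up.
    my @ct = (0xdeadbeef, 0x01234567, 0x89abcdef, 0xcafebabe);

    # Old path: reverse_order_S writes elements 3..0, vle32_v reads them back.
    my @iv_old = reverse @ct;

    # New path: vivec[i] = ct[vindex[i]] with vindex = (3, 2, 1, 0).
    my @iv_new = map { $ct[$_] } (3, 2, 1, 0);

    die "IV updates disagree\n" unless "@iv_old" eq "@iv_new";
    print "old and new IV updates agree\n";

With the IV produced in-register, the vxor_vv for the next block no longer waits on a store-to-load round trip through memory.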

diff --git a/crypto/perlasm/riscv.pm b/crypto/perlasm/riscv.pm
index e5f543a3c3..5d62f3a660 100644
--- a/crypto/perlasm/riscv.pm
+++ b/crypto/perlasm/riscv.pm
@@ -468,6 +468,16 @@ sub vadd_vv {
     return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
 }

+sub vrgather_vv {
+    # vrgather.vv vd, vs2, vs1, vm
+    my $template = 0b001100_0_00000_00000_000_00000_1010111;
+    my $vd = read_vreg shift;
+    my $vs2 = read_vreg shift;
+    my $vs1 = read_vreg shift;
+    my $vm = read_mask_vreg shift;
+    return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
+}
+
 sub vadd_vx {
     # vadd.vx vd, vs2, rs1, vm
     my $template = 0b000000_0_00000_00000_100_00000_1010111;
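
As a sanity check on the encoding added above, the sketch below packs the same fields in a standalone helper (hypothetical encode_vrgather_vv; register numbers are passed directly rather than through riscv.pm's read_vreg/read_mask_vreg parsers) and prints the word for the unmasked vrgather.vv v8, v1, v0 emitted for the IV update in sm4-riscv64-zvksed.pl:

    #!/usr/bin/env perl
    use strict;
    use warnings;

    # Hypothetical standalone version of the vrgather_vv encoder above;
    # vd/vs2/vs1 are raw vector register numbers, $vm = 1 means unmasked.
    sub encode_vrgather_vv {
        my ($vd, $vs2, $vs1, $vm) = @_;
        my $template = 0b001100_0_00000_00000_000_00000_1010111;
        return $template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7);
    }

    # vrgather.vv v8, v1, v0 (vivec = v8, vdata0 = v1, vindex = v0):
    printf ".word 0x%08x\n", encode_vrgather_vv(8, 1, 0, 1);   # 0x32100457

The funct6 001100, funct3 000 (OPIVV), and major opcode 1010111 (OP-V) in the template match the ratified RVV encoding for vrgather.vv.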
diff --git a/crypto/sm4/asm/sm4-riscv64-zvksed.pl b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
index 66fd127aed..c97095ed52 100644
--- a/crypto/sm4/asm/sm4-riscv64-zvksed.pl
+++ b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
@@ -236,8 +236,14 @@ my ($in,$out,$len,$keys,$ivp)=("a0","a1","a2","a3","a4");
 my ($tmp,$base)=("t0","t2");
 my ($vdata0,$vdata1,$vdata2,$vdata3,$vdata4,$vdata5,$vdata6,$vdata7)=("v1","v2","v3","v4","v5","v6","v7","v24");
 my ($vivec)=("v8");
+my ($vindex)=("v0");

 $code .= <<___;
+.section .rodata
+.align 4
+.Lreverse_index:
+    .word 3, 2, 1, 0
+.text
 .p2align 3
 .globl rv64i_zvksed_sm4_cbc_encrypt
 .type rv64i_zvksed_sm4_cbc_encrypt,\@function
@@ -254,6 +260,10 @@ rv64i_zvksed_sm4_cbc_encrypt:

     # Load IV
     @{[vle32_v $vivec, $ivp]}
+
+    # Load the reverse index (for IV updates)
+    la $tmp, .Lreverse_index
+    @{[vle32_v $vindex, $tmp]}
 # =====================================================
 # If data length ≥ 64 bytes, process 4 blocks in batch:
 # 4-block CBC encryption pipeline:
@@ -285,12 +295,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata0]}
     @{[vrev8_v $vdata0, $vdata0]}

-    # Save the ciphertext (in reverse element order)
-    li $tmp_stride, $STRIDE
-    @{[reverse_order_S $vdata0, $out]}
     #Update IV to ciphertext block 0
-    @{[vle32_v $vivec, $out]}
-    addi $out, $out, $BLOCK_SIZE
+    @{[vrgather_vv $vivec, $vdata0, $vindex]}

     @{[vxor_vv $vdata1, $vdata1, $vivec]}

@@ -298,11 +304,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata1]}
     @{[vrev8_v $vdata1, $vdata1]}

-    @{[reverse_order_S $vdata1, $out]}
-
     #Update IV to ciphertext block 1
-    @{[vle32_v $vivec, $out]}
-    addi $out, $out, $BLOCK_SIZE
+    @{[vrgather_vv $vivec, $vdata1, $vindex]}

     @{[vxor_vv $vdata2, $vdata2, $vivec]}

@@ -310,10 +313,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata2]}
     @{[vrev8_v $vdata2, $vdata2]}

-    @{[reverse_order_S $vdata2, $out]}
     #Update IV to ciphertext block 2
-    @{[vle32_v $vivec, $out]}
-    addi $out, $out, $BLOCK_SIZE
+    @{[vrgather_vv $vivec, $vdata2, $vindex]}

     @{[vxor_vv $vdata3, $vdata3, $vivec]}

@@ -321,9 +322,18 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata3]}
     @{[vrev8_v $vdata3, $vdata3]}

-    @{[reverse_order_S $vdata3, $out]}
     #Update IV to ciphertext block 3
-    @{[vle32_v $vivec, $out]}
+    @{[vrgather_vv $vivec, $vdata3, $vindex]}
+
+    # Save the ciphertext (in reverse element order)
+    li $tmp_stride, $STRIDE
+    @{[reverse_order_S $vdata0, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata1, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata2, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata3, $out]}
     addi $out, $out, $BLOCK_SIZE

     addi $len, $len, -$FOUR_BLOCKS
@@ -344,12 +354,12 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata0]}
     @{[vrev8_v $vdata0, $vdata0]}

+    # Update IV to ciphertext block 0
+    @{[vrgather_vv $vivec, $vdata0, $vindex]}
+
     # Save the ciphertext (in reverse element order)
     li $tmp_stride, $STRIDE
     @{[reverse_order_S $vdata0, $out]}
-
-    # Update IV to ciphertext block 0
-    @{[vle32_v $vivec, $out]}
     addi $out, $out, $BLOCK_SIZE
     addi $len, $len, -$BLOCK_SIZE
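
The single-block tail above applies the same reordering: the IV is taken from the ciphertext register before the block is stored. A simplified Perl model of the resulting per-block flow, with the vrev8_v byte swaps omitted, enc_blk standing in for the SM4 rounds (a placeholder, not the real cipher), and all data made up:

    #!/usr/bin/env perl
    use strict;
    use warnings;

    # Toy model of the per-block flow after the patch: the next IV comes
    # straight from the ciphertext register, so the store to $out drops
    # off the critical path and can be batched.
    sub enc_blk { my ($b) = @_; [map { $_ ^ 0x5a5a5a5a } @$b] }  # placeholder, not SM4

    my @iv     = (0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);  # made-up IV
    my @blocks = ([1, 2, 3, 4], [5, 6, 7, 8]);                      # made-up plaintext
    my @out;

    for my $pt (@blocks) {
        my $x  = [map { $pt->[$_] ^ $iv[$_] } 0 .. 3];  # vxor_vv $vdata, $vdata, $vivec
        my $ct = enc_blk($x);                           # enc_blk $vdata
        @iv    = map { $ct->[$_] } (3, 2, 1, 0);        # vrgather.vv $vivec, $vdata, $vindex
        push @out, $ct;                                 # reverse_order_S can run later
    }
    printf "%d blocks encrypted\n", scalar @out;

Only the store touches memory per block, and nothing on the IV dependency chain waits for it.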