Commit 2d75c5e383 for openssl.org
commit 2d75c5e383194ec2d0e2306232bdb38c3b343c50
Author: zhoulu <zhou.lu1@zte.com.cn>
Date: Fri Dec 19 10:08:02 2025 +0800
SM4-CBC performance improvement on RISC-V
Modify the IV update method to further improve the performance of
SM4-CBC encryption on the RISC-V architecture.
Reviewed-by: Paul Dale <paul.dale@oracle.com>
Reviewed-by: Neil Horman <nhorman@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/29451)
diff --git a/crypto/perlasm/riscv.pm b/crypto/perlasm/riscv.pm
index e5f543a3c3..5d62f3a660 100644
--- a/crypto/perlasm/riscv.pm
+++ b/crypto/perlasm/riscv.pm
@@ -468,6 +468,16 @@ sub vadd_vv {
return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
}
+sub vrgather_vv {
+ # vrgather.vv vd, vs2, vs1, vm -- returns the instruction encoded as a ".word" directive string
+ my $template = 0b001100_0_00000_00000_000_00000_1010111; # bit groups: 6 | vm | vs2 | vs1 | 3 | vd | 7; operand fields are zero here and ORed in below
+ my $vd = read_vreg shift; # 1st argument; placed at bit 7
+ my $vs2 = read_vreg shift; # 2nd argument; placed at bit 20
+ my $vs1 = read_vreg shift; # 3rd argument; placed at bit 15
+ my $vm = read_mask_vreg shift; # 4th argument (callers in this patch omit it); placed at bit 25
+ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
+}
+
sub vadd_vx {
# vadd.vx vd, vs2, rs1, vm
my $template = 0b000000_0_00000_00000_100_00000_1010111;
diff --git a/crypto/sm4/asm/sm4-riscv64-zvksed.pl b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
index 66fd127aed..c97095ed52 100644
--- a/crypto/sm4/asm/sm4-riscv64-zvksed.pl
+++ b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
@@ -236,8 +236,14 @@ my ($in,$out,$len,$keys,$ivp)=("a0","a1","a2","a3","a4");
my ($tmp,$base)=("t0","t2");
my ($vdata0,$vdata1,$vdata2,$vdata3,$vdata4,$vdata5,$vdata6,$vdata7)=("v1","v2","v3","v4","v5","v6","v7","v24");
my ($vivec)=("v8");
+my ($vindex)=("v0");
$code .= <<___;
+.section .rodata
+.align 4
+.Lreverse_index:
+ .word 3, 2, 1, 0
+.text
.p2align 3
.globl rv64i_zvksed_sm4_cbc_encrypt
.type rv64i_zvksed_sm4_cbc_encrypt,\@function
@@ -254,6 +260,10 @@ rv64i_zvksed_sm4_cbc_encrypt:
# Load IV
@{[vle32_v $vivec, $ivp]}
+
+ # Load the reverse index (for IV updates)
+ la $tmp, .Lreverse_index
+ @{[vle32_v $vindex, $tmp]}
# =====================================================
# If data length ≥ 64 bytes, process 4 blocks in batch:
# 4-block CBC encryption pipeline:
@@ -285,12 +295,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
@{[enc_blk $vdata0]}
@{[vrev8_v $vdata0, $vdata0]}
- # Save the ciphertext (in reverse element order)
- li $tmp_stride, $STRIDE
- @{[reverse_order_S $vdata0, $out]}
#Update IV to ciphertext block 0
- @{[vle32_v $vivec, $out]}
- addi $out, $out, $BLOCK_SIZE
+ @{[vrgather_vv $vivec, $vdata0, $vindex]}
@{[vxor_vv $vdata1, $vdata1, $vivec]}
@@ -298,11 +304,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
@{[enc_blk $vdata1]}
@{[vrev8_v $vdata1, $vdata1]}
- @{[reverse_order_S $vdata1, $out]}
-
#Update IV to ciphertext block 1
- @{[vle32_v $vivec, $out]}
- addi $out, $out, $BLOCK_SIZE
+ @{[vrgather_vv $vivec, $vdata1, $vindex]}
@{[vxor_vv $vdata2, $vdata2, $vivec]}
@@ -310,10 +313,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
@{[enc_blk $vdata2]}
@{[vrev8_v $vdata2, $vdata2]}
- @{[reverse_order_S $vdata2, $out]}
#Update IV to ciphertext block 2
- @{[vle32_v $vivec, $out]}
- addi $out, $out, $BLOCK_SIZE
+ @{[vrgather_vv $vivec, $vdata2, $vindex]}
@{[vxor_vv $vdata3, $vdata3, $vivec]}
@@ -321,9 +322,18 @@ rv64i_zvksed_sm4_cbc_encrypt:
@{[enc_blk $vdata3]}
@{[vrev8_v $vdata3, $vdata3]}
- @{[reverse_order_S $vdata3, $out]}
#Update IV to ciphertext block 3
- @{[vle32_v $vivec, $out]}
+ @{[vrgather_vv $vivec, $vdata3, $vindex]}
+
+ # Save the ciphertext (in reverse element order)
+ li $tmp_stride, $STRIDE
+ @{[reverse_order_S $vdata0, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ @{[reverse_order_S $vdata1, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ @{[reverse_order_S $vdata2, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ @{[reverse_order_S $vdata3, $out]}
addi $out, $out, $BLOCK_SIZE
addi $len, $len, -$FOUR_BLOCKS
@@ -344,12 +354,12 @@ rv64i_zvksed_sm4_cbc_encrypt:
@{[enc_blk $vdata0]}
@{[vrev8_v $vdata0, $vdata0]}
+ # Update IV to ciphertext block 0
+ @{[vrgather_vv $vivec, $vdata0, $vindex]}
+
# Save the ciphertext (in reverse element order)
li $tmp_stride, $STRIDE
@{[reverse_order_S $vdata0, $out]}
-
- # Update IV to ciphertext block 0
- @{[vle32_v $vivec, $out]}
addi $out, $out, $BLOCK_SIZE
addi $len, $len, -$BLOCK_SIZE