commit c90b7dddf25f44c3ed27a69ae6bc67cfdb4894cd
Author: zhoulu <zhou.lu1@zte.com.cn>
Date: Tue Dec 2 16:34:51 2025 +0800
Performance Optimization of SM4-CBC Encryption and Decryption with Assembly on RISC-V Architecture
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <paul.dale@oracle.com>
(Merged from https://github.com/openssl/openssl/pull/29137)
diff --git a/crypto/sm4/asm/sm4-riscv64-zvksed.pl b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
index ad8585efc7..66fd127aed 100644
--- a/crypto/sm4/asm/sm4-riscv64-zvksed.pl
+++ b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
@@ -59,6 +59,548 @@ my $code=<<___;
.text
___
+my $BLOCK_SIZE = 16;
+my $STRIDE = -4; # Used for reversing word order
+my $FOUR_BLOCKS = 64;
+my $EIGHT_BLOCKS = 128;
+my ($vk0,$vk1,$vk2,$vk3,$vk4,$vk5,$vk6,$vk7)=("v16","v17","v18","v19","v20","v21","v22","v23");
+my ($tmp_stride,$tmp_base)=("t1","t2");
+# Load a 128-bit block with its four 32-bit words reversed
+sub reverse_order_L {
+ my $vreg = shift;
+ my $base_reg = shift;
+
+ return <<___;
+ addi $tmp_base, $base_reg, 12
+ @{[vlse32_v $vreg, $tmp_base, $tmp_stride]}
+___
+}
+
+# Store a 128-bit block with its four 32-bit words reversed
+sub reverse_order_S {
+ my $vreg = shift;
+ my $base_reg = shift;
+
+ return <<___;
+ addi $tmp_base, $base_reg, 12
+ @{[vsse32_v $vreg, $tmp_base, $tmp_stride]}
+___
+}
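
These two helpers emulate a big-endian 128-bit access: starting at base+12
with a stride of -4 bytes puts element 0 at the highest word address, so the
four 32-bit words of the block come out reversed. Together with vrev8.v
(which swaps the bytes inside each word) this reverses all 16 bytes of a
block. A scalar C model of the store side (reverse_order_store is a
hypothetical name, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical model of reverse_order_S: word i of the block lands at
     * base + 12 - 4*i, exactly what the -4 stride from base+12 achieves. */
    static void reverse_order_store(unsigned char *base, const uint32_t w[4])
    {
        int i;

        for (i = 0; i < 4; i++)
            memcpy(base + 12 - 4 * i, &w[i], 4);
    }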
+
+# Load 32 round keys
+sub enc_load_key {
+ my $keys = shift;
+
+ my $code=<<___;
+ # Order of elements was adjusted in set_encrypt_key()
+ @{[vle32_v $vk0, $keys]} # rk[0:3]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk1, $keys]} # rk[4:7]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk2, $keys]} # rk[8:11]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk3, $keys]} # rk[12:15]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk4, $keys]} # rk[16:19]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk5, $keys]} # rk[20:23]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk6, $keys]} # rk[24:27]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk7, $keys]} # rk[28:31]
+___
+
+ return $code;
+}
+
+sub dec_load_key {
+ my $keys = shift;
+
+ my $code=<<___;
+ # Order of elements was adjusted in set_decrypt_key()
+ @{[vle32_v $vk7, $keys]} # rk[31:28]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk6, $keys]} # rk[27:24]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk5, $keys]} # rk[23:20]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk4, $keys]} # rk[19:16]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk3, $keys]} # rk[15:12]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk2, $keys]} # rk[11:8]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk1, $keys]} # rk[7:4]
+ addi $keys, $keys, $BLOCK_SIZE
+ @{[vle32_v $vk0, $keys]} # rk[3:0]
+___
+
+ return $code;
+}
+
+# Encrypt with all 32 round keys (each vsm4r.vs call runs 4 rounds)
+sub enc_blk {
+ my $data = shift;
+
+ my $code=<<___;
+ @{[vsm4r_vs $data, $vk0]}
+ @{[vsm4r_vs $data, $vk1]}
+ @{[vsm4r_vs $data, $vk2]}
+ @{[vsm4r_vs $data, $vk3]}
+ @{[vsm4r_vs $data, $vk4]}
+ @{[vsm4r_vs $data, $vk5]}
+ @{[vsm4r_vs $data, $vk6]}
+ @{[vsm4r_vs $data, $vk7]}
+___
+
+ return $code;
+}
+
+# Decrypt with all 32 round keys (each vsm4r.vs call runs 4 rounds)
+sub dec_blk {
+ my $data = shift;
+
+ my $code=<<___;
+ @{[vsm4r_vs $data, $vk7]}
+ @{[vsm4r_vs $data, $vk6]}
+ @{[vsm4r_vs $data, $vk5]}
+ @{[vsm4r_vs $data, $vk4]}
+ @{[vsm4r_vs $data, $vk3]}
+ @{[vsm4r_vs $data, $vk2]}
+ @{[vsm4r_vs $data, $vk1]}
+ @{[vsm4r_vs $data, $vk0]}
+___
+
+ return $code;
+}
+
+# Decrypt 4 independent blocks, interleaved per round-key group so the
+# vsm4r.vs operations can overlap in the pipeline
+sub dec_4blks {
+ my $data0 = shift;
+ my $data1 = shift;
+ my $data2 = shift;
+ my $data3 = shift;
+
+ my $code=<<___;
+ @{[vsm4r_vs $data0, $vk7]}
+ @{[vsm4r_vs $data1, $vk7]}
+ @{[vsm4r_vs $data2, $vk7]}
+ @{[vsm4r_vs $data3, $vk7]}
+
+ @{[vsm4r_vs $data0, $vk6]}
+ @{[vsm4r_vs $data1, $vk6]}
+ @{[vsm4r_vs $data2, $vk6]}
+ @{[vsm4r_vs $data3, $vk6]}
+
+ @{[vsm4r_vs $data0, $vk5]}
+ @{[vsm4r_vs $data1, $vk5]}
+ @{[vsm4r_vs $data2, $vk5]}
+ @{[vsm4r_vs $data3, $vk5]}
+
+ @{[vsm4r_vs $data0, $vk4]}
+ @{[vsm4r_vs $data1, $vk4]}
+ @{[vsm4r_vs $data2, $vk4]}
+ @{[vsm4r_vs $data3, $vk4]}
+
+ @{[vsm4r_vs $data0, $vk3]}
+ @{[vsm4r_vs $data1, $vk3]}
+ @{[vsm4r_vs $data2, $vk3]}
+ @{[vsm4r_vs $data3, $vk3]}
+
+ @{[vsm4r_vs $data0, $vk2]}
+ @{[vsm4r_vs $data1, $vk2]}
+ @{[vsm4r_vs $data2, $vk2]}
+ @{[vsm4r_vs $data3, $vk2]}
+
+ @{[vsm4r_vs $data0, $vk1]}
+ @{[vsm4r_vs $data1, $vk1]}
+ @{[vsm4r_vs $data2, $vk1]}
+ @{[vsm4r_vs $data3, $vk1]}
+
+ @{[vsm4r_vs $data0, $vk0]}
+ @{[vsm4r_vs $data1, $vk0]}
+ @{[vsm4r_vs $data2, $vk0]}
+ @{[vsm4r_vs $data3, $vk0]}
+___
+
+ return $code;
+}
+
+####
+# void rv64i_zvksed_sm4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+# size_t len, const SM4_KEY *key,
+# unsigned char *iv, int enc);
+#
+{
+my ($in,$out,$len,$keys,$ivp)=("a0","a1","a2","a3","a4");
+my ($tmp,$base)=("t0","t2");
+my ($vdata0,$vdata1,$vdata2,$vdata3,$vdata4,$vdata5,$vdata6,$vdata7)=("v1","v2","v3","v4","v5","v6","v7","v24");
+my ($vivec)=("v8");
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvksed_sm4_cbc_encrypt
+.type rv64i_zvksed_sm4_cbc_encrypt,\@function
+rv64i_zvksed_sm4_cbc_encrypt:
+ # Check that the length is a non-zero multiple of 16
+ li $tmp, $BLOCK_SIZE
+ bltu $len, $tmp, .Lcbc_enc_end
+ andi $tmp, $len, 15
+ bnez $tmp, .Lcbc_enc_end
+
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+ # Load 32 round keys
+ @{[enc_load_key $keys]}
+
+ # Load IV
+ @{[vle32_v $vivec, $ivp]}
+# =====================================================
+# If data length ≥ 64 bytes, process 4 blocks in batch:
+# 4-block CBC encryption pipeline:
+# 1. Load 4 plaintext blocks
+# 2. Reverse bytes for SM4 endianness
+# 3. Perform XOR operation with IV or previous ciphertext block (CBC chain)
+# 4. Encrypt each data block using the enc_blk function
+# 5. Adjust the byte order and store the ciphertext block
+# 6. Update the initialization vector (IV)
+# If data length < 64 bytes, process it block by block using the Lcbc_enc_single function
+# =====================================================
+.Lcbc_enc_loop:
+ li $tmp, $FOUR_BLOCKS
+ bltu $len, $tmp, .Lcbc_enc_single
+ # Load input data0-data3
+ @{[vle32_v $vdata0, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata1, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata2, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata3, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ # XOR with IV
+ @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+ @{[vrev8_v $vdata0, $vdata0]}
+ # Encrypt with all keys
+ @{[enc_blk $vdata0]}
+ @{[vrev8_v $vdata0, $vdata0]}
+
+ # Save the ciphertext (in reverse element order)
+ li $tmp_stride, $STRIDE
+ @{[reverse_order_S $vdata0, $out]}
+ # Update IV to ciphertext block 0
+ @{[vle32_v $vivec, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata1, $vdata1, $vivec]}
+
+ @{[vrev8_v $vdata1, $vdata1]}
+ @{[enc_blk $vdata1]}
+ @{[vrev8_v $vdata1, $vdata1]}
+
+ @{[reverse_order_S $vdata1, $out]}
+
+ # Update IV to ciphertext block 1
+ @{[vle32_v $vivec, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata2, $vdata2, $vivec]}
+
+ @{[vrev8_v $vdata2, $vdata2]}
+ @{[enc_blk $vdata2]}
+ @{[vrev8_v $vdata2, $vdata2]}
+
+ @{[reverse_order_S $vdata2, $out]}
+ # Update IV to ciphertext block 2
+ @{[vle32_v $vivec, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata3, $vdata3, $vivec]}
+
+ @{[vrev8_v $vdata3, $vdata3]}
+ @{[enc_blk $vdata3]}
+ @{[vrev8_v $vdata3, $vdata3]}
+
+ @{[reverse_order_S $vdata3, $out]}
+ # Update IV to ciphertext block 3
+ @{[vle32_v $vivec, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ addi $len, $len, -$FOUR_BLOCKS
+ bnez $len, .Lcbc_enc_loop
+ # Save the final IV
+ @{[vse32_v $vivec, $ivp]}
+ ret
+
+.Lcbc_enc_single:
+ # Load input data0
+ @{[vle32_v $vdata0, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ # XOR with IV
+ @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+ @{[vrev8_v $vdata0, $vdata0]}
+ # Encrypt with all keys
+ @{[enc_blk $vdata0]}
+ @{[vrev8_v $vdata0, $vdata0]}
+
+ # Save the ciphertext (in reverse element order)
+ li $tmp_stride, $STRIDE
+ @{[reverse_order_S $vdata0, $out]}
+
+ # Update IV to the ciphertext block just written
+ @{[vle32_v $vivec, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ addi $len, $len, -$BLOCK_SIZE
+
+ li $tmp, $BLOCK_SIZE
+ bgeu $len, $tmp, .Lcbc_enc_single
+ # Save the final IV
+ @{[vse32_v $vivec, $ivp]}
+.Lcbc_enc_end:
+ ret
+.size rv64i_zvksed_sm4_cbc_encrypt,.-rv64i_zvksed_sm4_cbc_encrypt
+___
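
CBC encryption is inherently serial: each plaintext block must be XORed with
the previous ciphertext block before it can be encrypted, so even the 4-block
path above only batches the loads and still encrypts one block at a time. A
minimal scalar model of the chaining, where sm4_encrypt_block() is a
hypothetical stand-in for the vectorised enc_blk path:

    #include <stddef.h>
    #include <string.h>

    /* Hypothetical single-block primitive standing in for enc_blk. */
    void sm4_encrypt_block(const unsigned char in[16], unsigned char out[16],
                           const void *key);

    static void cbc_encrypt_model(const unsigned char *in, unsigned char *out,
                                  size_t len, const void *key,
                                  unsigned char iv[16])
    {
        unsigned char blk[16];
        int i;

        for (; len >= 16; len -= 16, in += 16, out += 16) {
            for (i = 0; i < 16; i++)
                blk[i] = in[i] ^ iv[i];   /* XOR with IV / previous ciphertext */
            sm4_encrypt_block(blk, out, key);
            memcpy(iv, out, 16);          /* ciphertext becomes the next IV */
        }
    }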
+
+####
+# void rv64i_zvksed_sm4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+# size_t len, const SM4_KEY *key,
+# unsigned char *iv, int enc);
+#
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvksed_sm4_cbc_decrypt
+.type rv64i_zvksed_sm4_cbc_decrypt,\@function
+rv64i_zvksed_sm4_cbc_decrypt:
+ # Check that the length is a non-zero multiple of 16
+ li $tmp, $BLOCK_SIZE
+ bltu $len, $tmp, .Lcbc_dec_end
+ andi $tmp, $len, 15
+ bnez $tmp, .Lcbc_dec_end
+
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+ # Load IV (in reverse element order)
+ li $tmp_stride, $STRIDE
+ @{[reverse_order_L $vivec, $ivp]}
+
+ # Load 32 round keys
+ @{[dec_load_key $keys]}
+# =====================================================
+# If data length ≥ 128 bytes, process 8 blocks in batch:
+# 8-block CBC decryption pipeline:
+# 1. Load 8 ciphertext blocks
+# 2. Reverse bytes for SM4 endianness
+# 3. Use two calls to dec_4blks for decrypting each data block
+# 4. XOR with previous ciphertext block (CBC chain)
+# 5. Update IV and store plaintext with byte reversal
+# =====================================================
+.Lcbc_dec_loop:
+ li $tmp, $EIGHT_BLOCKS
+ bltu $len, $tmp, .Lcbc_check_64
+ # Load input data0-data7
+ @{[vle32_v $vdata0, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata1, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata2, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata3, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata4, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata5, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata6, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata7, $in]}
+ addi $in, $in, $BLOCK_SIZE
+
+ @{[vrev8_v $vdata0, $vdata0]}
+ @{[vrev8_v $vdata1, $vdata1]}
+ @{[vrev8_v $vdata2, $vdata2]}
+ @{[vrev8_v $vdata3, $vdata3]}
+ @{[vrev8_v $vdata4, $vdata4]}
+ @{[vrev8_v $vdata5, $vdata5]}
+ @{[vrev8_v $vdata6, $vdata6]}
+ @{[vrev8_v $vdata7, $vdata7]}
+ # Decrypt 8 data blocks
+ @{[dec_4blks $vdata0,$vdata1,$vdata2,$vdata3]}
+ @{[dec_4blks $vdata4,$vdata5,$vdata6,$vdata7]}
+ @{[vrev8_v $vdata0, $vdata0]}
+ @{[vrev8_v $vdata1, $vdata1]}
+ @{[vrev8_v $vdata2, $vdata2]}
+ @{[vrev8_v $vdata3, $vdata3]}
+ @{[vrev8_v $vdata4, $vdata4]}
+ @{[vrev8_v $vdata5, $vdata5]}
+ @{[vrev8_v $vdata6, $vdata6]}
+ @{[vrev8_v $vdata7, $vdata7]}
+
+ @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+ # Load ciphertext block 0 as the next IV (in reverse element order)
+ addi $base, $in, -128
+ @{[reverse_order_L $vivec, $base]}
+
+ # Save the plaintext (in reverse element order)
+ @{[reverse_order_S $vdata0, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata1, $vdata1, $vivec]}
+
+ addi $base, $in, -112
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata1, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata2, $vdata2, $vivec]}
+
+ addi $base, $in, -96
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata2, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata3, $vdata3, $vivec]}
+
+ addi $base, $in, -80
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata3, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata4, $vdata4, $vivec]}
+
+ addi $base, $in, -64
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata4, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata5, $vdata5, $vivec]}
+
+ addi $base, $in, -48
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata5, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata6, $vdata6, $vivec]}
+
+ addi $base, $in, -32
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata6, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata7, $vdata7, $vivec]}
+
+ addi $base, $in, -16
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata7, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ addi $len, $len, -$EIGHT_BLOCKS
+ bnez $len, .Lcbc_dec_loop
+ # Save the final IV (in reverse element order)
+ @{[reverse_order_S $vivec, $ivp]}
+ ret
+# =====================================================
+# If data length ≥ 64 bytes, process in batches of 4 blocks:
+# 4-block CBC decryption process:
+# 1. Load 4 ciphertext blocks
+# 2. Reverse byte order to fit SM4 byte order
+# 3. Decrypt each data block using the dec_4blks function
+# 4. XOR with previous ciphertext block (CBC chain)
+# 5. Update IV and store plaintext with byte reversal
+# If the data length is less than 64 bytes, process it block by block using the Lcbc_dec_single function
+# =====================================================
+.Lcbc_check_64:
+ li $tmp, $FOUR_BLOCKS
+ bltu $len, $tmp, .Lcbc_dec_single
+ # Load input data0-data3
+ @{[vle32_v $vdata0, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata1, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata2, $in]}
+ addi $in, $in, $BLOCK_SIZE
+ @{[vle32_v $vdata3, $in]}
+ addi $in, $in, $BLOCK_SIZE
+
+ @{[vrev8_v $vdata0, $vdata0]}
+ @{[vrev8_v $vdata1, $vdata1]}
+ @{[vrev8_v $vdata2, $vdata2]}
+ @{[vrev8_v $vdata3, $vdata3]}
+ # Decrypt 4 data blocks
+ @{[dec_4blks $vdata0,$vdata1,$vdata2,$vdata3]}
+ @{[vrev8_v $vdata0, $vdata0]}
+ @{[vrev8_v $vdata1, $vdata1]}
+ @{[vrev8_v $vdata2, $vdata2]}
+ @{[vrev8_v $vdata3, $vdata3]}
+
+ @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+ # Load ciphertext block 0 as the next IV (in reverse element order)
+ addi $base, $in, -64
+ @{[reverse_order_L $vivec, $base]}
+ # Save the plaintext (in reverse element order)
+ @{[reverse_order_S $vdata0, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata1, $vdata1, $vivec]}
+
+ addi $base, $in, -48
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata1, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata2, $vdata2, $vivec]}
+
+ addi $base, $in, -32
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata2, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ @{[vxor_vv $vdata3, $vdata3, $vivec]}
+
+ addi $base, $in, -16
+ @{[reverse_order_L $vivec, $base]}
+ @{[reverse_order_S $vdata3, $out]}
+ addi $out, $out, $BLOCK_SIZE
+
+ addi $len, $len, -$FOUR_BLOCKS
+ bnez $len, .Lcbc_check_64
+ # Save the final IV (in reverse element order)
+ @{[reverse_order_S $vivec, $ivp]}
+ ret
+
+.Lcbc_dec_single:
+ # Load input data0
+ @{[vle32_v $vdata0, $in]}
+ addi $in, $in, $BLOCK_SIZE
+
+ @{[vrev8_v $vdata0, $vdata0]}
+ # Decrypt with all keys
+ @{[dec_blk $vdata0]}
+ @{[vrev8_v $vdata0, $vdata0]}
+
+ # XOR with IV
+ @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+ # Load the ciphertext block just consumed as the next IV (in reverse element order)
+ li $tmp_stride, $STRIDE
+ addi $base, $in, -$BLOCK_SIZE
+ @{[reverse_order_L $vivec, $base]}
+ # Save the plaintext (in reverse element order)
+ @{[reverse_order_S $vdata0, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ addi $len, $len, -$BLOCK_SIZE
+
+ li $tmp, $BLOCK_SIZE
+ bgeu $len, $tmp, .Lcbc_dec_single
+ # Save the final IV (in reverse element order)
+ @{[reverse_order_S $vivec, $ivp]}
+.Lcbc_dec_end:
+ ret
+.size rv64i_zvksed_sm4_cbc_decrypt,.-rv64i_zvksed_sm4_cbc_decrypt
+___
+}
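
Decryption, by contrast, parallelises freely: every plaintext block is
D(C[i]) xor C[i-1], and all ciphertext blocks are available up front. That is
what lets dec_4blks interleave four blocks per round-key group and the main
loop keep eight blocks in flight. A scalar model of the same chaining, with
sm4_decrypt_block() again a hypothetical stand-in for the vector path:

    #include <stddef.h>
    #include <string.h>

    /* Hypothetical single-block primitive standing in for dec_blk. */
    void sm4_decrypt_block(const unsigned char in[16], unsigned char out[16],
                           const void *key);

    static void cbc_decrypt_model(const unsigned char *in, unsigned char *out,
                                  size_t len, const void *key,
                                  unsigned char iv[16])
    {
        unsigned char prev[16], c[16], tmp[16];
        int i;

        memcpy(prev, iv, 16);
        for (; len >= 16; len -= 16, in += 16, out += 16) {
            memcpy(c, in, 16);               /* keep the ciphertext: out may alias in */
            sm4_decrypt_block(c, tmp, key);  /* independent of every other block */
            for (i = 0; i < 16; i++)
                out[i] = tmp[i] ^ prev[i];
            memcpy(prev, c, 16);             /* this ciphertext chains into the next */
        }
        memcpy(iv, prev, 16);                /* write back the final IV, as the asm does */
    }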
+
####
# int rv64i_zvksed_sm4_set_encrypt_key(const unsigned char *userKey,
# SM4_KEY *key);
@@ -94,19 +636,19 @@ rv64i_zvksed_sm4_set_encrypt_key:
# Store round keys
@{[vse32_v $vk0, $keys]} # rk[0:3]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vse32_v $vk1, $keys]} # rk[4:7]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vse32_v $vk2, $keys]} # rk[8:11]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vse32_v $vk3, $keys]} # rk[12:15]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vse32_v $vk4, $keys]} # rk[16:19]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vse32_v $vk5, $keys]} # rk[20:23]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vse32_v $vk6, $keys]} # rk[24:27]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vse32_v $vk7, $keys]} # rk[28:31]
li a0, 1
@@ -150,21 +692,21 @@ rv64i_zvksed_sm4_set_decrypt_key:
# Store round keys in reverse order
addi $keys, $keys, 12
- li $stride, -4
+ li $stride, $STRIDE
@{[vsse32_v $vk7, $keys, $stride]} # rk[31:28]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vsse32_v $vk6, $keys, $stride]} # rk[27:24]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vsse32_v $vk5, $keys, $stride]} # rk[23:20]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vsse32_v $vk4, $keys, $stride]} # rk[19:16]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vsse32_v $vk3, $keys, $stride]} # rk[15:12]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vsse32_v $vk2, $keys, $stride]} # rk[11:8]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vsse32_v $vk1, $keys, $stride]} # rk[7:4]
- addi $keys, $keys, 16
+ addi $keys, $keys, $BLOCK_SIZE
@{[vsse32_v $vk0, $keys, $stride]} # rk[3:0]
li a0, 1
@@ -178,8 +720,8 @@ ___
# const SM4_KEY *key);
#
{
-my ($in,$out,$keys,$stride)=("a0","a1","a2","t0");
-my ($vdata,$vk0,$vk1,$vk2,$vk3,$vk4,$vk5,$vk6,$vk7,$vgen)=("v1","v2","v3","v4","v5","v6","v7","v8","v9","v10");
+my ($in,$out,$keys)=("a0","a1","a2");
+my ($vdata)=("v1");
$code .= <<___;
.p2align 3
.globl rv64i_zvksed_sm4_encrypt
@@ -187,42 +729,19 @@ $code .= <<___;
rv64i_zvksed_sm4_encrypt:
@{[vsetivli__x0_4_e32_m1_tu_mu]}
- # Order of elements was adjusted in set_encrypt_key()
- @{[vle32_v $vk0, $keys]} # rk[0:3]
- addi $keys, $keys, 16
- @{[vle32_v $vk1, $keys]} # rk[4:7]
- addi $keys, $keys, 16
- @{[vle32_v $vk2, $keys]} # rk[8:11]
- addi $keys, $keys, 16
- @{[vle32_v $vk3, $keys]} # rk[12:15]
- addi $keys, $keys, 16
- @{[vle32_v $vk4, $keys]} # rk[16:19]
- addi $keys, $keys, 16
- @{[vle32_v $vk5, $keys]} # rk[20:23]
- addi $keys, $keys, 16
- @{[vle32_v $vk6, $keys]} # rk[24:27]
- addi $keys, $keys, 16
- @{[vle32_v $vk7, $keys]} # rk[28:31]
+ @{[enc_load_key $keys]}
# Load input data
@{[vle32_v $vdata, $in]}
@{[vrev8_v $vdata, $vdata]}
# Encrypt with all keys
- @{[vsm4r_vs $vdata, $vk0]}
- @{[vsm4r_vs $vdata, $vk1]}
- @{[vsm4r_vs $vdata, $vk2]}
- @{[vsm4r_vs $vdata, $vk3]}
- @{[vsm4r_vs $vdata, $vk4]}
- @{[vsm4r_vs $vdata, $vk5]}
- @{[vsm4r_vs $vdata, $vk6]}
- @{[vsm4r_vs $vdata, $vk7]}
+ @{[enc_blk $vdata]}
# Save the ciphertext (in reverse element order)
@{[vrev8_v $vdata, $vdata]}
- li $stride, -4
- addi $out, $out, 12
- @{[vsse32_v $vdata, $out, $stride]}
+ li $tmp_stride, $STRIDE
+ @{[reverse_order_S $vdata, $out]}
ret
.size rv64i_zvksed_sm4_encrypt,.-rv64i_zvksed_sm4_encrypt
@@ -234,8 +753,8 @@ ___
# const SM4_KEY *key);
#
{
-my ($in,$out,$keys,$stride)=("a0","a1","a2","t0");
-my ($vdata,$vk0,$vk1,$vk2,$vk3,$vk4,$vk5,$vk6,$vk7,$vgen)=("v1","v2","v3","v4","v5","v6","v7","v8","v9","v10");
+my ($in,$out,$keys)=("a0","a1","a2");
+my ($vdata)=("v1");
$code .= <<___;
.p2align 3
.globl rv64i_zvksed_sm4_decrypt
@@ -243,42 +762,19 @@ $code .= <<___;
rv64i_zvksed_sm4_decrypt:
@{[vsetivli__x0_4_e32_m1_tu_mu]}
- # Order of elements was adjusted in set_decrypt_key()
- @{[vle32_v $vk7, $keys]} # rk[31:28]
- addi $keys, $keys, 16
- @{[vle32_v $vk6, $keys]} # rk[27:24]
- addi $keys, $keys, 16
- @{[vle32_v $vk5, $keys]} # rk[23:20]
- addi $keys, $keys, 16
- @{[vle32_v $vk4, $keys]} # rk[19:16]
- addi $keys, $keys, 16
- @{[vle32_v $vk3, $keys]} # rk[15:12]
- addi $keys, $keys, 16
- @{[vle32_v $vk2, $keys]} # rk[11:8]
- addi $keys, $keys, 16
- @{[vle32_v $vk1, $keys]} # rk[7:4]
- addi $keys, $keys, 16
- @{[vle32_v $vk0, $keys]} # rk[3:0]
+ @{[dec_load_key $keys]}
# Load input data
@{[vle32_v $vdata, $in]}
@{[vrev8_v $vdata, $vdata]}
# Decrypt with all keys
- @{[vsm4r_vs $vdata, $vk7]}
- @{[vsm4r_vs $vdata, $vk6]}
- @{[vsm4r_vs $vdata, $vk5]}
- @{[vsm4r_vs $vdata, $vk4]}
- @{[vsm4r_vs $vdata, $vk3]}
- @{[vsm4r_vs $vdata, $vk2]}
- @{[vsm4r_vs $vdata, $vk1]}
- @{[vsm4r_vs $vdata, $vk0]}
+ @{[dec_blk $vdata]}
# Save the plaintext (in reverse element order)
@{[vrev8_v $vdata, $vdata]}
- li $stride, -4
- addi $out, $out, 12
- @{[vsse32_v $vdata, $out, $stride]}
+ li $tmp_stride, $STRIDE
+ @{[reverse_order_S $vdata, $out]}
ret
.size rv64i_zvksed_sm4_decrypt,.-rv64i_zvksed_sm4_decrypt
diff --git a/include/crypto/sm4_platform.h b/include/crypto/sm4_platform.h
index 0e9e2e1d39..f7b436255c 100644
--- a/include/crypto/sm4_platform.h
+++ b/include/crypto/sm4_platform.h
@@ -50,6 +50,12 @@ void rv64i_zvksed_sm4_encrypt(const unsigned char *in, unsigned char *out,
const SM4_KEY *key);
void rv64i_zvksed_sm4_decrypt(const unsigned char *in, unsigned char *out,
const SM4_KEY *key);
+void rv64i_zvksed_sm4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const SM4_KEY *key,
+ unsigned char *iv, int enc);
+void rv64i_zvksed_sm4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const SM4_KEY *key,
+ unsigned char *iv, int enc);
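
These prototypes deliberately match OpenSSL's cbc128_f callback type, as
declared in <openssl/modes.h>, which is what makes the (cbc128_f) casts in
the provider glue below legal:

    typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out,
                              size_t len, const void *key,
                              unsigned char ivec[16], int enc);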
# elif (defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64))
/* Intel x86_64 support */
# include "internal/cryptlib.h"
diff --git a/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc b/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc
index 763d9d09dd..683b2b25a1 100644
--- a/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc
+++ b/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc
@@ -26,6 +26,7 @@ static int cipher_hw_rv64i_zvksed_sm4_initkey(PROV_CIPHER_CTX *ctx,
SM4_KEY *ks = &sctx->ks.ks;
ctx->ks = ks;
+
if (ctx->enc
|| (ctx->mode != EVP_CIPH_ECB_MODE
&& ctx->mode != EVP_CIPH_CBC_MODE)) {
@@ -38,6 +39,14 @@ static int cipher_hw_rv64i_zvksed_sm4_initkey(PROV_CIPHER_CTX *ctx,
ctx->stream.cbc = NULL;
}
+ if (ctx->mode == EVP_CIPH_CBC_MODE) {
+ if (ctx->enc) {
+ ctx->stream.cbc = (cbc128_f) rv64i_zvksed_sm4_cbc_encrypt;
+ } else {
+ ctx->stream.cbc = (cbc128_f) rv64i_zvksed_sm4_cbc_decrypt;
+ }
+ }
+
return 1;
}
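
With ctx->stream.cbc populated, the provider's generic CBC path calls straight
into the assembly instead of looping over single blocks. A simplified sketch
of that dispatch, modelled on ossl_cipher_hw_generic_cbc() (not a verbatim
quote):

    if (ctx->stream.cbc != NULL)
        (*ctx->stream.cbc)(in, out, len, ctx->ks, ctx->iv, ctx->enc);
    else if (ctx->enc)
        CRYPTO_cbc128_encrypt(in, out, len, ctx->ks, ctx->iv, ctx->block);
    else
        CRYPTO_cbc128_decrypt(in, out, len, ctx->ks, ctx->iv, ctx->block);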
diff --git a/test/recipes/30-test_evp_data/evpciph_sm4.txt b/test/recipes/30-test_evp_data/evpciph_sm4.txt
index 993cf7b51e..f23129cd8d 100644
--- a/test/recipes/30-test_evp_data/evpciph_sm4.txt
+++ b/test/recipes/30-test_evp_data/evpciph_sm4.txt
@@ -13,6 +13,12 @@ Key = 0123456789ABCDEFFEDCBA9876543210
Plaintext = 0123456789ABCDEFFEDCBA9876543210
Ciphertext = 681EDF34D206965E86B3E94F536E4246
+Cipher = SM4-CBC
+Key = 0123456789ABCDEFFEDCBA9876543210
+IV = 0123456789ABCDEFFEDCBA9876543210
+Plaintext = 0123456789ABCDEFFEDCBA9876543210
+Ciphertext = 2677F46B09C122CC975533105BD4A22A
+
Cipher = SM4-CBC
Key = 0123456789ABCDEFFEDCBA9876543210
IV = 0123456789ABCDEFFEDCBA9876543210
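
The new one-block vector (key, IV and plaintext all the same 16 bytes) can be
cross-checked through EVP against any SM4 backend. A minimal sketch, assuming
a build with SM4 enabled and omitting error handling:

    #include <stdio.h>
    #include <openssl/evp.h>

    int main(void)
    {
        static const unsigned char key[16] = {
            0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
            0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10
        };
        unsigned char out[32];
        int outl = 0, tmpl = 0, i;
        EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();

        /* Key, IV and plaintext are all the same 16 bytes in this vector. */
        EVP_EncryptInit_ex(ctx, EVP_sm4_cbc(), NULL, key, key);
        EVP_CIPHER_CTX_set_padding(ctx, 0);
        EVP_EncryptUpdate(ctx, out, &outl, key, sizeof(key));
        EVP_EncryptFinal_ex(ctx, out + outl, &tmpl);
        for (i = 0; i < outl + tmpl; i++)
            printf("%02X", out[i]);       /* expect 2677F46B09C122CC975533105BD4A22A */
        printf("\n");
        EVP_CIPHER_CTX_free(ctx);
        return 0;
    }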