Commit 3d1b8389cb for openssl.org
commit 3d1b8389cbe38e63b21d617dc8f480753787c8cf
Author: Samaresh Kumar Singh <ssam3003@gmail.com>
Date: Sat Mar 28 14:43:47 2026 -0500
chacha/asm: save f17 in 8x prologue for contiguous f14-f25 range
f17 is not directly clobbered by any vxxlor in this function, but
saving the full contiguous range f14-f25 is cleaner and avoids any
future ambiguity if the code is modified. Adjust all subsequent FPR
slot offsets and the VMX base offset accordingly, and grow the frame
size to match (updating its comment as well).
Reviewed-by: Tomas Mraz <tomas@openssl.foundation>
Reviewed-by: Paul Dale <paul.dale@oracle.com>
MergeDate: Sat Apr 11 20:06:04 2026
(Merged from https://github.com/openssl/openssl/pull/30587)
diff --git a/crypto/chacha/asm/chachap10-ppc.pl b/crypto/chacha/asm/chachap10-ppc.pl
index 60992d6714..10f8a57749 100755
--- a/crypto/chacha/asm/chachap10-ppc.pl
+++ b/crypto/chacha/asm/chachap10-ppc.pl
@@ -501,7 +501,7 @@ my ($xv8,$xv9,$xv10,$xv11,$xv12,$xv13,$xv14,$xv15,$xv16,$xv17) = map("v$_",(8..1
my ($xv18,$xv19,$xv20,$xv21) = map("v$_",(18..21));
my ($xv22,$xv23,$xv24,$xv25,$xv26) = map("v$_",(22..26));
-my $FRAME=$LOCALS+64+9*16+12*8+4*16; # 8*16 for v24-v31 offload, 12*8 for f14-f26, 4*16 for v20-v23
+my $FRAME=$LOCALS+64+9*16+14*8+4*16; # 8*16 for v24-v31 offload, 13*8 for f14-f26 plus 8 bytes pad so the v20-v23 area and total frame stay 16-byte aligned, 4*16 for v20-v23
sub VSX_lane_ROUND_8x {
my ($a0,$b0,$c0,$d0,$a4,$b4,$c4,$d4)=@_;
@@ -665,20 +665,21 @@ $code.=<<___;
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
- stfd f14,`$LOCALS+64+9*16+0*8`($sp) # save FPR14-FPR25 (callee-saved per ELFv2 ABI)
+ stfd f14,`$LOCALS+64+9*16+0*8`($sp) # save FPR14-FPR26 (callee-saved per ELFv2 ABI)
stfd f15,`$LOCALS+64+9*16+1*8`($sp)
stfd f16,`$LOCALS+64+9*16+2*8`($sp)
- stfd f18,`$LOCALS+64+9*16+3*8`($sp)
- stfd f19,`$LOCALS+64+9*16+4*8`($sp)
- stfd f20,`$LOCALS+64+9*16+5*8`($sp)
- stfd f21,`$LOCALS+64+9*16+6*8`($sp)
- stfd f22,`$LOCALS+64+9*16+7*8`($sp)
- stfd f23,`$LOCALS+64+9*16+8*8`($sp)
- stfd f24,`$LOCALS+64+9*16+9*8`($sp)
- stfd f25,`$LOCALS+64+9*16+10*8`($sp)
- be?stfd f26,`$LOCALS+64+9*16+11*8`($sp) # BE only
- li r10,`$LOCALS+64+9*16+12*8+15`
- li r11,`$LOCALS+64+9*16+12*8+31`
+ stfd f17,`$LOCALS+64+9*16+3*8`($sp)
+ stfd f18,`$LOCALS+64+9*16+4*8`($sp)
+ stfd f19,`$LOCALS+64+9*16+5*8`($sp)
+ stfd f20,`$LOCALS+64+9*16+6*8`($sp)
+ stfd f21,`$LOCALS+64+9*16+7*8`($sp)
+ stfd f22,`$LOCALS+64+9*16+8*8`($sp)
+ stfd f23,`$LOCALS+64+9*16+9*8`($sp)
+ stfd f24,`$LOCALS+64+9*16+10*8`($sp)
+ stfd f25,`$LOCALS+64+9*16+11*8`($sp)
+ be?stfd f26,`$LOCALS+64+9*16+12*8`($sp) # BE only
+ li r10,`$LOCALS+64+9*16+14*8+15` # 14*8 = 13 FPR slots + 8 pad: keeps stvx base 16-byte aligned within the frame
+ li r11,`$LOCALS+64+9*16+14*8+31`
stvx v20,r10,$sp # save VMX v20-v23 (callee-saved per ELFv2 ABI)
addi r10,r10,32
stvx v21,r11,$sp
@@ -1180,20 +1181,21 @@ $code.=<<___;
Ldone_vsx_8x:
lwz r12,`$LOCALS+64+9*16-4`($sp) # pull vrsave
- lfd f14,`$LOCALS+64+9*16+0*8`($sp) # restore FPR14-FPR25 (callee-saved per ELFv2 ABI)
+ lfd f14,`$LOCALS+64+9*16+0*8`($sp) # restore FPR14-FPR26 (callee-saved per ELFv2 ABI)
lfd f15,`$LOCALS+64+9*16+1*8`($sp)
lfd f16,`$LOCALS+64+9*16+2*8`($sp)
- lfd f18,`$LOCALS+64+9*16+3*8`($sp)
- lfd f19,`$LOCALS+64+9*16+4*8`($sp)
- lfd f20,`$LOCALS+64+9*16+5*8`($sp)
- lfd f21,`$LOCALS+64+9*16+6*8`($sp)
- lfd f22,`$LOCALS+64+9*16+7*8`($sp)
- lfd f23,`$LOCALS+64+9*16+8*8`($sp)
- lfd f24,`$LOCALS+64+9*16+9*8`($sp)
- lfd f25,`$LOCALS+64+9*16+10*8`($sp)
- be?lfd f26,`$LOCALS+64+9*16+11*8`($sp) # BE only
- li r10,`$LOCALS+64+9*16+12*8+15`
- li r11,`$LOCALS+64+9*16+12*8+31`
+ lfd f17,`$LOCALS+64+9*16+3*8`($sp)
+ lfd f18,`$LOCALS+64+9*16+4*8`($sp)
+ lfd f19,`$LOCALS+64+9*16+5*8`($sp)
+ lfd f20,`$LOCALS+64+9*16+6*8`($sp)
+ lfd f21,`$LOCALS+64+9*16+7*8`($sp)
+ lfd f22,`$LOCALS+64+9*16+8*8`($sp)
+ lfd f23,`$LOCALS+64+9*16+9*8`($sp)
+ lfd f24,`$LOCALS+64+9*16+10*8`($sp)
+ lfd f25,`$LOCALS+64+9*16+11*8`($sp)
+ be?lfd f26,`$LOCALS+64+9*16+12*8`($sp) # BE only
+ li r10,`$LOCALS+64+9*16+14*8+15` # must mirror the prologue base: 13 FPR slots + 8 pad for 16-byte alignment
+ li r11,`$LOCALS+64+9*16+14*8+31`
lvx v20,r10,$sp # restore VMX v20-v23 (callee-saved per ELFv2 ABI)
addi r10,r10,32
lvx v21,r11,$sp