Commit 65c8fdc6c6 for openssl.org

commit 65c8fdc6c614f31f0af00af2ddbfbd6ea0bec210
Author: slontis <shane.lontis@oracle.com>
Date:   Thu Feb 19 15:38:42 2026 +1100

    SHAKE - Fix s390x CI problems for SLH-DSA

    Fixes #30039

    In order to fix this, the ossl_sha3_ related functions have been
    renamed so that ossl_sha3_XXX() functions are the high level
    functions that contain calls to platform specific methods.
    ossl_sha3_XXX_default() etc are the 'general' platform methods.

    All of the state checking has been moved out of the platform specific
    methods. The sha3 provider dispatch functions now share the
    ossl_sha3_XXX() calls.

    Reviewed-by: Matt Caswell <matt@openssl.org>
    Reviewed-by: Paul Dale <paul.dale@oracle.com>
    Reviewed-by: Simo Sorce <simo@redhat.com>
    Reviewed-by: Tomas Mraz <tomas@openssl.org>
    MergeDate: Mon Mar  2 11:58:44 2026
    (Merged from https://github.com/openssl/openssl/pull/30104)

diff --git a/crypto/sha/sha3.c b/crypto/sha/sha3.c
index 1b1104e05b..698f51ba6f 100644
--- a/crypto/sha/sha3.c
+++ b/crypto/sha/sha3.c
@@ -60,67 +60,98 @@ int ossl_keccak_init(KECCAK1600_CTX *ctx, unsigned char pad, size_t bitlen, size
     return ret;
 }

-int ossl_sha3_update(KECCAK1600_CTX *ctx, const void *_inp, size_t len)
+/*
+ * A buffered absorb function that calls a platform specific absorb
+ * method.
+ */
+int ossl_sha3_absorb(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
 {
-    const unsigned char *inp = _inp;
-    size_t bsz = ctx->block_size;
+    const size_t bsz = ctx->block_size;
     size_t num, rem;

-    if (len == 0)
+    if (ossl_unlikely(len == 0))
         return 1;

-    if (ctx->xof_state == XOF_STATE_SQUEEZE
-        || ctx->xof_state == XOF_STATE_FINAL)
+    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
         return 0;

-    if ((num = ctx->bufsz) != 0) { /* process intermediate buffer? */
+    /* Is there anything in the buffer already ? */
+    if ((num = ctx->bufsz) != 0) {
+        /* Calculate how much space is left in the buffer */
         rem = bsz - num;
-
+        /* If the new input does not fill the buffer then just add it */
         if (len < rem) {
             memcpy(ctx->buf + num, inp, len);
             ctx->bufsz += len;
             return 1;
         }
-        /*
-         * We have enough data to fill or overflow the intermediate
-         * buffer. So we append |rem| bytes and process the block,
-         * leaving the rest for later processing...
-         */
+        /* otherwise fill up the buffer and absorb the buffer */
         memcpy(ctx->buf + num, inp, rem);
-        inp += rem, len -= rem;
-        (void)SHA3_absorb(ctx->A, ctx->buf, bsz, bsz);
+        /* Update the input pointer */
+        inp += rem;
+        len -= rem;
+        ctx->meth.absorb(ctx, ctx->buf, bsz);
         ctx->bufsz = 0;
-        /* ctx->buf is processed, ctx->num is guaranteed to be zero */
+        ctx->xof_state = XOF_STATE_ABSORB;
     }
-
+    /* Absorb the input - rem = leftover part of the input (< blocksize) */
+    rem = ctx->meth.absorb(ctx, inp, len);
     if (len >= bsz)
-        rem = SHA3_absorb(ctx->A, inp, len, bsz);
-    else
-        rem = len;
-
-    if (rem) {
+        ctx->xof_state = XOF_STATE_ABSORB;
+    /* Copy the leftover bit of the input into the buffer */
+    if (ossl_likely(rem > 0)) {
         memcpy(ctx->buf, inp + len - rem, rem);
         ctx->bufsz = rem;
     }
-
     return 1;
 }

 /*
- * ossl_sha3_final()is a single shot method
- * (Use ossl_sha3_squeeze for multiple calls).
- * outlen is the variable size output.
+ * Call a platform specific final method.
+ * In most cases outlen should be set to ctx->mdlen.
+ * This function assumes the caller has checked outlen is bounded.
  */
 int ossl_sha3_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    size_t bsz = ctx->block_size;
-    size_t num = ctx->bufsz;
+    int ret;

-    if (outlen == 0)
-        return 1;
     if (ctx->xof_state == XOF_STATE_SQUEEZE
         || ctx->xof_state == XOF_STATE_FINAL)
         return 0;
+    if (outlen == 0)
+        return 1;
+
+    ret = ctx->meth.final(ctx, out, outlen);
+    ctx->xof_state = XOF_STATE_FINAL;
+    return ret;
+}
+
+/* Calls a platform specific squeeze method */
+int ossl_sha3_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
+{
+    int ret = 0;
+
+    if (ctx->xof_state == XOF_STATE_FINAL)
+        return 0;
+    ret = ctx->meth.squeeze(ctx, out, outlen);
+    ctx->xof_state = XOF_STATE_SQUEEZE;
+    return ret;
+}
+
+/* Default version of the absorb() */
+size_t ossl_sha3_absorb_default(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
+{
+    return SHA3_absorb(ctx->A, inp, len, ctx->block_size);
+}
+
+/*
+ * Default version of the final() is a single shot method
+ * (Use ossl_shake_squeeze_default() for multiple calls).
+ */
+int ossl_sha3_final_default(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
+{
+    size_t bsz = ctx->block_size;
+    size_t num = ctx->bufsz;

     /*
      * Pad the data with 10*1. Note that |num| can be |bsz - 1|
@@ -133,48 +164,10 @@ int ossl_sha3_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)

     (void)SHA3_absorb(ctx->A, ctx->buf, bsz, bsz);

-    ctx->xof_state = XOF_STATE_FINAL;
     SHA3_squeeze(ctx->A, out, outlen, bsz, 0);
     return 1;
 }

-/* This is a buffered absorb function. */
-int ossl_sha3_absorb(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
-{
-    const size_t bsz = ctx->block_size;
-    size_t num, rem;
-
-    if (ossl_unlikely(len == 0))
-        return 1;
-
-    /* Is there anything in the buffer already ? */
-    if (ossl_likely((num = ctx->bufsz) != 0)) {
-        /* Calculate how much space is left in the buffer */
-        rem = bsz - num;
-        /* If the new input does not fill the buffer then just add it */
-        if (len < rem) {
-            memcpy(ctx->buf + num, inp, len);
-            ctx->bufsz += len;
-            return 1;
-        }
-        /* otherwise fill up the buffer and absorb the buffer */
-        memcpy(ctx->buf + num, inp, rem);
-        /* Update the input pointer */
-        inp += rem;
-        len -= rem;
-        ctx->meth.absorb(ctx, ctx->buf, bsz);
-        ctx->bufsz = 0;
-    }
-    /* Absorb the input - rem = leftover part of the input < blocksize) */
-    rem = ctx->meth.absorb(ctx, inp, len);
-    /* Copy the leftover bit of the input into the buffer */
-    if (ossl_likely(rem > 0)) {
-        memcpy(ctx->buf, inp + len - rem, rem);
-        ctx->bufsz = rem;
-    }
-    return 1;
-}
-
 /*
  * This method can be called multiple times.
  * Rather than heavily modifying assembler for SHA3_squeeze(),
@@ -185,19 +178,13 @@ int ossl_sha3_absorb(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
  * buffer the results. The next request will use the buffer first
  * to grab output bytes.
  */
-int ossl_sha3_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
+int ossl_shake_squeeze_default(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
     size_t bsz = ctx->block_size;
     size_t num = ctx->bufsz;
     size_t len;
     int next = 1;

-    if (outlen == 0)
-        return 1;
-
-    if (ctx->xof_state == XOF_STATE_FINAL)
-        return 0;
-
     /*
      * On the first squeeze call, finish the absorb process,
      * by adding the trailing padding and then doing
@@ -213,7 +200,6 @@ int ossl_sha3_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
         ctx->buf[num] = ctx->pad;
         ctx->buf[bsz - 1] |= 0x80;
         (void)SHA3_absorb(ctx->A, ctx->buf, bsz, bsz);
-        ctx->xof_state = XOF_STATE_SQUEEZE;
         num = ctx->bufsz = 0;
         next = 0;
     }
@@ -250,37 +236,13 @@ int ossl_sha3_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
         /* Step 4. Remember the leftover part of the squeezed block */
         ctx->bufsz = bsz - outlen;
     }
-
     return 1;
 }

-/*-
- * Generic software version of the absorb() and final().
- */
-static size_t generic_sha3_absorb(void *vctx, const void *inp, size_t len)
-{
-    KECCAK1600_CTX *ctx = vctx;
-
-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
-    ctx->xof_state = XOF_STATE_ABSORB;
-    return SHA3_absorb(ctx->A, inp, len, ctx->block_size);
-}
-
-static int generic_sha3_final(void *vctx, unsigned char *out, size_t outlen)
-{
-    return ossl_sha3_final((KECCAK1600_CTX *)vctx, out, outlen);
-}
-
-static int generic_sha3_squeeze(void *vctx, unsigned char *out, size_t outlen)
-{
-    return ossl_sha3_squeeze((KECCAK1600_CTX *)vctx, out, outlen);
-}
-
 static PROV_SHA3_METHOD shake_generic_meth = {
-    generic_sha3_absorb,
-    generic_sha3_final,
-    generic_sha3_squeeze
+    ossl_sha3_absorb_default,
+    ossl_sha3_final_default,
+    ossl_shake_squeeze_default
 };

 #if defined(S390_SHA3)
@@ -288,58 +250,45 @@ static PROV_SHA3_METHOD shake_generic_meth = {
 /*-
  * The platform specific parts of the absorb() and final() for S390X.
  */
-static size_t s390x_sha3_absorb(void *vctx, const void *inp, size_t len)
+static size_t sha3_absorb_s390x(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
 {
-    KECCAK1600_CTX *ctx = vctx;
     size_t rem = len % ctx->block_size;
     unsigned int fc;

-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
     if (len - rem > 0) {
         fc = ctx->pad;
         fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
-        ctx->xof_state = XOF_STATE_ABSORB;
         s390x_kimd(inp, len - rem, fc, ctx->A);
     }
     return rem;
 }

-static int s390x_shake_final(void *vctx, unsigned char *out, size_t outlen)
+static int shake_final_s390x(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    KECCAK1600_CTX *ctx = vctx;
     unsigned int fc;

-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
     fc = ctx->pad | S390X_KLMD_DUFOP;
     fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
-    ctx->xof_state = XOF_STATE_FINAL;
     s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, fc, ctx->A);
     return 1;
 }

-static int s390x_shake_squeeze(void *vctx, unsigned char *out, size_t outlen)
+static int shake_squeeze_s390x(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    KECCAK1600_CTX *ctx = vctx;
     unsigned int fc;
     size_t len;

-    if (ctx->xof_state == XOF_STATE_FINAL)
-        return 0;
     /*
      * On the first squeeze call, finish the absorb process (incl. padding).
      */
     if (ctx->xof_state != XOF_STATE_SQUEEZE) {
         fc = ctx->pad;
         fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
-        ctx->xof_state = XOF_STATE_SQUEEZE;
         s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, fc, ctx->A);
         ctx->bufsz = outlen % ctx->block_size;
         /* reuse ctx->bufsz to count bytes squeezed from current sponge */
         return 1;
     }
-    ctx->xof_state = XOF_STATE_SQUEEZE;
     if (ctx->bufsz != 0) {
         len = ctx->block_size - ctx->bufsz;
         if (outlen < len)
@@ -360,9 +309,9 @@ static int s390x_shake_squeeze(void *vctx, unsigned char *out, size_t outlen)
 }

 static PROV_SHA3_METHOD shake_s390x_meth = {
-    s390x_sha3_absorb,
-    s390x_shake_final,
-    s390x_shake_squeeze,
+    sha3_absorb_s390x,
+    shake_final_s390x,
+    shake_squeeze_s390x
 };
 #elif defined(__aarch64__) && defined(KECCAK1600_ASM)

@@ -371,17 +320,15 @@ size_t SHA3_absorb_cext(uint64_t A[5][5], const unsigned char *inp, size_t len,
 /*-
  * Hardware-assisted ARMv8.2 SHA3 extension version of the absorb()
  */
-static size_t armsha3_sha3_absorb(void *vctx, const void *inp, size_t len)
+static size_t sha3_absorb_arm(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
 {
-    KECCAK1600_CTX *ctx = vctx;
-
     return SHA3_absorb_cext(ctx->A, inp, len, ctx->block_size);
 }

 static PROV_SHA3_METHOD shake_ARMSHA3_meth = {
-    armsha3_sha3_absorb,
-    generic_sha3_final,
-    generic_sha3_squeeze
+    sha3_absorb_arm,
+    ossl_sha3_final_default,
+    ossl_shake_squeeze_default
 };
 #endif

diff --git a/crypto/slh_dsa/slh_hash.c b/crypto/slh_dsa/slh_hash.c
index 1deed75a32..e7d2e1c952 100644
--- a/crypto/slh_dsa/slh_hash.c
+++ b/crypto/slh_dsa/slh_hash.c
@@ -73,7 +73,7 @@ slh_hmsg_shake(SLH_DSA_HASH_CTX *hctx, const uint8_t *r,
     ossl_sha3_absorb(sctx, pk_seed, n);
     ossl_sha3_absorb(sctx, pk_root, n);
     ossl_sha3_absorb(sctx, msg, msg_len);
-    ossl_sha3_final(sctx, out, m);
+    ossl_sha3_squeeze(sctx, out, m);
     return 1;
 }

@@ -91,7 +91,7 @@ slh_prf_msg_shake(SLH_DSA_HASH_CTX *hctx, const uint8_t *sk_prf,
     ossl_sha3_absorb(sctx, sk_prf, n);
     ossl_sha3_absorb(sctx, opt_rand, n);
     ossl_sha3_absorb(sctx, msg, msg_len);
-    ossl_sha3_final(sctx, out, n);
+    ossl_sha3_squeeze(sctx, out, n);
     return WPACKET_memcpy(pkt, out, n);
 }

@@ -105,7 +105,7 @@ slh_f_shake(SLH_DSA_HASH_CTX *hctx, const uint8_t *pk_seed, const uint8_t *adrs,

     ossl_sha3_absorb(&sctx, adrs, SLH_ADRS_SIZE);
     ossl_sha3_absorb(&sctx, m1, m1_len);
-    ossl_sha3_final(&sctx, out, n);
+    ossl_sha3_squeeze(&sctx, out, n);
     return 1;
 }

@@ -120,7 +120,7 @@ slh_prf_shake(SLH_DSA_HASH_CTX *hctx,

     ossl_sha3_absorb(&sctx, adrs, SLH_ADRS_SIZE);
     ossl_sha3_absorb(&sctx, sk_seed, n);
-    ossl_sha3_final(&sctx, out, n);
+    ossl_sha3_squeeze(&sctx, out, n);
     return 1;
 }

@@ -135,7 +135,7 @@ slh_h_shake(SLH_DSA_HASH_CTX *hctx, const uint8_t *pk_seed, const uint8_t *adrs,
     ossl_sha3_absorb(sctx, adrs, SLH_ADRS_SIZE);
     ossl_sha3_absorb(sctx, m1, n);
     ossl_sha3_absorb(sctx, m2, n);
-    ossl_sha3_final(sctx, out, n);
+    ossl_sha3_squeeze(sctx, out, n);
     return 1;
 }

@@ -315,7 +315,7 @@ int slh_wots_pk_gen_shake(SLH_DSA_HASH_CTX *hctx,
         ctx = *sctx;
         ossl_sha3_absorb(&ctx, sk_adrs, SLH_ADRS_SIZE);
         ossl_sha3_absorb(&ctx, sk_seed, n);
-        ossl_sha3_final(&ctx, sk, n);
+        ossl_sha3_squeeze(&ctx, sk, n);

         set_chain_address(adrs, (uint32_t)i);
         for (j = 0; j < NIBBLE_MASK; ++j) {
@@ -324,7 +324,7 @@ int slh_wots_pk_gen_shake(SLH_DSA_HASH_CTX *hctx,
             ctx = *sctx;
             ossl_sha3_absorb(&ctx, adrs, SLH_ADRS_SIZE);
             ossl_sha3_absorb(&ctx, sk, n);
-            ossl_sha3_final(&ctx, sk, n);
+            ossl_sha3_squeeze(&ctx, sk, n);
         }
         memcpy(pk_out, sk, n);
         pk_out += n;
diff --git a/include/internal/sha3.h b/include/internal/sha3.h
index b71581cf4a..df75224e08 100644
--- a/include/internal/sha3.h
+++ b/include/internal/sha3.h
@@ -22,9 +22,9 @@

 typedef struct keccak_st KECCAK1600_CTX;

-typedef size_t(sha3_absorb_fn)(void *vctx, const void *in, size_t inlen);
-typedef int(sha3_final_fn)(void *vctx, unsigned char *out, size_t outlen);
-typedef int(sha3_squeeze_fn)(void *vctx, unsigned char *out, size_t outlen);
+typedef size_t(sha3_absorb_fn)(KECCAK1600_CTX *vctx, const unsigned char *in, size_t inlen);
+typedef int(sha3_final_fn)(KECCAK1600_CTX *vctx, unsigned char *out, size_t outlen);
+typedef int(sha3_squeeze_fn)(KECCAK1600_CTX *vctx, unsigned char *out, size_t outlen);

 typedef struct prov_sha3_meth_st {
     sha3_absorb_fn *absorb;
@@ -53,10 +53,14 @@ void ossl_sha3_reset(KECCAK1600_CTX *ctx);
 int ossl_sha3_init(KECCAK1600_CTX *ctx, unsigned char pad, size_t bitlen);
 int ossl_keccak_init(KECCAK1600_CTX *ctx, unsigned char pad,
     size_t typelen, size_t mdlen);
-int ossl_sha3_update(KECCAK1600_CTX *ctx, const void *_inp, size_t len);
+
+int ossl_sha3_absorb(KECCAK1600_CTX *ctx, const unsigned char *in, size_t len);
 int ossl_sha3_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen);
 int ossl_sha3_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen);
-int ossl_sha3_absorb(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len);
+
+size_t ossl_sha3_absorb_default(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len);
+int ossl_sha3_final_default(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen);
+int ossl_shake_squeeze_default(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen);

 size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
     size_t r);
diff --git a/providers/implementations/digests/sha3_prov.c b/providers/implementations/digests/sha3_prov.c
index 38b7c6ee77..998d4d38c8 100644
--- a/providers/implementations/digests/sha3_prov.c
+++ b/providers/implementations/digests/sha3_prov.c
@@ -55,6 +55,16 @@
 #define SHAKE_PADDING 0x1f
 #define CSHAKE_KECCAK_PADDING 0x04

+#if defined(OPENSSL_CPUID_OBJ) && defined(__s390__) && defined(KECCAK1600_ASM)
+/*
+ * IBM S390X support
+ */
+#include "s390x_arch.h"
+#define S390_SHA3 1
+#define S390_SHA3_CAPABLE(name) \
+    ((OPENSSL_s390xcap_P.kimd[0] & S390X_CAPBIT(S390X_##name)) && (OPENSSL_s390xcap_P.klmd[0] & S390X_CAPBIT(S390X_##name)))
+#endif
+
 /*
  * Forward declaration of any unique methods implemented here. This is not strictly
  * necessary for the compiler, but provides an assurance that the signatures
@@ -68,23 +78,23 @@ static OSSL_FUNC_digest_freectx_fn keccak_freectx;
 static OSSL_FUNC_digest_copyctx_fn keccak_copyctx;
 static OSSL_FUNC_digest_dupctx_fn keccak_dupctx;
 static OSSL_FUNC_digest_squeeze_fn shake_squeeze;
+
 static OSSL_FUNC_digest_get_ctx_params_fn shake_get_ctx_params;
 static OSSL_FUNC_digest_gettable_ctx_params_fn shake_gettable_ctx_params;
 static OSSL_FUNC_digest_set_ctx_params_fn shake_set_ctx_params;
 static OSSL_FUNC_digest_settable_ctx_params_fn shake_settable_ctx_params;
-static sha3_absorb_fn generic_sha3_absorb;
-static sha3_final_fn generic_sha3_final;
-static sha3_squeeze_fn generic_sha3_squeeze;

-#if defined(OPENSSL_CPUID_OBJ) && defined(__s390__) && defined(KECCAK1600_ASM)
-/*
- * IBM S390X support
- */
-#include "s390x_arch.h"
-#define S390_SHA3 1
-#define S390_SHA3_CAPABLE(name) \
-    ((OPENSSL_s390xcap_P.kimd[0] & S390X_CAPBIT(S390X_##name)) && (OPENSSL_s390xcap_P.klmd[0] & S390X_CAPBIT(S390X_##name)))
-#endif
+static PROV_SHA3_METHOD sha3_generic_md = {
+    ossl_sha3_absorb_default,
+    ossl_sha3_final_default,
+    NULL
+};
+
+static PROV_SHA3_METHOD shake_generic_md = {
+    ossl_sha3_absorb_default,
+    ossl_sha3_final_default,
+    ossl_shake_squeeze_default
+};

 static int keccak_init(void *vctx, ossl_unused const OSSL_PARAM params[])
 {
@@ -118,9 +128,7 @@ static int keccak_final(void *vctx, unsigned char *out, size_t *outl,
         ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_DIGEST_LENGTH);
         return 0;
     }
-    if (ossl_likely(outlen > 0))
-        ret = ctx->meth.final(ctx, out, ctx->md_size);
-
+    ret = ossl_sha3_final(ctx, out, ctx->md_size);
     *outl = ctx->md_size;
     return ret;
 }
@@ -136,47 +144,12 @@ static int shake_squeeze(void *vctx, unsigned char *out, size_t *outl,
     if (ctx->meth.squeeze == NULL)
         return 0;
     if (outlen > 0)
-        ret = ctx->meth.squeeze(ctx, out, outlen);
-
-    *outl = outlen;
+        ret = ossl_sha3_squeeze(ctx, out, outlen);
+    if (outl != NULL)
+        *outl = outlen;
     return ret;
 }

-/*-
- * Generic software version of the absorb() and final().
- */
-static size_t generic_sha3_absorb(void *vctx, const void *inp, size_t len)
-{
-    KECCAK1600_CTX *ctx = vctx;
-
-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
-    ctx->xof_state = XOF_STATE_ABSORB;
-    return SHA3_absorb(ctx->A, inp, len, ctx->block_size);
-}
-
-static int generic_sha3_final(void *vctx, unsigned char *out, size_t outlen)
-{
-    return ossl_sha3_final((KECCAK1600_CTX *)vctx, out, outlen);
-}
-
-static int generic_sha3_squeeze(void *vctx, unsigned char *out, size_t outlen)
-{
-    return ossl_sha3_squeeze((KECCAK1600_CTX *)vctx, out, outlen);
-}
-
-static PROV_SHA3_METHOD sha3_generic_md = {
-    generic_sha3_absorb,
-    generic_sha3_final,
-    NULL
-};
-
-static PROV_SHA3_METHOD shake_generic_md = {
-    generic_sha3_absorb,
-    generic_sha3_final,
-    generic_sha3_squeeze
-};
-
 #if defined(S390_SHA3)

 static sha3_absorb_fn s390x_sha3_absorb;
@@ -186,79 +159,56 @@ static sha3_final_fn s390x_shake_final;
 /*-
  * The platform specific parts of the absorb() and final() for S390X.
  */
-static size_t s390x_sha3_absorb(void *vctx, const void *inp, size_t len)
+static size_t s390x_sha3_absorb(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
 {
-    KECCAK1600_CTX *ctx = vctx;
     size_t rem = len % ctx->block_size;
     unsigned int fc;

-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
     if (len - rem > 0) {
         fc = ctx->pad;
         fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
-        ctx->xof_state = XOF_STATE_ABSORB;
         s390x_kimd(inp, len - rem, fc, ctx->A);
     }
     return rem;
 }

-static int s390x_sha3_final(void *vctx, unsigned char *out, size_t outlen)
+static int s390x_sha3_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    KECCAK1600_CTX *ctx = vctx;
     unsigned int fc;

-    if (!ossl_prov_is_running())
-        return 0;
-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
     fc = ctx->pad | S390X_KLMD_DUFOP;
     fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
-    ctx->xof_state = XOF_STATE_FINAL;
     s390x_klmd(ctx->buf, ctx->bufsz, NULL, 0, fc, ctx->A);
     memcpy(out, ctx->A, outlen);
     return 1;
 }

-static int s390x_shake_final(void *vctx, unsigned char *out, size_t outlen)
+static int s390x_shake_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    KECCAK1600_CTX *ctx = vctx;
     unsigned int fc;

-    if (!ossl_prov_is_running())
-        return 0;
-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
     fc = ctx->pad | S390X_KLMD_DUFOP;
     fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
-    ctx->xof_state = XOF_STATE_FINAL;
     s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, fc, ctx->A);
     return 1;
 }

-static int s390x_shake_squeeze(void *vctx, unsigned char *out, size_t outlen)
+static int s390x_shake_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    KECCAK1600_CTX *ctx = vctx;
     unsigned int fc;
     size_t len;

-    if (!ossl_prov_is_running())
-        return 0;
-    if (ctx->xof_state == XOF_STATE_FINAL)
-        return 0;
     /*
      * On the first squeeze call, finish the absorb process (incl. padding).
      */
     if (ctx->xof_state != XOF_STATE_SQUEEZE) {
         fc = ctx->pad;
         fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
-        ctx->xof_state = XOF_STATE_SQUEEZE;
         s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, fc, ctx->A);
         ctx->bufsz = outlen % ctx->block_size;
         /* reuse ctx->bufsz to count bytes squeezed from current sponge */
         return 1;
     }
-    ctx->xof_state = XOF_STATE_SQUEEZE;
     if (ctx->bufsz != 0) {
         len = ctx->block_size - ctx->bufsz;
         if (outlen < len)
@@ -278,22 +228,16 @@ static int s390x_shake_squeeze(void *vctx, unsigned char *out, size_t outlen)
     return 1;
 }

-static int s390x_keccakc_final(void *vctx, unsigned char *out, size_t outlen,
+static int s390x_keccakc_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen,
     int padding)
 {
-    KECCAK1600_CTX *ctx = vctx;
     size_t bsz = ctx->block_size;
     size_t num = ctx->bufsz;
     size_t needed = outlen;
     unsigned int fc;

-    if (!ossl_prov_is_running())
-        return 0;
-    if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
-        return 0;
     fc = ctx->pad;
     fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
-    ctx->xof_state = XOF_STATE_FINAL;
     if (outlen == 0)
         return 1;
     memset(ctx->buf + num, 0, bsz - num);
@@ -310,27 +254,22 @@ static int s390x_keccakc_final(void *vctx, unsigned char *out, size_t outlen,
     return 1;
 }

-static int s390x_keccak_final(void *vctx, unsigned char *out, size_t outlen)
+static int s390x_keccak_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    return s390x_keccakc_final(vctx, out, outlen, 0x01);
+    return s390x_keccakc_final(ctx, out, outlen, 0x01);
 }

-static int s390x_cshake_keccak_final(void *vctx, unsigned char *out, size_t outlen)
+static int s390x_cshake_keccak_final(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    return s390x_keccakc_final(vctx, out, outlen, 0x04);
+    return s390x_keccakc_final(ctx, out, outlen, 0x04);
 }

-static int s390x_keccakc_squeeze(void *vctx, unsigned char *out, size_t outlen,
+static int s390x_keccakc_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen,
     int padding)
 {
-    KECCAK1600_CTX *ctx = vctx;
     size_t len;
     unsigned int fc;

-    if (!ossl_prov_is_running())
-        return 0;
-    if (ctx->xof_state == XOF_STATE_FINAL)
-        return 0;
     /*
      * On the first squeeze call, finish the absorb process
      * by adding the trailing padding and then doing
@@ -358,7 +297,6 @@ static int s390x_keccakc_squeeze(void *vctx, unsigned char *out, size_t outlen,
         if (ctx->bufsz == ctx->block_size)
             ctx->bufsz = 0;
     }
-    ctx->xof_state = XOF_STATE_SQUEEZE;
     if (outlen == 0)
         return 1;
     s390x_klmd(NULL, 0, out, outlen, ctx->pad | S390X_KLMD_PS, ctx->A);
@@ -367,14 +305,14 @@ static int s390x_keccakc_squeeze(void *vctx, unsigned char *out, size_t outlen,
     return 1;
 }

-static int s390x_keccak_squeeze(void *vctx, unsigned char *out, size_t outlen)
+static int s390x_keccak_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    return s390x_keccakc_squeeze(vctx, out, outlen, KECCAK_PADDING);
+    return s390x_keccakc_squeeze(ctx, out, outlen, KECCAK_PADDING);
 }

-static int s390x_cshake_keccak_squeeze(void *vctx, unsigned char *out, size_t outlen)
+static int s390x_cshake_keccak_squeeze(KECCAK1600_CTX *ctx, unsigned char *out, size_t outlen)
 {
-    return s390x_keccakc_squeeze(vctx, out, outlen, CSHAKE_KECCAK_PADDING);
+    return s390x_keccakc_squeeze(ctx, out, outlen, CSHAKE_KECCAK_PADDING);
 }

 static PROV_SHA3_METHOD sha3_s390x_md = {
@@ -433,21 +371,20 @@ size_t SHA3_absorb_cext(uint64_t A[5][5], const unsigned char *inp, size_t len,
 /*-
  * Hardware-assisted ARMv8.2 SHA3 extension version of the absorb()
  */
-static size_t armsha3_sha3_absorb(void *vctx, const void *inp, size_t len)
+static size_t armsha3_sha3_absorb(KECCAK1600_CTX *ctx, const unsigned char *inp, size_t len)
 {
-    KECCAK1600_CTX *ctx = vctx;
-
     return SHA3_absorb_cext(ctx->A, inp, len, ctx->block_size);
 }

 static PROV_SHA3_METHOD sha3_ARMSHA3_md = {
     armsha3_sha3_absorb,
-    generic_sha3_final
+    ossl_sha3_final_default,
+    NULL
 };
 static PROV_SHA3_METHOD shake_ARMSHA3_md = {
     armsha3_sha3_absorb,
-    generic_sha3_final,
-    generic_sha3_squeeze
+    ossl_sha3_final_default,
+    ossl_shake_squeeze_default
 };
 #define SHAKE_SET_MD(uname, typ)                              \
     if (OPENSSL_armcap_P & ARMV8_HAVE_SHA3_AND_WORTH_USING) { \