Commit c1f74671ae for openssl.org

commit c1f74671ae2f842f550b493cfc914113fe3ced87
Author: Viktor Dukhovni <openssl-users@dukhovni.org>
Date:   Thu Apr 16 18:28:33 2026 +1000

    Add valgrind CT support to ML-DSA

    Also slightly refactor the ML-KEM version to share the necesasry
    defines, and add a daily CI run to check both (presently, for just some
    platforms with known working valgrind support).

    Reviewed-by: Nikola Pajkovsky <nikolap@openssl.org>
    Reviewed-by: Shane Lontis <shane.lontis@oracle.com>
    MergeDate: Wed Apr 22 07:55:14 2026
    (Merged from https://github.com/openssl/openssl/pull/30863)

diff --git a/.github/workflows/ct-validation-daily.yml b/.github/workflows/ct-validation-daily.yml
new file mode 100644
index 0000000000..5ae384e5a5
--- /dev/null
+++ b/.github/workflows/ct-validation-daily.yml
@@ -0,0 +1,89 @@
+# Copyright 2026 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+name: Constant-time validation (daily)
+# Verifies that ML-KEM and ML-DSA signing do not branch on secret data.
+#
+# The library is built with enable-ct-validation, which defines
+# OPENSSL_CONSTANT_TIME_VALIDATION and causes secret regions to be marked
+# as "uninitialised" from Valgrind memcheck's perspective.  The tests are
+# then run via "make test" with OSSL_VALGRIND_CT=yes, which makes
+# OpenSSL::Test::test() wrap every test binary with:
+#
+#   valgrind --tool=memcheck --track-origins=yes --error-exitcode=1
+#
+# The wrapper chain (util/wrap.pl -> util/shlib_wrap.sh) is preserved, so
+# LD_LIBRARY_PATH is set correctly for shared-library builds.  Any
+# control-flow branch or memory index that depends on secret data causes
+# valgrind to exit with code 1, which propagates back through the test
+# harness and fails the job.
+#
+# See include/internal/constant_time.h for the CONSTTIME_SECRET /
+# CONSTTIME_DECLASSIFY macro documentation.
+#
+# Architecture note: Valgrind's memcheck supports x86_64, aarch64, s390x,
+# and ppc64 well.  GitHub Actions provides hosted runners for x86_64
+# (ubuntu-latest) and aarch64 (ubuntu-24.04-arm); we test both here.
+# s390x and ppc64 runners are not available in the public GitHub Actions
+# fleet, so they are not included.
+#
+# Package note: on Debian/Ubuntu the valgrind package bundles the C headers
+# (valgrind/memcheck.h) — no separate -dev package is required.  On Fedora
+# the headers are in valgrind-devel; see Configure for the full list.
+
+on:
+  schedule:
+    - cron: '45 03 * * *'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  ct-validation:
+    if: github.repository == 'openssl/openssl'
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: linux-x86_64
+            runs-on: ubuntu-latest
+          - name: linux-aarch64
+            runs-on: ubuntu-24.04-arm
+
+    name: CT validation (${{ matrix.name }})
+    runs-on: ${{ matrix.runs-on }}
+
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Install valgrind
+        # On Debian/Ubuntu the main 'valgrind' package includes
+        # /usr/include/valgrind/memcheck.h — no separate -dev package needed.
+        run: |
+          sudo apt-get -y update
+          sudo apt-get -y install valgrind
+
+      - name: Configure with CT validation enabled
+        run: |
+          ./Configure enable-ct-validation
+          ./configdata.pm --dump
+
+      - name: Build
+        run: make -j$(nproc)
+
+      - name: Run ML-KEM and ML-DSA CT validation under Valgrind
+        # OSSL_VALGRIND_CT=yes causes OpenSSL::Test::test() to wrap each
+        # test binary with valgrind --track-origins=yes --error-exitcode=1.
+        # util/wrap.pl -> util/shlib_wrap.sh sets LD_LIBRARY_PATH first, so
+        # the shared libraries are found correctly.
+        run: |
+          make TESTS="test_internal_ml_kem test_internal_ml_dsa" \
+               OSSL_VALGRIND_CT=yes \
+               test
diff --git a/Configure b/Configure
index 38b7399de0..4cd67bb41e 100755
--- a/Configure
+++ b/Configure
@@ -518,6 +518,7 @@ my @disablables_features = (
     "crypto-mdebug",
     "allocfail-tests",
     "ct",
+    "ct-validation",
     "default-thread-pool",
     "demos",
     "h3demo",
@@ -620,6 +621,7 @@ our %disabled = ( # "what"         => "comment"
                   "buildtest-c++"       => "default",
                   "crypto-mdebug"       => "default",
                   "allocfail-tests"     => "default",
+                  "ct-validation"       => "default",
                   "demos"               => "default",
                   "h3demo"              => "default",
                   "hqinterop"           => "default",
@@ -1674,6 +1676,46 @@ unless ($disabled{"fuzz-libfuzzer"} && $disabled{"fuzz-afl"}
     push @{$config{cflags}}, "-fno-omit-frame-pointer", "-g";
     push @{$config{cxxflags}}, "-fno-omit-frame-pointer", "-g" if $config{CXX};
 }
+
+# Valgrind-based constant-time validation: marks secret data as "undefined"
+# to Valgrind's memcheck tool, so that any control flow or memory indexing
+# that depends on secret data is flagged as an error.  Requires valgrind
+# headers at build time and running the tests under valgrind at test time.
+# Use |make TESTS="test_internal_ml_kem test_internal_ml_dsa" test| under
+# valgrind after building with this option.
+#
+# Package names for the required valgrind headers:
+#   Debian/Ubuntu  : valgrind          (headers bundled in the main package)
+#   Fedora/RHEL    : valgrind-devel
+#   Alpine         : valgrind-dev
+#   Arch Linux     : valgrind
+unless ($disabled{"ct-validation"}) {
+    # Probe for <valgrind/memcheck.h> so that we give a clear error here
+    # rather than a cryptic compile failure inside constant_time.h later.
+    my $cc = ($config{CROSS_COMPILE} // "").$config{CC};
+    my $probe_src = "ct_valgrind_probe_$$.c";
+    my $probe_obj = "ct_valgrind_probe_$$.o";
+    open(my $fh, ">", $probe_src)
+        or die "Cannot write probe file '$probe_src': $!";
+    print $fh "#include <valgrind/memcheck.h>\n";
+    close($fh);
+    my $probe_ok = (system("$cc -c -o $probe_obj $probe_src 2>/dev/null") == 0);
+    unlink($probe_src, $probe_obj);
+    if (!$probe_ok) {
+        die <<EOT;
+
+***** enable-ct-validation requires <valgrind/memcheck.h> at build time,
+***** but the header was not found by '$cc'.
+*****
+***** Install the appropriate package and re-run Configure:
+*****   Debian/Ubuntu : sudo apt-get install valgrind
+*****   Fedora/RHEL   : sudo dnf install valgrind-devel
+*****   Alpine        : sudo apk add valgrind-dev
+*****   Arch Linux    : sudo pacman -S valgrind
+EOT
+    }
+    push @{$config{openssl_feature_defines}}, "OPENSSL_CONSTANT_TIME_VALIDATION";
+}
 #
 # Platform fix-ups
 #
diff --git a/crypto/ml_dsa/ml_dsa_sample.c b/crypto/ml_dsa/ml_dsa_sample.c
index 6fae4c4a0d..5d9dc84a54 100644
--- a/crypto/ml_dsa/ml_dsa_sample.c
+++ b/crypto/ml_dsa/ml_dsa_sample.c
@@ -12,6 +12,7 @@
 #include "ml_dsa_vector.h"
 #include "ml_dsa_matrix.h"
 #include "ml_dsa_hash.h"
+#include "internal/constant_time.h"
 #include "internal/sha3.h"
 #include "internal/packet.h"

@@ -325,6 +326,23 @@ int ossl_ml_dsa_poly_sample_in_ball(POLY *out_c, const uint8_t *seed, int seed_l
      */
     OPENSSL_load_u64_le(&signs, block);

+    /*
+     * SampleInBall implements a Fisher-Yates shuffle whose rejection-sampling
+     * inner loop and data-dependent array index unavoidably leak the structure
+     * of the challenge polynomial via memory-access pattern and branch timing.
+     * This is safe: c_tilde = H(mu ‖ w1) is the Fiat-Shamir commitment and is
+     * published in the accepted signature, so the SHAKE bytes that build c are
+     * effectively public.  See the BoringSSL design discussion at
+     * https://boringssl-review.googlesource.com/c/boringssl/+/67747/comment/8d8f01ac_70af3f21/
+     *
+     * The first 8 bytes (the sign bits loaded into |signs| above) are left
+     * tainted: they determine only the ±1 values written into c, which flow
+     * into the CT arithmetic of cs1/cs2/ct0 alongside the already-tainted
+     * secret polynomials and cause no spurious violations there.
+     * Only the rejection-sampling bytes need to be declassified.
+     */
+    CONSTTIME_DECLASSIFY(block + offset, sizeof(block) - offset);
+
     poly_zero(out_c);

     /* Loop tau times */
@@ -337,6 +355,8 @@ int ossl_ml_dsa_poly_sample_in_ball(POLY *out_c, const uint8_t *seed, int seed_l
                 /* squeeze another block if the bytes from block have been used */
                 if (!EVP_DigestSqueeze(h_ctx, block, sizeof(block)))
                     return 0;
+                /* See comment above for why the block is declassified. */
+                CONSTTIME_DECLASSIFY(block, sizeof(block));
                 offset = 0;
             }

diff --git a/crypto/ml_dsa/ml_dsa_sign.c b/crypto/ml_dsa/ml_dsa_sign.c
index 9fae96b067..51c2709ddb 100644
--- a/crypto/ml_dsa/ml_dsa_sign.c
+++ b/crypto/ml_dsa/ml_dsa_sign.c
@@ -14,6 +14,7 @@
 #include <openssl/err.h>
 #include <openssl/proverr.h>
 #include "internal/common.h"
+#include "internal/constant_time.h"
 #include "ml_dsa_local.h"
 #include "ml_dsa_key.h"
 #include "ml_dsa_matrix.h"
@@ -220,9 +221,26 @@ static int ml_dsa_sign_internal(const ML_DSA_KEY *priv,
     signature_init(&sig, p, k, p + k, l, c_tilde, c_tilde_len);
     /* End of the allocated blob setup */

+    /*
+     * Mark the private key material as secret before we start computing with
+     * it.  Any control-flow branch or memory-index that transitively depends
+     * on these bytes will be flagged by Valgrind when the library is built
+     * with enable-ct-validation.
+     */
+    CONSTTIME_SECRET(priv->K, sizeof(priv->K));
+    CONSTTIME_SECRET_VECTOR(priv->s1);
+    CONSTTIME_SECRET_VECTOR(priv->s2);
+    CONSTTIME_SECRET_VECTOR(priv->t0);
+
     if (!matrix_expand_A(md_ctx, priv->shake128_md, priv->rho, &a_ntt))
         goto err;

+    /*
+     * rho_prime is derived from the secret K and must remain tainted
+     * throughout the rejection loop: knowing it would let an attacker
+     * reconstruct every mask y and recover c*s1 = z - y from the final
+     * signature.  Do NOT declassify it.
+     */
     if (!shake_xof_3(md_ctx, priv->shake256_md, priv->K, sizeof(priv->K),
             rnd, rnd_len, mu, mu_len,
             rho_prime, sizeof(rho_prime)))
@@ -276,12 +294,17 @@ static int ml_dsa_sign_internal(const ML_DSA_KEY *priv,
         vector_low_bits(r0, gamma2, r0);

         /*
-         * Leaking that the signature is rejected is fine as the next attempt at a
-         * signature will be (indistinguishable from) independent of this one.
+         * Leaking that the signature is rejected is fine: the next attempt
+         * is (indistinguishable from) independent of this one, so an
+         * observer learns nothing about the secret key beyond the number of
+         * iterations, which is itself safe to reveal.
+         * Declassify the bound-check output so that Valgrind does not flag
+         * these intentional leaks.
          */
         z_max = vector_max(&sig.z);
         r0_max = vector_max_signed(r0);
-        if (value_barrier_32(constant_time_ge(z_max, gamma1 - params->beta)
+        if (constant_time_declassify_u32(
+                constant_time_ge(z_max, gamma1 - params->beta)
                 | constant_time_ge(r0_max, gamma2 - params->beta)))
             continue;

@@ -292,9 +315,32 @@ static int ml_dsa_sign_internal(const ML_DSA_KEY *priv,
         ct0_max = vector_max(ct0);
         h_ones = (uint32_t)vector_count_ones(&sig.hint);
         /* Same reasoning applies to the leak as above */
-        if (value_barrier_32(constant_time_ge(ct0_max, gamma2)
+        if (constant_time_declassify_u32(
+                constant_time_ge(ct0_max, gamma2)
                 | constant_time_lt(params->omega, h_ones)))
             continue;
+
+        /*
+         * The iteration has passed both rejection tests: the signature is
+         * accepted.  Declassify all three public outputs before encoding.
+         *
+         * sig.z and sig.hint were computed from secret key material (s1,
+         * s2, t0) and carry taint, but the rejection checks above have
+         * verified they lie within the ranges required by the security
+         * proof, so they reveal nothing about the key.
+         *
+         * c_tilde = H(mu || w1) carries taint that propagated from the
+         * secret rho_prime through y → w → w1.  It is the Fiat-Shamir
+         * challenge commitment and is published as part of the signature.
+         * We defer its declassification to here (rather than immediately
+         * after the SHAKE call) so that Valgrind can check that
+         * poly_sample_in_ball_ntt and the NTT challenge arithmetic are
+         * data-oblivious with respect to their tainted inputs.
+         */
+        CONSTTIME_DECLASSIFY(c_tilde, c_tilde_len);
+        CONSTTIME_DECLASSIFY_VECTOR(sig.z);
+        CONSTTIME_DECLASSIFY_VECTOR(sig.hint);
+
         ret = ossl_ml_dsa_sig_encode(&sig, params, out_sig);
         break;
     }
@@ -302,6 +348,17 @@ err:
     EVP_MD_CTX_free(md_ctx);
     OPENSSL_clear_free(alloc, alloc_len);
     OPENSSL_cleanse(rho_prime, sizeof(rho_prime));
+    /*
+     * Declassify the private key material before returning.  The key struct
+     * is not owned here, so we do not free it, but we must remove the
+     * "secret" taint so that the caller does not inherit spurious Valgrind
+     * "uninitialised" state.  The polynomial data in |alloc| was already
+     * zeroed and freed above; rho_prime is stack-allocated and cleansed.
+     */
+    CONSTTIME_DECLASSIFY(priv->K, sizeof(priv->K));
+    CONSTTIME_DECLASSIFY_VECTOR(priv->s1);
+    CONSTTIME_DECLASSIFY_VECTOR(priv->s2);
+    CONSTTIME_DECLASSIFY_VECTOR(priv->t0);
     return ret;
 }

diff --git a/crypto/ml_dsa/ml_dsa_vector.h b/crypto/ml_dsa/ml_dsa_vector.h
index b68a4d95d0..0693eb6e3c 100644
--- a/crypto/ml_dsa/ml_dsa_vector.h
+++ b/crypto/ml_dsa/ml_dsa_vector.h
@@ -15,6 +15,11 @@ struct vector_st {
     size_t num_poly;
 };

+#define CONSTTIME_SECRET_VECTOR(v) \
+    CONSTTIME_SECRET(v.poly, v.num_poly * sizeof(POLY));
+#define CONSTTIME_DECLASSIFY_VECTOR(v) \
+    CONSTTIME_DECLASSIFY(v.poly, v.num_poly * sizeof(POLY));
+
 /**
  * @brief Initialize a Vector object.
  *
diff --git a/crypto/ml_kem/ml_kem.c b/crypto/ml_kem/ml_kem.c
index 04ba04cdc9..97cc3cad7d 100644
--- a/crypto/ml_kem/ml_kem.c
+++ b/crypto/ml_kem/ml_kem.c
@@ -15,10 +15,6 @@
 #include "internal/constant_time.h"
 #include "internal/sha3.h"

-#if defined(OPENSSL_CONSTANT_TIME_VALIDATION)
-#include <valgrind/memcheck.h>
-#endif
-
 #if ML_KEM_SEED_BYTES != ML_KEM_SHARED_SECRET_BYTES + ML_KEM_RANDOM_BYTES
 #error "ML-KEM keygen seed length != shared secret + random bytes length"
 #endif
@@ -47,29 +43,6 @@
 #define SHAKE128_BLOCKSIZE SHA3_BLOCKSIZE(128)
 #endif

-/*
- * Return whether a value that can only be 0 or 1 is non-zero, in constant time
- * in practice!  The return value is a mask that is all ones if true, and all
- * zeros otherwise (twos-complement arithmetic assumed for unsigned values).
- *
- * Although this is used in constant-time selects, we omit a value barrier
- * here.  Value barriers impede auto-vectorization (likely because it forces
- * the value to transit through a general-purpose register). On AArch64, this
- * is a difference of 2x.
- *
- * We usually add value barriers to selects because Clang turns consecutive
- * selects with the same condition into a branch instead of CMOV/CSEL. This
- * condition does not occur in Kyber, so omitting it seems to be safe so far,
- * but see |cbd_2|, |cbd_3|, where reduction needs to be specialised to the
- * sign of the input, rather than adding |q| in advance, and using the generic
- * |reduce_once|.  (David Benjamin, Chromium)
- */
-#if 0
-#define constish_time_non_zero(b) (~constant_time_is_zero(b));
-#else
-#define constish_time_non_zero(b) (0u - (b))
-#endif
-
 /*
  * The scalar rejection-sampling buffer size needs to be a multiple of 12, but
  * is otherwise arbitrary, the preferred block size matches the internal buffer
@@ -147,30 +120,6 @@ static void scalar_encode(uint8_t *out, const scalar *s, int bits);
 #define V_SCALAR_BYTES(b) ((DEGREE / 8) * ML_KEM_##b##_DV)
 #define CTEXT_BYTES(b) (U_VECTOR_BYTES(b) + V_SCALAR_BYTES(b))

-#if defined(OPENSSL_CONSTANT_TIME_VALIDATION)
-
-/*
- * CONSTTIME_SECRET takes a pointer and a number of bytes and marks that region
- * of memory as secret. Secret data is tracked as it flows to registers and
- * other parts of a memory. If secret data is used as a condition for a branch,
- * or as a memory index, it will trigger warnings in valgrind.
- */
-#define CONSTTIME_SECRET(ptr, len) VALGRIND_MAKE_MEM_UNDEFINED(ptr, len)
-
-/*
- * CONSTTIME_DECLASSIFY takes a pointer and a number of bytes and marks that
- * region of memory as public. Public data is not subject to constant-time
- * rules.
- */
-#define CONSTTIME_DECLASSIFY(ptr, len) VALGRIND_MAKE_MEM_DEFINED(ptr, len)
-
-#else
-
-#define CONSTTIME_SECRET(ptr, len)
-#define CONSTTIME_DECLASSIFY(ptr, len)
-
-#endif
-
 /*
  * Indices of slots in the vinfo tables below
  */
@@ -512,13 +461,13 @@ static void ml_kem_ntt_init(void)
  * reduce_once reduces 0 <= x < 2*kPrime, mod kPrime.
  *
  * Subtract |q| if the input is larger, without exposing a side-channel,
- * avoiding the "clangover" attack.  See |constish_time_non_zero| for a
+ * avoiding the "clangover" attack.  See |constish_time_true| for a
  * discussion on why the value barrier is by default omitted.
  */
 static __owur uint16_t reduce_once(uint16_t x)
 {
     const uint16_t subtracted = x - kPrime;
-    uint16_t mask = constish_time_non_zero(subtracted >> 15);
+    uint16_t mask = constish_time_true(subtracted >> 15);

     return (mask & x) | (~mask & subtracted);
 }
@@ -818,11 +767,11 @@ scalar_decode_decompress_add(scalar *out, const uint8_t in[DEGREE / 8])

     /*
      * Add |half_q_plus_1| if the bit is set, without exposing a side-channel,
-     * avoiding the "clangover" attack.  See |constish_time_non_zero| for a
+     * avoiding the "clangover" attack.  See |constish_time_true| for a
      * discussion on why the value barrier is by default omitted.
      */
 #define decode_decompress_add_bit                        \
-    mask = constish_time_non_zero(bit0(b));              \
+    mask = constish_time_true(bit0(b));                  \
     *curr = reduce_once(*curr + (mask & half_q_plus_1)); \
     curr++;                                              \
     b >>= 1
@@ -1073,20 +1022,20 @@ static __owur int cbd_2(scalar *out, uint8_t in[ML_KEM_RANDOM_BYTES + 1],
         b = *r++;

         /*
-         * Add |kPrime| if |value| underflowed.  See |constish_time_non_zero|
-         * for a discussion on why the value barrier is by default omitted.
-         * While this could have been written reduce_once(value + kPrime), this
-         * is one extra addition and small range of |value| tempts some
-         * versions of Clang to emit a branch.
+         * Add |kPrime| if |value| underflowed.  See |constish_time_true| for
+         * a discussion on why the value barrier is by default omitted.  While
+         * this could have been written reduce_once(value + kPrime), this is
+         * one extra addition and small range of |value| tempts some versions
+         * of Clang to emit a branch.
          */
         value = bit0(b) + bitn(1, b);
         value -= bitn(2, b) + bitn(3, b);
-        mask = constish_time_non_zero(value >> 15);
+        mask = constish_time_true(value >> 15);
         *curr++ = value + (kPrime & mask);

         value = bitn(4, b) + bitn(5, b);
         value -= bitn(6, b) + bitn(7, b);
-        mask = constish_time_non_zero(value >> 15);
+        mask = constish_time_true(value >> 15);
         *curr++ = value + (kPrime & mask);
     } while (curr < end);
     return 1;
@@ -1115,7 +1064,7 @@ static __owur int cbd_3(scalar *out, uint8_t in[ML_KEM_RANDOM_BYTES + 1],
         b3 = *r++;

         /*
-         * Add |kPrime| if |value| underflowed.  See |constish_time_non_zero|
+         * Add |kPrime| if |value| underflowed.  See |constish_time_true|
          * for a discussion on why the value barrier is by default omitted.
          * While this could have been written reduce_once(value + kPrime), this
          * is one extra addition and small range of |value| tempts some
@@ -1123,22 +1072,22 @@ static __owur int cbd_3(scalar *out, uint8_t in[ML_KEM_RANDOM_BYTES + 1],
          */
         value = bit0(b1) + bitn(1, b1) + bitn(2, b1);
         value -= bitn(3, b1) + bitn(4, b1) + bitn(5, b1);
-        mask = constish_time_non_zero(value >> 15);
+        mask = constish_time_true(value >> 15);
         *curr++ = value + (kPrime & mask);

         value = bitn(6, b1) + bitn(7, b1) + bit0(b2);
         value -= bitn(1, b2) + bitn(2, b2) + bitn(3, b2);
-        mask = constish_time_non_zero(value >> 15);
+        mask = constish_time_true(value >> 15);
         *curr++ = value + (kPrime & mask);

         value = bitn(4, b2) + bitn(5, b2) + bitn(6, b2);
         value -= bitn(7, b2) + bit0(b3) + bitn(1, b3);
-        mask = constish_time_non_zero(value >> 15);
+        mask = constish_time_true(value >> 15);
         *curr++ = value + (kPrime & mask);

         value = bitn(2, b3) + bitn(3, b3) + bitn(4, b3);
         value -= bitn(5, b3) + bitn(6, b3) + bitn(7, b3);
-        mask = constish_time_non_zero(value >> 15);
+        mask = constish_time_true(value >> 15);
         *curr++ = value + (kPrime & mask);
     } while (curr < end);
     return 1;
@@ -2079,7 +2028,7 @@ int ossl_ml_kem_decap(uint8_t *shared_secret, size_t slen,
     EVP_MD_CTX *mdctx;
     int ret = 0;
 #if defined(OPENSSL_CONSTANT_TIME_VALIDATION)
-    int classify_bytes = 2 * sizeof(scalar) + ML_KEM_RANDOM_BYTES;
+    int classify_bytes;
 #endif

     /* Need a private key here */
@@ -2098,6 +2047,9 @@ int ossl_ml_kem_decap(uint8_t *shared_secret, size_t slen,
      * Data derived from |s| and |z| defaults secret, and to avoid side-channel
      * leaks should not influence control flow.
      */
+#if defined(OPENSSL_CONSTANT_TIME_VALIDATION)
+    classify_bytes = vinfo->rank * sizeof(scalar) + ML_KEM_RANDOM_BYTES;
+#endif
     CONSTTIME_SECRET(key->s, classify_bytes);

     /*-
diff --git a/include/internal/constant_time.h b/include/internal/constant_time.h
index c27f52a153..ddb15d7b6f 100644
--- a/include/internal/constant_time.h
+++ b/include/internal/constant_time.h
@@ -477,4 +477,73 @@ static ossl_inline void constant_time_lookup(void *out,
  */
 void err_clear_last_constant_time(int clear);

+/*
+ * Return whether a value that can only be 0 or 1 is non-zero, in constant time
+ * in practice!  The return value is a mask that is all ones if true, and all
+ * zeros otherwise (twos-complement arithmetic assumed for unsigned values).
+ *
+ * Although this is used in constant-time selects, we omit a value barrier
+ * here.  Value barriers impede auto-vectorization (likely because it forces
+ * the value to transit through a general-purpose register). On AArch64, this
+ * is a difference of 2x.
+ *
+ * We usually add value barriers to selects because Clang turns consecutive
+ * selects with the same condition into a branch instead of CMOV/CSEL.
+ * Omitting it seems to be safe so far (David Benjamin, Chromium).  This is
+ * used in the |reduce_once| functions in ML-KEM and ML-DSA in BoringSSL, and
+ * is now also used in OpenSSL.  Any use in new contexts requires careful prior
+ * evaluation and should otherwise be avoided.
+ */
+#if 0
+#define constish_time_true(b) (~constant_time_is_zero(b));
+#else
+#define constish_time_true(b) (0u - (b))
+#endif
+
+/*
+ * Valgrind-based constant-time validation helpers.
+ *
+ * CONSTTIME_SECRET marks a region of memory as secret.  Valgrind's memcheck
+ * tool will then flag any control-flow branch or memory index that depends on
+ * those bytes as an error, because the branch/index would vary with the secret
+ * and could therefore leak it via a timing side-channel.
+ *
+ * CONSTTIME_DECLASSIFY marks a region as no longer secret.  Call this:
+ *   - on values that are derived from, but do not expose, secret data (e.g.
+ *     the rejection decision in ML-DSA, or the public outputs of a KEM), and
+ *   - on all secret regions before returning from a function, so that callers
+ *     do not inherit spurious "uninitialised" state from Valgrind's perspective.
+ *
+ * Both macros are no-ops unless the library is built with
+ * enable-ct-validation (which defines OPENSSL_CONSTANT_TIME_VALIDATION and
+ * requires valgrind headers at build time).
+ */
+#if defined(OPENSSL_CONSTANT_TIME_VALIDATION)
+#include <valgrind/memcheck.h>
+#define CONSTTIME_SECRET(ptr, len) VALGRIND_MAKE_MEM_UNDEFINED((ptr), (len))
+#define CONSTTIME_DECLASSIFY(ptr, len) VALGRIND_MAKE_MEM_DEFINED((ptr), (len))
+#else
+#define CONSTTIME_SECRET(ptr, len)
+#define CONSTTIME_DECLASSIFY(ptr, len)
+#endif
+
+static ossl_inline uint32_t constant_time_declassify_u32(uint32_t v)
+{
+    /*
+     * Return |v| through a value barrier to be safe. Valgrind-based
+     * constant-time validation is partly to check the compiler has not undone
+     * any constant-time work. Any place |OPENSSL_CONSTANT_TIME_VALIDATION|
+     * influences optimizations, this validation is inaccurate.
+     *
+     * However, by sending pointers through valgrind, we likely inhibit escape
+     * analysis. On local variables, particularly booleans, we likely
+     * significantly impact optimizations.
+     *
+     * Thus, to be safe, stick a value barrier, in hopes of comparably
+     * inhibiting compiler analysis.
+     */
+    CONSTTIME_DECLASSIFY(&v, sizeof(v));
+    return value_barrier_32(v);
+}
+
 #endif /* OSSL_INTERNAL_CONSTANT_TIME_H */
diff --git a/test/build.info b/test/build.info
index 6255b93e40..678139e453 100644
--- a/test/build.info
+++ b/test/build.info
@@ -328,6 +328,11 @@ IF[{- !$disabled{tests} -}]
     SOURCE[ml_dsa_test]=ml_dsa_test.c
     INCLUDE[ml_dsa_test]=../include ../apps/include
     DEPEND[ml_dsa_test]=../libcrypto.a libtestutil.a
+
+    PROGRAMS{noinst}=ml_dsa_internal_test
+    SOURCE[ml_dsa_internal_test]=ml_dsa_internal_test.c
+    INCLUDE[ml_dsa_internal_test]=../include ../apps/include
+    DEPEND[ml_dsa_internal_test]=../libcrypto.a libtestutil.a
   ENDIF

   PROGRAMS{noinst}=aeswrap_test
diff --git a/test/ml_dsa_internal_test.c b/test/ml_dsa_internal_test.c
new file mode 100644
index 0000000000..b992a2301b
--- /dev/null
+++ b/test/ml_dsa_internal_test.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2024-2026 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/*
+ * Internal ML-DSA test exercising the low-level sign/verify path directly.
+ *
+ * Primary purpose: constant-time validation.  When the library is built with
+ * enable-ct-validation and the test is run under Valgrind, any control-flow
+ * branch or memory index that depends on secret key material (other than the
+ * explicitly declassified rejection decisions) will be reported as an error.
+ *
+ * Secondary purpose: a quick sanity check that sign→verify round-trips for
+ * all three parameter sets using a fully deterministic key and message.
+ */
+
+#include <string.h>
+#include <openssl/evp.h>
+#include "crypto/ml_dsa.h"
+#include "testutil.h"
+
+/* Fixed 32-byte seed used for all three parameter-set tests. */
+static const uint8_t test_seed[ML_DSA_SEED_BYTES] = {
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+    0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+    0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20
+};
+
+/* A short, fixed test message. */
+static const uint8_t test_msg[] = "ML-DSA constant-time validation test";
+
+/*
+ * Exercise keygen + sign + verify for one ML-DSA parameter set.
+ *
+ * The sign call uses rnd=NULL (deterministic mode) so the output is fully
+ * determined by the seed and message.  Correctness against ACVP test vectors
+ * is verified separately in ml_dsa_test.c; here we only check that the
+ * round-trip succeeds, giving Valgrind something to instrument.
+ */
+static int test_sign_verify(int evp_type)
+{
+    ML_DSA_KEY *key = NULL;
+    uint8_t *sig = NULL;
+    size_t sig_len = 0;
+    const ML_DSA_PARAMS *params;
+    int ret = 0;
+
+    if (!TEST_ptr(key = ossl_ml_dsa_key_new(NULL, NULL, evp_type)))
+        goto err;
+    if (!TEST_true(ossl_ml_dsa_key_fetch_digests(key, NULL)))
+        goto err;
+    if (!TEST_ptr(params = ossl_ml_dsa_key_params(key)))
+        goto err;
+
+    /* Load the fixed seed and expand into a full key pair. */
+    if (!TEST_true(ossl_ml_dsa_set_prekey(key, ML_DSA_KEY_PREFER_SEED,
+            0, test_seed, sizeof(test_seed),
+            NULL, 0)))
+        goto err;
+    if (!TEST_true(ossl_ml_dsa_generate_key(key)))
+        goto err;
+
+    sig_len = params->sig_len;
+    if (!TEST_ptr(sig = OPENSSL_malloc(sig_len)))
+        goto err;
+
+    /*
+     * Sign deterministically (rnd=NULL).  This exercises the rejection loop
+     * under Valgrind without relying on external randomness.
+     */
+    if (!TEST_true(ossl_ml_dsa_sign(key,
+            0 /* msg_is_mu */,
+            test_msg, sizeof(test_msg) - 1,
+            NULL, 0 /* no context */,
+            NULL, 0 /* deterministic */,
+            1 /* encode */,
+            sig, &sig_len, params->sig_len)))
+        goto err;
+    if (!TEST_size_t_eq(sig_len, params->sig_len))
+        goto err;
+
+    /* Verify the signature we just produced. */
+    if (!TEST_true(ossl_ml_dsa_verify(key,
+            0 /* msg_is_mu */,
+            test_msg, sizeof(test_msg) - 1,
+            NULL, 0 /* no context */,
+            1 /* encode */,
+            sig, sig_len)))
+        goto err;
+
+    ret = 1;
+err:
+    ossl_ml_dsa_key_free(key);
+    OPENSSL_free(sig);
+    return ret;
+}
+
+static int test_ml_dsa_44(void) { return test_sign_verify(EVP_PKEY_ML_DSA_44); }
+static int test_ml_dsa_65(void) { return test_sign_verify(EVP_PKEY_ML_DSA_65); }
+static int test_ml_dsa_87(void) { return test_sign_verify(EVP_PKEY_ML_DSA_87); }
+
+int setup_tests(void)
+{
+    ADD_TEST(test_ml_dsa_44);
+    ADD_TEST(test_ml_dsa_65);
+    ADD_TEST(test_ml_dsa_87);
+    return 1;
+}
diff --git a/test/recipes/03-test_internal_ml_dsa.t b/test/recipes/03-test_internal_ml_dsa.t
new file mode 100644
index 0000000000..8cdf917ed9
--- /dev/null
+++ b/test/recipes/03-test_internal_ml_dsa.t
@@ -0,0 +1,18 @@
+#! /usr/bin/env perl
+# Copyright 2024-2026 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+use OpenSSL::Test;
+use OpenSSL::Test::Utils;
+use OpenSSL::Test qw/:DEFAULT srctop_file/;
+
+setup("ml_dsa_internal_test");
+plan skip_all => 'ML-DSA is not supported in this build'
+    if disabled('ml-dsa');
+plan tests => 1;
+
+ok(run(test(["ml_dsa_internal_test"])));
diff --git a/util/perl/OpenSSL/Test.pm b/util/perl/OpenSSL/Test.pm
index 4bddecf491..369d4f2b9a 100644
--- a/util/perl/OpenSSL/Test.pm
+++ b/util/perl/OpenSSL/Test.pm
@@ -365,6 +365,19 @@ sub test {
            my $srcdir = srctop_dir();
            return cmd([ "valgrind", "--leak-check=full", "--show-leak-kinds=all", "--gen-suppressions=all", "--suppressions=$srcdir/util/valgrind.suppression", "--log-file=$resultdir/valgrind.log.$idx", "--suppressions=$srcdir/util/valgrind.suppression", @prog, @cmdargs ],
                    exe_shell => $ENV{EXE_SHELL}, %opts) -> (shift);
+        } elsif (defined $ENV{OSSL_VALGRIND_CT}) {
+           # Constant-time validation mode: mark secret data as undefined and
+           # fail immediately if any branch or memory index depends on it.
+           # Unlike OSSL_USE_VALGRIND this writes to stdout (not a log file)
+           # and uses --error-exitcode so the test fails on any CT violation.
+           # Set OSSL_VALGRIND_CT=yes when building with enable-ct-validation.
+           return cmd([ "valgrind",
+                        "--tool=memcheck",
+                        "--track-origins=yes",
+                        "--error-exitcode=1",
+                        "--num-callers=20",
+                        @prog, @cmdargs ],
+                   exe_shell => $ENV{EXE_SHELL}, %opts) -> (shift);
         } else {
             return cmd([ @prog, @cmdargs ],
                    exe_shell => $ENV{EXE_SHELL}, %opts) -> (shift);