Commit a2079b82ba for openssl.org
commit a2079b82ba9b78f8a9e76c2f3387c39c0b44d28a
Author: Viktor Dukhovni <openssl-users@dukhovni.org>
Date: Thu Apr 16 21:41:07 2026 +1000
Drop value barrier from ML-DSA reduce_once
This mirrors the corresponding code in ML-KEM and works under
the same conditions/assumptions. Also adjusted related
functions that used two layers of constant_time selects
where one suffices (now also matching BoringSSL).
Intentionally uses the constant-time instrumentation PR as its
merge-base, so this should be merged after that has baked in for a few
days and shows working CT tests in daily CI runs.
Sample before/after performance pairs and percent throughput
increases for one X86_64 CPU:
keygens/s sign/s verify/s
ML-DSA-44 18728.3 6061.2 23251.6
ML-DSA-44 21077.2 7392.4 27244.3
ML-DSA-44 12.5% 22.0% 17.2%
ML-DSA-65 10084.3 3603.0 13988.6
ML-DSA-65 11197.9 4549.7 16208.4
ML-DSA-65 11.0% 26.3% 15.9%
ML-DSA-87 7184.8 2917.3 8141.0
ML-DSA-87 8132.4 3693.7 9430.7
ML-DSA-87 13.2% 26.6% 15.8%
and here's the same for an Apple silicon M2:
keygens/s sign/s verify/s
ML-DSA-44 17235.7 3099.3 15744.5
ML-DSA-44 21855.2 4907.6 22849.0
ML-DSA-44 26.8% 58.3% 45.1%
ML-DSA-65 9165.8 1908.5 10058.3
ML-DSA-65 11262.7 3069.6 14348.1
ML-DSA-65 22.9% 60.8% 42.6%
ML-DSA-87 6596.1 1563.6 6330.8
ML-DSA-87 8404.9 2584.6 8767.6
ML-DSA-87 27.4% 65.3% 38.5%
Reviewed-by: Eugene Syromiatnikov <esyr@openssl.org>
Reviewed-by: Nikola Pajkovsky <nikolap@openssl.org>
MergeDate: Thu Apr 23 13:55:05 2026
(Merged from https://github.com/openssl/openssl/pull/30864)
diff --git a/crypto/ml_dsa/ml_dsa_local.h b/crypto/ml_dsa/ml_dsa_local.h
index d4f63f7e99..bbaa6dafc7 100644
--- a/crypto/ml_dsa/ml_dsa_local.h
+++ b/crypto/ml_dsa/ml_dsa_local.h
@@ -101,20 +101,26 @@ int ossl_ml_dsa_poly_decode_expand_mask(POLY *out,
const uint8_t *in, size_t in_len,
uint32_t gamma1);
-/*
- * @brief Reduces x mod q in constant time
+/*-
+ * @brief Reduces 0 <= x < 2*q, mod q.
* i.e. return x < q ? x : x - q;
*
- * @param x Where x is assumed to be in the range 0 <= x < 2*q
+ * Subtract |q| if the input is larger, without exposing a side-channel,
+ * avoiding the "clangover" attack. See |constish_time_true| for a discussion
+ * on why the value barrier is by default omitted.
+ *
* @returns the difference in the range 0..q-1
*/
-static ossl_inline ossl_unused uint32_t reduce_once(uint32_t x)
+static ossl_inline ossl_unused __owur uint32_t reduce_once(uint32_t x)
{
- return constant_time_select_32(constant_time_lt_32(x, ML_DSA_Q), x, x - ML_DSA_Q);
+ const uint32_t subtracted = x - ML_DSA_Q; /* unsigned wrap-around sets bit 31 when x < q (given q < 2^31) */
+ uint32_t mask = constish_time_true(subtracted >> 31); /* presumably all-ones when that bit is 1, else zero -- confirm at its definition */
+
+ return (mask & x) | (~mask & subtracted); /* bits of mask that are set keep x, clear bits take x - q */
}
/*
- * @brief Calculate The positive value of (a-b) mod q in constant time.
+ * @brief Calculates the positive value of (a-b) mod q in constant time.
*
* a - b mod q gives a value in the range -(q-1)..(q-1)
* By adding q we get a range of 1..(2q-1).
@@ -131,21 +137,25 @@ static ossl_inline ossl_unused uint32_t mod_sub(uint32_t a, uint32_t b)
/*
* @brief Returns the absolute value in constant time.
- * i.e. return is_positive(x) ? x : -x;
+ * i.e. return is_negative(x) ? -x : x;
*/
static ossl_inline ossl_unused uint32_t abs_signed(uint32_t x)
{
- return constant_time_select_32(constant_time_lt_32(x, 0x80000000), x, 0u - x);
+ uint32_t mask = 0u - (x >> 31); /* all-ones when bit 31 (the sign bit) of x is set, else zero */
+
+ return constant_time_select_32(mask, 0u - x, x); /* 0u - x is the two's-complement negation of x */
}
/*
* @brief Returns the absolute value modulo q in constant time
- * i.e return x > (q - 1) / 2 ? q - x : x;
+ * i.e. return x <= (q-1)/2 ? x : q - x;
*/
static ossl_inline ossl_unused uint32_t abs_mod_prime(uint32_t x)
{
- return constant_time_select_32(constant_time_lt_32(ML_DSA_Q_MINUS1_DIV2, x),
- ML_DSA_Q - x, x);
+ uint32_t mask = ML_DSA_Q_MINUS1_DIV2 - x; /* wraps (bit 31 set) iff x > (q-1)/2, for x < 2^31 */
+
+ mask = 0u - (mask >> 31); /* all-ones iff x > (q-1)/2, so x == (q-1)/2 is returned unchanged */
+ return constant_time_select_32(mask, ML_DSA_Q - x, x);
}
/*
@@ -154,7 +164,9 @@ static ossl_inline ossl_unused uint32_t abs_mod_prime(uint32_t x)
*/
static ossl_inline ossl_unused uint32_t maximum(uint32_t x, uint32_t y)
{
- return constant_time_select_int(constant_time_lt(x, y), y, x);
+ uint32_t mask = x - y; /* bit 31 set iff x < y, assuming x and y are both below 2^31 */
+ mask = 0u - (mask >> 31); /* spread that bit into an all-ones or all-zeros mask */
+ return constant_time_select_int(mask, y, x); /* NOTE(review): _int variant on uint32_t operands; sibling helpers use _32 */
}
#endif /* OSSL_CRYPTO_ML_DSA_LOCAL_H */