Commit cefa8bb0ac for openssl.org

commit cefa8bb0ac6b2a83b4e57de2b5c40ac939cf1f95
Author: Milan Broz <gmazyland@gmail.com>
Date:   Wed Mar 11 23:03:36 2026 +0100

    Optimize Windows RCU thread signalling.

    With the pthread variant, a thread truly wakes up after
    the pthread_mutex_unlock call, even if pthread_cond_signal
    is called before.

    This is not true for the Windows variant. The waiting thread is
    woken up by WakeConditionVariable but immediately goes back to
    sleep, because the signalling thread still holds the mutex.
    Reordering (signalling the thread after unlocking) should save
    some time during these transitions and should be safe in this context.

    The speedup is visible on lhash_test, running on many CPUs
    (on 32 cores, a speedup from 1:40 to 1:05 minutes on test hw).

    Co-Authored-By: Claude Opus 4.6 Extended <noreply@anthropic.com>

    Signed-off-by: Milan Broz <gmazyland@gmail.com>

    Reviewed-by: Saša Nedvědický <sashan@openssl.org>
    Reviewed-by: Nikola Pajkovsky <nikolap@openssl.org>
    Reviewed-by: Tomas Mraz <tomas@openssl.org>
    MergeDate: Fri Mar 13 17:25:49 2026
    (Merged from https://github.com/openssl/openssl/pull/30388)

diff --git a/crypto/threads_win.c b/crypto/threads_win.c
index f96c625a3b..db24796009 100644
--- a/crypto/threads_win.c
+++ b/crypto/threads_win.c
@@ -347,9 +347,9 @@ static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock, uint32_t *curr_id)
     InterlockedExchange((LONG volatile *)&lock->reader_idx, tmp);
 #endif

+    ossl_crypto_mutex_unlock(lock->alloc_lock);
     /* wake up any waiters */
     ossl_crypto_condvar_signal(lock->alloc_signal);
-    ossl_crypto_mutex_unlock(lock->alloc_lock);
     return &lock->qp_group[current_idx];
 }

@@ -358,8 +358,8 @@ static void retire_qp(CRYPTO_RCU_LOCK *lock,
 {
     ossl_crypto_mutex_lock(lock->alloc_lock);
     lock->writers_alloced--;
-    ossl_crypto_condvar_signal(lock->alloc_signal);
     ossl_crypto_mutex_unlock(lock->alloc_lock);
+    ossl_crypto_condvar_signal(lock->alloc_signal);
 }

 void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
@@ -388,8 +388,8 @@ void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
     } while (count != (uint64_t)0);

     lock->next_to_retire++;
-    ossl_crypto_condvar_broadcast(lock->prior_signal);
     ossl_crypto_mutex_unlock(lock->prior_lock);
+    ossl_crypto_condvar_broadcast(lock->prior_signal);

     retire_qp(lock, qp);