Commit 864333b455 for openssl.org
commit 864333b455eb36ba84562d6482547bf4c8b49581
Author: Neil Horman <nhorman@openssl.org>
Date: Tue May 27 09:36:35 2025 -0400
Attempt to fix occasional failure of quicapi test in ci
https://github.com/openssl/openssl/actions/runs/15214054228/job/42795224720
the theory I have for the cause of this failure is:
1. qtest_create_quic_connection_ex is called for the client
2. The client is in blocking mode, so we fall into the conditional on line 512
3. We create the server thread on line 519, which is non-blocking
4. The scheduler in the failing case, lets the server run ahead of the client
5. Server thread enters qtest_create_quic_connection_ex and iterates steps
6-9 in the do_while loop starting on line 530
6. Server calls qtest_add_time
7. Server calls ossl_quic_tserver_tick
8. Server calls ossl_quic_tserver_is_term_any, received NULL return
9. Server calls qtest_wait_for_timeout
10. Eventually qtest_wait_for_timeout returns zero, adn the server jumps to
the error label, returning zero to globservret, and the thread exits
11. Client thread regains the cpu, and attempts to call SSL_connect, which
fails, as the server is no longer listening
12. We fall into the error case on line 556, and SSL_get_error returns
SSL_ERROR_SSL, which causes clienterr to get set to 1
13. We exit the do{} while loop on line 581, and do the TEST_true check on
line 593. The server having exited wait_for_thread returns true, but
globserverret is still zero from step 10 above, and so the test fails
I can't prove this is the case, as the test only appears to fail in CI,
and we can't dump verbose logging there, lest we affect the timing of
the tests, so this is just a theory, but it seems to fit the
observations we have.
Attempting to fix this, by creating a thread interlock with a condition
variable that blocks the server from ticking the quic reactor until such
time as the client is about to call SSL_connect to prevent the race
condition
Reviewed-by: Saša NedvÄ›dický <sashan@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/27704)
diff --git a/test/helpers/quictestlib.c b/test/helpers/quictestlib.c
index 032505a65e..166e102a2d 100644
--- a/test/helpers/quictestlib.c
+++ b/test/helpers/quictestlib.c
@@ -72,6 +72,12 @@ struct qtest_fault {
struct noise_args_data_st noiseargs;
};
+#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG)
+static int client_ready = 0;
+static CRYPTO_CONDVAR *client_ready_cond = NULL;
+static CRYPTO_MUTEX *client_ready_mutex = NULL;
+#endif
+
static void packet_plain_finish(void *arg);
static void handshake_finish(void *arg);
static OSSL_TIME qtest_get_time(void);
@@ -136,6 +142,23 @@ int qtest_create_quic_objects(OSSL_LIB_CTX *libctx, SSL_CTX *clientctx,
BIO *tmpbio = NULL;
QTEST_DATA *bdata = NULL;
+#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG)
+ if (client_ready_cond == NULL) {
+ client_ready_cond = ossl_crypto_condvar_new();
+ if (client_ready_cond == NULL)
+ return 0;
+ }
+
+ if (client_ready_mutex == NULL) {
+ client_ready_mutex = ossl_crypto_mutex_new();
+ if (client_ready_mutex == NULL) {
+ ossl_crypto_condvar_free(&client_ready_cond);
+ client_ready_cond = NULL;
+ return 0;
+ }
+ }
+#endif
+
bdata = OPENSSL_zalloc(sizeof(QTEST_DATA));
if (bdata == NULL)
return 0;
@@ -433,7 +456,6 @@ static int globserverret = 0;
static TSAN_QUALIFIER int abortserverthread = 0;
static QUIC_TSERVER *globtserv;
static const thread_t thread_zero;
-
static void run_server_thread(void)
{
/*
@@ -495,6 +517,7 @@ int qtest_create_quic_connection_ex(QUIC_TSERVER *qtserv, SSL *clientssl,
int retc = -1, rets = 0, abortctr = 0, ret = 0;
int clienterr = 0, servererr = 0;
#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG)
+
/*
* Pointless initialisation to avoid bogus compiler warnings about using
* t uninitialised
@@ -503,6 +526,15 @@ int qtest_create_quic_connection_ex(QUIC_TSERVER *qtserv, SSL *clientssl,
if (clientssl != NULL)
abortserverthread = 0;
+
+ /*
+ * Only set the client_ready flag to zero if we are the client
+ */
+ if (clientssl != NULL) {
+ ossl_crypto_mutex_lock(client_ready_mutex);
+ client_ready = 0;
+ ossl_crypto_mutex_unlock(client_ready_mutex);
+ }
#endif
if (!TEST_ptr(qtserv)) {
@@ -531,6 +563,12 @@ int qtest_create_quic_connection_ex(QUIC_TSERVER *qtserv, SSL *clientssl,
if (!clienterr && retc <= 0) {
int err;
+#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG)
+ ossl_crypto_mutex_lock(client_ready_mutex);
+ client_ready = 1;
+ ossl_crypto_condvar_broadcast(client_ready_cond);
+ ossl_crypto_mutex_unlock(client_ready_mutex);
+#endif
retc = SSL_connect(clientssl);
if (retc <= 0) {
err = SSL_get_error(clientssl, retc);
@@ -557,8 +595,18 @@ int qtest_create_quic_connection_ex(QUIC_TSERVER *qtserv, SSL *clientssl,
qtest_add_time(1);
if (clientssl != NULL)
SSL_handle_events(clientssl);
- if (qtserv != NULL)
+ if (qtserv != NULL) {
+#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG)
+ ossl_crypto_mutex_lock(client_ready_mutex);
+ for (;;) {
+ if (client_ready == 1)
+ break;
+ ossl_crypto_condvar_wait(client_ready_cond, client_ready_mutex);
+ }
+ ossl_crypto_mutex_unlock(client_ready_mutex);
+#endif
ossl_quic_tserver_tick(qtserv);
+ }
if (!servererr && rets <= 0) {
servererr = ossl_quic_tserver_is_term_any(qtserv);
@@ -587,6 +635,14 @@ int qtest_create_quic_connection_ex(QUIC_TSERVER *qtserv, SSL *clientssl,
if (qtserv == NULL && rets > 0) {
#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG)
+ /*
+ * Make sure we unblock the server before we wait on completion here
+ * in case it didn't happen in the connect loop above
+ */
+ ossl_crypto_mutex_lock(client_ready_mutex);
+ client_ready = 1;
+ ossl_crypto_condvar_broadcast(client_ready_cond);
+ ossl_crypto_mutex_unlock(client_ready_mutex);
if (!TEST_true(wait_for_thread(t)) || !TEST_true(globserverret))
goto err;
#else
@@ -632,6 +688,11 @@ int qtest_shutdown(QUIC_TSERVER *qtserv, SSL *clientssl)
* t uninitialised
*/
thread_t t = thread_zero;
+
+ ossl_crypto_condvar_free(&client_ready_cond);
+ client_ready_cond = NULL;
+ ossl_crypto_mutex_free(&client_ready_mutex);
+ client_ready_mutex = NULL;
#endif
if (SSL_get_blocking_mode(clientssl) > 0) {