Commit 34a6763776 for qemu.org
commit 34a67637767d3ed1ac813c44effe827bbfba5996
Author: Kevin Wolf <kwolf@redhat.com>
Date: Tue Apr 21 18:11:27 2026 +0200
block: Add blk_co_start/end_request() and BDRV_REQ_NO_QUEUE
If a device uses blk_inc/dec_in_flight() in order to build macro
operations that involve multiple requests for the block layer and that
need to be completed as a unit before the BlockBackend can be considered
drained, it sets the stage for a deadlock: When a drain is requested,
the inner request at the BlockBackend level will be queued in
blk_wait_while_drained() and wait until the drained section ends, but at
the same time, drain_begin can only return if the whole macro operation
at the device level has completed.
Introduce a new interface to allow implementing the logic correctly:
Instead of queueing individual requests, blk_co_start_request() calls
blk_wait_while_drained() once at the beginning. The individual requests
must then set BDRV_REQ_NO_QUEUE to avoid being queued and running into
the deadlock; being wrapped in blk_co_start/end_request() makes sure
that drain_begin waits for them and they don't sneak in when the
BlockBackend is supposed to already be quiescent.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20260421161132.99878-3-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
diff --git a/block/block-backend.c b/block/block-backend.c
index 9944657120..ee00440e28 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -82,6 +82,7 @@ struct BlockBackend {
QemuMutex queued_requests_lock; /* protects queued_requests */
CoQueue queued_requests;
bool disable_request_queuing; /* atomic */
+ int start_request_count; /* atomic */
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
@@ -1306,10 +1307,16 @@ bool blk_in_drain(BlockBackend *blk)
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
-static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk,
+ BdrvRequestFlags flags)
{
assert(blk->in_flight > 0);
+ if (flags & BDRV_REQ_NO_QUEUE) {
+ assert(qatomic_read(&blk->start_request_count));
+ return;
+ }
+
if (qatomic_read(&blk->quiesce_counter) &&
!qatomic_read(&blk->disable_request_queuing)) {
/*
@@ -1335,7 +1342,7 @@ blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
BlockDriverState *bs;
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, flags);
GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
@@ -1410,7 +1417,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
BlockDriverState *bs;
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, flags);
GRAPH_RDLOCK_GUARD();
/* Call blk_bs() only after waiting, the graph may have changed */
@@ -1523,6 +1530,19 @@ void blk_dec_in_flight(BlockBackend *blk)
aio_wait_kick();
}
+void coroutine_fn blk_co_start_request(BlockBackend *blk)
+{
+ blk_inc_in_flight(blk);
+ blk_wait_while_drained(blk, 0);
+ qatomic_inc(&blk->start_request_count);
+}
+
+void blk_end_request(BlockBackend *blk)
+{
+ qatomic_dec(&blk->start_request_count);
+ blk_dec_in_flight(blk);
+}
+
static void error_callback_bh(void *opaque)
{
struct BlockBackendAIOCB *acb = opaque;
@@ -1741,7 +1761,7 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
if (!blk_co_is_available(blk)) {
@@ -1788,7 +1808,7 @@ blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
int ret;
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
ret = blk_check_byte_request(blk, offset, bytes);
@@ -1834,7 +1854,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
{
IO_CODE();
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
if (!blk_co_is_available(blk)) {
@@ -2009,7 +2029,7 @@ int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
IO_CODE();
blk_inc_in_flight(blk); /* increase before waiting */
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
@@ -2034,7 +2054,7 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
IO_CODE();
blk_inc_in_flight(blk);
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, 0);
GRAPH_RDLOCK_GUARD();
ret = blk_check_byte_request(blk, offset, len);
@@ -2058,7 +2078,7 @@ int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
IO_CODE();
blk_inc_in_flight(blk);
- blk_wait_while_drained(blk);
+ blk_wait_while_drained(blk, flags);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
diff --git a/include/block/block-common.h b/include/block/block-common.h
index c8c626daea..895ea17541 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -215,8 +215,17 @@ typedef enum {
*/
BDRV_REQ_NO_WAIT = 0x400,
+ /*
+ * Used between blk_co_start_request() and blk_end_request() to avoid
+ * that the request waits in a drained BlockBackend until the drained
+ * section ends. Waiting would cause a deadlock because drain waits for
+ * blk_end_request() to be called, but the request never completes
+ * because it waits for the drain to end.
+ */
+ BDRV_REQ_NO_QUEUE = 0x800,
+
/* Mask of valid flags */
- BDRV_REQ_MASK = 0x7ff,
+ BDRV_REQ_MASK = 0xfff,
} BdrvRequestFlags;
#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
diff --git a/include/system/block-backend-io.h b/include/system/block-backend-io.h
index 6d5ac476fc..0248c1c36e 100644
--- a/include/system/block-backend-io.h
+++ b/include/system/block-backend-io.h
@@ -71,6 +71,8 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
void blk_inc_in_flight(BlockBackend *blk);
void blk_dec_in_flight(BlockBackend *blk);
+void coroutine_fn blk_co_start_request(BlockBackend *blk);
+void blk_end_request(BlockBackend *blk);
bool coroutine_fn GRAPH_RDLOCK blk_co_is_inserted(BlockBackend *blk);
bool co_wrapper_mixed_bdrv_rdlock blk_is_inserted(BlockBackend *blk);