Dev news

Commit 7c20d5f31d for qemu.org

commit 7c20d5f31dca62789ac526336e396d4e207066b1
Author: Cédric Le Goater <clg@redhat.com>
Date:   Wed May 13 11:45:22 2026 +0200

    vfio/migration: Detect and report overflow in migration size queries

    VFIO migration ioctls (VFIO_DEVICE_FEATURE_MIG_DATA_SIZE and
    VFIO_MIG_GET_PRECOPY_INFO) return device-estimated migration sizes as
    uint64_t values. A misbehaving kernel driver could return values that
    are unreasonably large, which would corrupt the size accounting used
    to decide migration convergence.

    This misbehavior occurred a few times when testing migration of a VM
    with an assigned NVIDIA vGPU and an MLX5 VF. In some of the save
    iterations, the reported precopy and stopcopy sizes were unreasonably
    large (close to UINT64_MAX):

      vfio_state_pending  (4fbce62c-8ce2-4cc9-b429-41635bc94f24) stopcopy size 0 precopy initial size 18446744073708667040 precopy dirty size 0
      vfio_save_iterate   (4fbce62c-8ce2-4cc9-b429-41635bc94f24) precopy initial size 18446744073707618464 precopy dirty size 0
      vfio_state_pending  (4fbce62c-8ce2-4cc9-b429-41635bc94f24) stopcopy size 18446744073708503040 precopy initial size 18446744073707618464 precopy dirty size 0
      vfio_state_pending  (4fbce62c-8ce2-4cc9-b429-41635bc94f24) stopcopy size 0 precopy initial size 18446744073707618464 precopy dirty size 0
      vfio_state_pending  (0000:b1:01.0) stopcopy size 18446744073709543408 precopy initial size 0 precopy dirty size 1008

    This had the effect of corrupting migration convergence, as reported
    by the HMP "info migrate" command:

      (qemu) info migrate
      Status:                 active
      Time (ms):              total=21140, setup=86, exp_down=152455434886355
      Remaining:              16 EiB
      RAM info:
        Throughput (Mbps):    967.98
        Sizes:                pagesize=4 KiB, total=4 GiB
        Transfers:            transferred=2.29 GiB, remain=4.7 MiB
          Channels:           precopy=1.91 GiB, multifd=0 B, postcopy=0 B, vfio=387 MiB
          Page Types:         normal=499427, zero=559708
        Page Rates (pps):     transfer=0, dirty=1892
        Others:               dirty_syncs=3

    Add a helper to detect values that exceed INT64_MAX, which is far
    beyond any realistic device state size, and report them with an error
    message. Return -ERANGE from the query functions so callers can abort
    the migration rather than proceeding with corrupted estimates.
    However, the callers don't yet check the return value to actually stop
    the migration.

    Cc: Avihai Horon <avihaih@nvidia.com>
    Cc: Peter Xu <peterx@redhat.com>
    Reviewed-by: Avihai Horon <avihaih@nvidia.com>
    Reviewed-by: Peter Xu <peterx@redhat.com>
    Link: https://lore.kernel.org/qemu-devel/20260513094522.346314-1-clg@redhat.com
    Signed-off-by: Cédric Le Goater <clg@redhat.com>

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 150e28656e..fb12b9717f 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -320,6 +320,18 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev)
     migration->data_fd = -1;
 }

+static bool vfio_migration_check_overflow(VFIODevice *vbasedev, uint64_t size,
+                                          const char *name)
+{
+    if (size > INT64_MAX) {
+        error_report("%s: Estimated %s size overflow: 0x%"PRIx64,
+                     vbasedev->name, name, size);
+        return true;
+    }
+
+    return false;
+}
+
 static int vfio_query_stop_copy_size(VFIODevice *vbasedev)
 {
     uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -329,7 +341,7 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev)
     struct vfio_device_feature_mig_data_size *mig_data_size =
         (struct vfio_device_feature_mig_data_size *)feature->data;
     VFIOMigration *migration = vbasedev->migration;
-    int ret;
+    int ret = 0;

     feature->argsz = sizeof(buf);
     feature->flags =
@@ -347,7 +359,10 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev)
                          vbasedev->name, ret);
     } else {
         migration->stopcopy_size = mig_data_size->stop_copy_length;
-        ret = 0;
+        if (vfio_migration_check_overflow(vbasedev, migration->stopcopy_size,
+                                          "stop copy size")) {
+            ret = -ERANGE;
+        }
     }

     trace_vfio_query_stop_copy_size(vbasedev->name,
@@ -361,7 +376,7 @@ static int vfio_query_precopy_size(VFIOMigration *migration)
     struct vfio_precopy_info precopy = {
         .argsz = sizeof(precopy),
     };
-    int ret;
+    int ret = 0;

     if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) {
         migration->precopy_init_size = 0;
@@ -370,9 +385,18 @@ static int vfio_query_precopy_size(VFIOMigration *migration)
         warn_report_once("VFIO device %s ioctl(VFIO_MIG_GET_PRECOPY_INFO) "
                          "failed (%d)", migration->vbasedev->name, ret);
     } else {
+        bool overflow;
+
         migration->precopy_init_size = precopy.initial_bytes;
         migration->precopy_dirty_size = precopy.dirty_bytes;
-        ret = 0;
+
+        overflow  = vfio_migration_check_overflow(migration->vbasedev,
+                         migration->precopy_init_size,  "precopy init size");
+        overflow |= vfio_migration_check_overflow(migration->vbasedev,
+                         migration->precopy_dirty_size, "precopy dirty size");
+        if (overflow) {
+            ret = -ERANGE;
+        }
     }

     trace_vfio_query_precopy_size(migration->vbasedev->name,