Commit cc25df3e2e22 for kernel

commit cc25df3e2e22a956d3a0d427369367b4a901d203
Merge: 0abcfd8983e3 d211a2803551
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Wed Dec 3 19:26:18 2025 -0800

    Merge tag 'for-6.19/block-20251201' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

    Pull block updates from Jens Axboe:

     - Fix head insertion for mq-deadline, a regression from when priority
       support was added

     - Series simplifying and improving the ublk user copy code

     - Various ublk related cleanups

     - Fixup REQ_NOWAIT handling in loop/zloop, clearing NOWAIT when the
       request is punted to a thread for handling

     - Merge and then later revert loop dio nowait support, as it ended up
       causing excessive stack usage when the inline issue code needs to
       dip back into the full file system code

     - Improve auto integrity code, making it less deadlock prone

     - Speed up polled IO handling by manually managing the hctx lookups

     - Fixes for blk-throttle for SSD devices

     - Small series with fixes for the S390 dasd driver

     - Add support for caching zones, avoiding unnecessary report zone
       queries

     - MD pull requests via Yu:
          - fix null-ptr-dereference regression for dm-raid0
          - fix IO hang for raid5 when array is broken with IO inflight
          - remove legacy 1s delay to speed up system shutdown
          - change maintainer's email address
          - data can be lost if the array is created from devices with
            different logical block sizes (lbs); fix this and record the
            lbs of the array in the metadata
          - fix rcu protection for md_thread
          - fix mddev kobject lifetime regression
          - enable atomic writes for md-linear
          - some cleanups

     - bcache updates via Coly
          - remove useless discard and cache device code
          - improve usage of per-cpu workqueues

     - Reorganize the IO scheduler switching code, fixing some lockdep
       reports as well

     - Improve the block layer P2P DMA support

     - Add support to the block tracing code for zoned devices

     - Segment calculation improvements, and more flexibility in memory
       alignment handling

     - Set of prep and cleanup patches for ublk batching support. The
       actual batching hasn't been added yet, but this helps shrink the
       work of getting that patchset ready for 6.20

     - Fix for how the ps3 block driver handles segment offsets

     - Improve how block plugging handles batch tag allocations

     - nbd fixes for use-after-free of the configuration on device clear/put

     - Set of improvements and fixes for zloop

     - Add Damien as maintainer of the block zoned device code handling

     - Various other fixes and cleanups

    * tag 'for-6.19/block-20251201' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (162 commits)
      block/rnbd: correct all kernel-doc complaints
      blk-mq: use queue_hctx in blk_mq_map_queue_type
      md: remove legacy 1s delay in md_notify_reboot
      md/raid5: fix IO hang when array is broken with IO inflight
      md: warn about updating super block failure
      md/raid0: fix NULL pointer dereference in create_strip_zones() for dm-raid
      sbitmap: fix all kernel-doc warnings
      ublk: add helper of __ublk_fetch()
      ublk: pass const pointer to ublk_queue_is_zoned()
      ublk: refactor auto buffer register in ublk_dispatch_req()
      ublk: add `union ublk_io_buf` with improved naming
      ublk: add parameter `struct io_uring_cmd *` to ublk_prep_auto_buf_reg()
      kfifo: add kfifo_alloc_node() helper for NUMA awareness
      blk-mq: fix potential uaf for 'queue_hw_ctx'
      blk-mq: use array manage hctx map instead of xarray
      ublk: prevent invalid access with DEBUG
      s390/dasd: Use scnprintf() instead of sprintf()
      s390/dasd: Move device name formatting into separate function
      s390/dasd: Remove unnecessary debugfs_create() return checks
      s390/dasd: Fix gendisk parent after copy pair swap
      ...

diff --cc block/blk-settings.c
index d74b13ec8e54,b38e94c85402..51401f08ce05
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@@ -184,16 -197,14 +197,24 @@@ static int blk_validate_integrity_limit
  	if (!bi->interval_exp)
  		bi->interval_exp = ilog2(lim->logical_block_size);

 +	/*
 +	 * The PI generation / validation helpers do not expect intervals to
 +	 * straddle multiple bio_vecs.  Enforce alignment so that those are
 +	 * never generated, and that each buffer is aligned as expected.
 +	 */
 +	if (bi->csum_type) {
 +		lim->dma_alignment = max(lim->dma_alignment,
 +					(1U << bi->interval_exp) - 1);
 +	}
 +
+ 	/*
+ 	 * The block layer automatically adds integrity data for bios that don't
+ 	 * already have it.  Limit the I/O size so that a single maximum size
+ 	 * metadata segment can cover the integrity data for the entire I/O.
+ 	 */
+ 	lim->max_sectors = min(lim->max_sectors,
+ 		max_integrity_io_size(lim) >> SECTOR_SHIFT);
+
  	return 0;
  }
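
For reference, a minimal userspace sketch of the arithmetic in the hunk above:
the dma_alignment mask is widened to the protection interval, and max_sectors
is capped by the amount of data a single metadata segment can cover.
SECTOR_SHIFT is 9 as in the kernel; the max_integrity_io_size() stand-in and
the segment/tuple sizes below are illustrative assumptions, not the kernel's
actual helper.

#include <stdio.h>

#define SECTOR_SHIFT		9
#define MAX_META_SEGMENT	4096u	/* assumed max metadata segment size, bytes */

struct lim {
	unsigned int dma_alignment;	/* required alignment minus one (a mask) */
	unsigned int max_sectors;
	unsigned int interval_exp;	/* log2 of the protection interval */
	unsigned int tuple_size;	/* bytes of PI per interval */
};

/* Illustrative stand-in: bytes of data one max-size metadata segment covers. */
static unsigned int max_integrity_io_size(const struct lim *l)
{
	return (MAX_META_SEGMENT / l->tuple_size) << l->interval_exp;
}

int main(void)
{
	struct lim l = {
		.dma_alignment = 511,
		.max_sectors = 65535,
		.interval_exp = 12,	/* 4 KiB protection interval */
		.tuple_size = 8,	/* e.g. an 8-byte T10 PI tuple */
	};
	unsigned int cap;

	/* First hunk: align buffers to the protection interval so an
	 * interval never straddles two bio_vecs. */
	if (((1u << l.interval_exp) - 1) > l.dma_alignment)
		l.dma_alignment = (1u << l.interval_exp) - 1;

	/* Second hunk: cap the I/O so one metadata segment covers all of it. */
	cap = max_integrity_io_size(&l) >> SECTOR_SHIFT;
	if (cap < l.max_sectors)
		l.max_sectors = cap;

	printf("dma_alignment mask 0x%x, max_sectors %u\n",
	       l.dma_alignment, l.max_sectors);
	return 0;
}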

diff --cc drivers/block/ublk_drv.c
index e0c601128efa,c2250172de4c..2c715df63f23
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@@ -1343,13 -1299,17 +1298,17 @@@ static void ublk_dispatch_req(struct ub
  	if (!ublk_start_io(ubq, req, io))
  		return;

- 	if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags))
+ 	if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req)) {
+ 		ublk_do_auto_buf_reg(ubq, req, io, io->cmd, issue_flags);
+ 	} else {
+ 		ublk_init_req_ref(ubq, io);
  		ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
+ 	}
  }

 -static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
 -			   unsigned int issue_flags)
 +static void ublk_cmd_tw_cb(struct io_tw_req tw_req, io_tw_token_t tw)
  {
 +	struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
  	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
  	struct ublk_queue *ubq = pdu->ubq;

diff --cc fs/xfs/xfs_zone_alloc.c
index 8dde444596f1,d121768dbccb..98f65d99b776
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@@ -1235,35 -1245,12 +1235,35 @@@ xfs_mount_zones
  		return -ENOMEM;

  	xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
 -		 mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks,
 -		 mp->m_max_open_zones);
 +		 mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones);
  	trace_xfs_zones_mount(mp);

 +	/*
 +	 * The writeback code switches between inodes regularly to provide
 +	 * fairness.  The default lower bound is 4MiB, but for zoned file
 +	 * systems we want to increase that, both to reduce seeks and, more
 +	 * importantly, so that workloads that write files in a multiple of the
 +	 * zone size do not get fragmented and require garbage collection when
 +	 * they shouldn't.  Increase it to the zone size, capped by the max
 +	 * extent length.
 +	 *
 +	 * Note that because s_min_writeback_pages is a superblock field, this
 +	 * value also gets applied to non-zoned files on the data device if
 +	 * there are any.  On a typical zoned setup all data is on the RT device
 +	 * because using the more efficient sequential write required zones
 +	 * is the reason for using the zone allocator, and either the RT device
 +	 * and the (meta)data device are on the same block device, or the
 +	 * (meta)data device is on a fast SSD while the data on the RT device
 +	 * is on an SMR HDD.  In any combination of the above cases, enforcing
 +	 * the higher min_writeback_pages for non-RT inodes is either a noop
 +	 * or beneficial.
 +	 */
 +	mp->m_super->s_min_writeback_pages =
 +		XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >>
 +			PAGE_SHIFT;
 +
  	if (bdev_is_zoned(bt->bt_bdev)) {
- 		error = blkdev_report_zones(bt->bt_bdev,
+ 		error = blkdev_report_zones_cached(bt->bt_bdev,
  				XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart),
  				mp->m_sb.sb_rgcount, xfs_get_zone_info_cb, &iz);
  		if (error < 0)
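
For reference, a minimal userspace sketch of the s_min_writeback_pages
computation added in the hunk above. The block size, zone size, page size,
and maximum extent length are illustrative assumptions; the kernel derives
them from the superblock geometry and XFS_MAX_BMBT_EXTLEN.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT		12		/* assumed 4 KiB pages */
#define FSB_SHIFT		12		/* assumed 4 KiB filesystem blocks */
#define MAX_BMBT_EXTLEN		2097151u	/* assumed max extent length, blocks */

static uint64_t fsb_to_bytes(uint64_t fsb)
{
	return fsb << FSB_SHIFT;
}

int main(void)
{
	uint64_t zone_blocks = 65536;	/* e.g. a 256 MiB zone in 4 KiB blocks */
	uint64_t limit_blocks = zone_blocks < MAX_BMBT_EXTLEN ?
				zone_blocks : MAX_BMBT_EXTLEN;
	uint64_t min_writeback_pages = fsb_to_bytes(limit_blocks) >> PAGE_SHIFT;

	/* For a 256 MiB zone with 4 KiB pages this is 65536 pages, i.e.
	 * writeback stays on one zoned inode for at least a full zone. */
	printf("min_writeback_pages = %llu (%llu MiB)\n",
	       (unsigned long long)min_writeback_pages,
	       (unsigned long long)(min_writeback_pages >> (20 - PAGE_SHIFT)));
	return 0;
}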