Commit 1d18101a644e for kernel
commit 1d18101a644e6ece450d5b0a93f21a71a21b6222
Merge: f2e74ecfba1b c8e00cdc7425
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon Dec 1 13:45:41 2025 -0800
Merge tag 'kernel-6.19-rc1.cred' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull cred guard updates from Christian Brauner:
"This contains substantial credential infrastructure improvements
adding guard-based credential management that simplifies code and
eliminates manual reference counting in many subsystems.
Features:
- Kernel Credential Guards
Add with_kernel_creds() and scoped_with_kernel_creds() guards that
allow using the kernel credentials without allocating and copying
them. This was requested by Linus after seeing repeated
prepare_kernel_creds() calls that duplicate the kernel credentials
only to drop them again later.
The new guards completely avoid the allocation and never expose the
temporary variable to hold the kernel credentials anywhere in
callers.
- Generic Credential Guards
Add scoped_with_creds() guards for the common override_creds() and
revert_creds() pattern. This builds on earlier work that made
override_creds()/revert_creds() completely reference count free.
- Prepare Credential Guards
Add prepare credential guards for the more complex pattern of
preparing a new set of credentials and overriding the current
credentials with them:
- prepare_creds()
- modify new creds
- override_creds()
- revert_creds()
- put_cred()
Cleanups:
- Make init_cred static since it should not be directly accessed
- Add kernel_cred() helper to properly access the kernel credentials
- Fix scoped_class() macro that was introduced two cycles ago
- coredump: split out do_coredump() from vfs_coredump() for cleaner
credential handling
- coredump: move revert_cred() before coredump_cleanup()
- coredump: mark struct mm_struct as const
- coredump: pass struct linux_binfmt as const
- sev-dev: use guard for path"
* tag 'kernel-6.19-rc1.cred' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (36 commits)
trace: use override credential guard
trace: use prepare credential guard
coredump: use override credential guard
coredump: use prepare credential guard
coredump: split out do_coredump() from vfs_coredump()
coredump: mark struct mm_struct as const
coredump: pass struct linux_binfmt as const
coredump: move revert_cred() before coredump_cleanup()
sev-dev: use override credential guards
sev-dev: use prepare credential guard
sev-dev: use guard for path
cred: add prepare credential guard
net/dns_resolver: use credential guards in dns_query()
cgroup: use credential guards in cgroup_attach_permissions()
act: use credential guards in acct_write_process()
smb: use credential guards in cifs_get_spnego_key()
nfs: use credential guards in nfs_idmap_get_key()
nfs: use credential guards in nfs_local_call_write()
nfs: use credential guards in nfs_local_call_read()
erofs: use credential guards
...
diff --cc fs/backing-file.c
index 2a86bb6fcd13,ea137be16331..45da8600d564
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@@ -227,40 -267,14 +267,8 @@@ ssize_t backing_file_write_iter(struct
!(file->f_mode & FMODE_CAN_ODIRECT))
return -EINVAL;
- old_cred = override_creds(ctx->cred);
- if (is_sync_kiocb(iocb)) {
- rwf_t rwf = iocb_to_rw_flags(flags);
-
- ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
- if (ctx->end_write)
- ctx->end_write(iocb, ret);
- } else {
- struct backing_aio *aio;
-
- ret = backing_aio_init_wq(iocb);
- if (ret)
- goto out;
-
- ret = -ENOMEM;
- aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
- if (!aio)
- goto out;
-
- aio->orig_iocb = iocb;
- aio->end_write = ctx->end_write;
- kiocb_clone(&aio->iocb, iocb, get_file(file));
- aio->iocb.ki_flags = flags;
- aio->iocb.ki_complete = backing_aio_queue_completion;
- refcount_set(&aio->ref, 2);
- ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
- backing_aio_put(aio);
- if (ret != -EIOCBQUEUED)
- backing_aio_cleanup(aio, ret);
- }
- out:
- revert_creds(old_cred);
- /*
- * Stacked filesystems don't support deferred completions, don't copy
- * this property in case it is set by the issuer.
- */
- flags &= ~IOCB_DIO_CALLER_COMP;
--
- return ret;
+ scoped_with_creds(ctx->cred)
+ return do_backing_file_write_iter(file, iter, iocb, flags, ctx->end_write);
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);
diff --cc fs/nfs/localio.c
index 656976b4f42c,0c89a9d1e089..49ed90c6b9f2
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@@ -615,24 -590,17 +615,18 @@@ static void nfs_local_read_aio_complete
nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
}
--static void nfs_local_call_read(struct work_struct *work)
++static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *filp)
{
-- struct nfs_local_kiocb *iocb =
-- container_of(work, struct nfs_local_kiocb, work);
-- struct file *filp = iocb->kiocb.ki_filp;
- const struct cred *save_cred;
+ bool force_done = false;
ssize_t status;
+ int n_iters;
- save_cred = override_creds(filp->f_cred);
-
- scoped_with_creds(filp->f_cred) {
- for (int i = 0; i < iocb->n_iters ; i++) {
- if (iocb->iter_is_dio_aligned[i]) {
- iocb->kiocb.ki_flags |= IOCB_DIRECT;
+ n_iters = atomic_read(&iocb->n_iters);
+ for (int i = 0; i < n_iters ; i++) {
+ if (iocb->iter_is_dio_aligned[i]) {
+ iocb->kiocb.ki_flags |= IOCB_DIRECT;
+ /* Only use AIO completion if DIO-aligned segment is last */
+ if (i == iocb->end_iter_index) {
iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
iocb->aio_complete_work = nfs_local_read_aio_complete_work;
}
@@@ -649,8 -614,11 +643,16 @@@
}
}
}
++}
+
- if (status != -EIOCBQUEUED) {
- nfs_local_read_done(iocb, status);
- nfs_local_pgio_release(iocb);
- }
++static void nfs_local_call_read(struct work_struct *work)
++{
++ struct nfs_local_kiocb *iocb =
++ container_of(work, struct nfs_local_kiocb, work);
++ struct file *filp = iocb->kiocb.ki_filp;
+
- revert_creds(save_cred);
++ scoped_with_creds(filp->f_cred)
++ do_nfs_local_call_read(iocb, filp);
}
static int
@@@ -820,47 -781,78 +822,55 @@@ static void nfs_local_write_aio_complet
nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
}
- static void nfs_local_call_write(struct work_struct *work)
+ static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb,
+ struct file *filp)
{
- struct nfs_local_kiocb *iocb =
- container_of(work, struct nfs_local_kiocb, work);
- struct file *filp = iocb->kiocb.ki_filp;
- unsigned long old_flags = current->flags;
- const struct cred *save_cred;
+ bool force_done = false;
ssize_t status;
+ int n_iters;
- current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
- save_cred = override_creds(filp->f_cred);
-
file_start_write(filp);
- for (int i = 0; i < iocb->n_iters ; i++) {
+ n_iters = atomic_read(&iocb->n_iters);
+ for (int i = 0; i < n_iters ; i++) {
if (iocb->iter_is_dio_aligned[i]) {
iocb->kiocb.ki_flags |= IOCB_DIRECT;
- iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
- iocb->aio_complete_work = nfs_local_write_aio_complete_work;
- }
-retry:
- iocb->kiocb.ki_pos = iocb->offset[i];
+ /* Only use AIO completion if DIO-aligned segment is last */
+ if (i == iocb->end_iter_index) {
+ iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
+ iocb->aio_complete_work = nfs_local_write_aio_complete_work;
+ }
+ } else
+ iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
+
status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
if (status != -EIOCBQUEUED) {
- if (unlikely(status >= 0 && status < iocb->iters[i].count)) {
- /* partial write */
- if (i == iocb->end_iter_index) {
- /* Must not account partial end, otherwise, due
- * to end being issued before middle: the partial
- * write accounting in nfs_local_write_done()
- * would incorrectly advance hdr->args.offset
- */
- status = 0;
- } else {
- /* Partial write at start or buffered middle,
- * exit early.
- */
- nfs_local_pgio_done(iocb->hdr, status);
- break;
- }
- } else if (unlikely(status == -ENOTBLK &&
- (iocb->kiocb.ki_flags & IOCB_DIRECT))) {
- /* VFS will return -ENOTBLK if DIO WRITE fails to
- * invalidate the page cache. Retry using buffered IO.
- */
- iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
- iocb->kiocb.ki_complete = NULL;
- iocb->aio_complete_work = NULL;
- goto retry;
- }
- nfs_local_pgio_done(iocb->hdr, status);
- if (iocb->hdr->task.tk_status)
+ if (unlikely(status >= 0 && status < iocb->iters[i].count))
+ force_done = true; /* Partial write */
+ if (nfs_local_pgio_done(iocb, status, force_done)) {
+ nfs_local_write_iocb_done(iocb);
break;
+ }
}
}
file_end_write(filp);
- revert_creds(save_cred);
+ return status;
+ }
+
+ static void nfs_local_call_write(struct work_struct *work)
+ {
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ unsigned long old_flags = current->flags;
+ ssize_t status;
+
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+
+ scoped_with_creds(filp->f_cred)
+ status = do_nfs_local_call_write(iocb, filp);
+
current->flags = old_flags;
-
- if (status != -EIOCBQUEUED) {
- nfs_local_write_done(iocb, status);
- nfs_local_vfs_getattr(iocb);
- nfs_local_pgio_release(iocb);
- }
}
static int