Commit d5811e6297f3 for kernel
commit d5811e6297f3fd9020ac31f51fc317dfdb260cb0
Author: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat Jan 10 18:53:34 2026 -0500
NFS: Fix size read races in truncate, fallocate and copy offload
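If the pre-operation file size is read before locking the inode and
quiescing O_DIRECT writes, then nfs_truncate_last_folio() might end up
overwriting valid file data.
Fix this by sampling the file size only after O_DIRECT writes have been
blocked: in nfs_setattr(), call nfs_file_block_o_direct() before reading
the size; in the fallocate paths, replace inode_lock()/inode_unlock()
with nfs_start_io_write()/nfs_end_io_write() (now exported) and read the
size once the lock is held; and in _nfs42_proc_copy(), read the
destination size immediately before issuing the COPY call.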
Fixes: b1817b18ff20 ("NFS: Protect against 'eof page pollution'")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 84049f3cd340..de2cce1d08f4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -716,7 +716,7 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
struct nfs_fattr *fattr;
- loff_t oldsize = i_size_read(inode);
+ loff_t oldsize;
int error = 0;
kuid_t task_uid = current_fsuid();
kuid_t owner_uid = inode->i_uid;
@@ -727,6 +727,10 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
attr->ia_valid &= ~ATTR_MODE;
+ if (S_ISREG(inode->i_mode))
+ nfs_file_block_o_direct(NFS_I(inode));
+
+ oldsize = i_size_read(inode);
if (attr->ia_valid & ATTR_SIZE) {
BUG_ON(!S_ISREG(inode->i_mode));
@@ -774,10 +778,8 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
trace_nfs_setattr_enter(inode);
/* Write all dirty data */
- if (S_ISREG(inode->i_mode)) {
- nfs_file_block_o_direct(NFS_I(inode));
+ if (S_ISREG(inode->i_mode))
nfs_sync_inode(inode);
- }
fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
if (fattr == NULL) {
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index d275b0a250bf..8337f0ae852d 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -84,6 +84,7 @@ nfs_start_io_write(struct inode *inode)
nfs_file_block_o_direct(NFS_I(inode));
return err;
}
+EXPORT_SYMBOL_GPL(nfs_start_io_write);
/**
* nfs_end_io_write - declare that the buffered write operation is done
@@ -97,6 +98,7 @@ nfs_end_io_write(struct inode *inode)
{
up_write(&inode->i_rwsem);
}
+EXPORT_SYMBOL_GPL(nfs_end_io_write);
/* Call with exclusively locked inode->i_rwsem */
static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index d537fb0c230e..c08520828708 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -114,7 +114,6 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
exception.inode = inode;
exception.state = lock->open_context->state;
- nfs_file_block_o_direct(NFS_I(inode));
err = nfs_sync_inode(inode);
if (err)
goto out;
@@ -138,13 +137,17 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE],
};
struct inode *inode = file_inode(filep);
- loff_t oldsize = i_size_read(inode);
+ loff_t oldsize;
int err;
if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
return -EOPNOTSUPP;
- inode_lock(inode);
+ err = nfs_start_io_write(inode);
+ if (err)
+ return err;
+
+ oldsize = i_size_read(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
@@ -155,7 +158,7 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
NFS_SERVER(inode)->caps &= ~(NFS_CAP_ALLOCATE |
NFS_CAP_ZERO_RANGE);
- inode_unlock(inode);
+ nfs_end_io_write(inode);
return err;
}
@@ -170,7 +173,9 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE))
return -EOPNOTSUPP;
- inode_lock(inode);
+ err = nfs_start_io_write(inode);
+ if (err)
+ return err;
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
@@ -179,7 +184,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
NFS_SERVER(inode)->caps &= ~(NFS_CAP_DEALLOCATE |
NFS_CAP_ZERO_RANGE);
- inode_unlock(inode);
+ nfs_end_io_write(inode);
return err;
}
@@ -189,14 +194,17 @@ int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ZERO_RANGE],
};
struct inode *inode = file_inode(filep);
- loff_t oldsize = i_size_read(inode);
+ loff_t oldsize;
int err;
if (!nfs_server_capable(inode, NFS_CAP_ZERO_RANGE))
return -EOPNOTSUPP;
- inode_lock(inode);
+ err = nfs_start_io_write(inode);
+ if (err)
+ return err;
+ oldsize = i_size_read(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0) {
nfs_truncate_last_folio(inode->i_mapping, oldsize,
@@ -205,7 +213,7 @@ int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len)
} else if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_ZERO_RANGE;
- inode_unlock(inode);
+ nfs_end_io_write(inode);
return err;
}
@@ -416,7 +424,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_server *src_server = NFS_SERVER(src_inode);
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
- loff_t oldsize_dst = i_size_read(dst_inode);
+ loff_t oldsize_dst;
size_t count = args->count;
ssize_t status;
@@ -461,6 +469,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
&src_lock->open_context->state->flags);
set_bit(NFS_CLNT_DST_SSC_COPY_STATE,
&dst_lock->open_context->state->flags);
+ oldsize_dst = i_size_read(dst_inode);
status = nfs4_call_sync(dst_server->client, dst_server, &msg,
&args->seq_args, &res->seq_res, 0);