Skip to content

Commit a3d0145

Browse files
Trond Myklebust
authored and committed
NFS: Remove BKL requirement from attribute updates
The main problem is dealing with inode->i_size: we need to take the inode->i_lock on all attribute updates, and so vmtruncate won't cut it. Make an NFS-private version of vmtruncate that has the necessary locking semantics.

The result should be that the following inode attribute updates are protected by inode->i_lock:
  - nfsi->cache_validity
  - nfsi->read_cache_jiffies
  - nfsi->attrtimeo
  - nfsi->attrtimeo_timestamp
  - nfsi->change_attr
  - nfsi->last_updated
  - nfsi->cache_change_attribute
  - nfsi->access_cache
  - nfsi->access_cache_entry_lru
  - nfsi->access_cache_inode_lru
  - nfsi->acl_access
  - nfsi->acl_default
  - nfsi->nfs_page_tree
  - nfsi->ncommit
  - nfsi->npages
  - nfsi->open_files
  - nfsi->silly_list
  - nfsi->acl
  - nfsi->open_states
  - inode->i_size
  - inode->i_atime
  - inode->i_mtime
  - inode->i_ctime
  - inode->i_nlink
  - inode->i_uid
  - inode->i_gid

The following is protected by dir->i_mutex:
  - nfsi->cookieverf

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
1 parent 1b83d70 commit a3d0145

File tree

2 files changed

+71
-11
lines changed

2 files changed

+71
-11
lines changed

fs/nfs/inode.c

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,62 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
388388
return error;
389389
}
390390

391+
/**
392+
* nfs_vmtruncate - unmap mappings "freed" by truncate() syscall
393+
* @inode: inode of the file used
394+
* @offset: file offset to start truncating
395+
*
396+
* This is a copy of the common vmtruncate, but with the locking
397+
* corrected to take into account the fact that NFS requires
398+
* inode->i_size to be updated under the inode->i_lock.
399+
*/
400+
static int nfs_vmtruncate(struct inode * inode, loff_t offset)
401+
{
402+
if (i_size_read(inode) < offset) {
403+
unsigned long limit;
404+
405+
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
406+
if (limit != RLIM_INFINITY && offset > limit)
407+
goto out_sig;
408+
if (offset > inode->i_sb->s_maxbytes)
409+
goto out_big;
410+
spin_lock(&inode->i_lock);
411+
i_size_write(inode, offset);
412+
spin_unlock(&inode->i_lock);
413+
} else {
414+
struct address_space *mapping = inode->i_mapping;
415+
416+
/*
417+
* truncation of in-use swapfiles is disallowed - it would
418+
* cause subsequent swapout to scribble on the now-freed
419+
* blocks.
420+
*/
421+
if (IS_SWAPFILE(inode))
422+
return -ETXTBSY;
423+
spin_lock(&inode->i_lock);
424+
i_size_write(inode, offset);
425+
spin_unlock(&inode->i_lock);
426+
427+
/*
428+
* unmap_mapping_range is called twice, first simply for
429+
* efficiency so that truncate_inode_pages does fewer
430+
* single-page unmaps. However after this first call, and
431+
* before truncate_inode_pages finishes, it is possible for
432+
* private pages to be COWed, which remain after
433+
* truncate_inode_pages finishes, hence the second
434+
* unmap_mapping_range call must be made for correctness.
435+
*/
436+
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
437+
truncate_inode_pages(mapping, offset);
438+
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
439+
}
440+
return 0;
441+
out_sig:
442+
send_sig(SIGXFSZ, current, 0);
443+
out_big:
444+
return -EFBIG;
445+
}
446+
391447
/**
392448
* nfs_setattr_update_inode - Update inode metadata after a setattr call.
393449
* @inode: pointer to struct inode
@@ -414,8 +470,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
414470
}
415471
if ((attr->ia_valid & ATTR_SIZE) != 0) {
416472
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
417-
inode->i_size = attr->ia_size;
418-
vmtruncate(inode, attr->ia_size);
473+
nfs_vmtruncate(inode, attr->ia_size);
419474
}
420475
}
421476

@@ -829,9 +884,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
829884
if (S_ISDIR(inode->i_mode))
830885
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
831886
}
832-
if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) &&
887+
if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) &&
833888
nfsi->npages == 0)
834-
inode->i_size = nfs_size_to_loff_t(fattr->size);
889+
i_size_write(inode, nfs_size_to_loff_t(fattr->size));
835890
}
836891
}
837892

@@ -972,7 +1027,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
9721027
(fattr->valid & NFS_ATTR_WCC) == 0) {
9731028
memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
9741029
memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
975-
fattr->pre_size = inode->i_size;
1030+
fattr->pre_size = i_size_read(inode);
9761031
fattr->valid |= NFS_ATTR_WCC;
9771032
}
9781033
return nfs_post_op_update_inode(inode, fattr);
@@ -1057,7 +1112,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
10571112
/* Do we perhaps have any outstanding writes, or has
10581113
* the file grown beyond our last write? */
10591114
if (nfsi->npages == 0 || new_isize > cur_isize) {
1060-
inode->i_size = new_isize;
1115+
i_size_write(inode, new_isize);
10611116
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
10621117
}
10631118
dprintk("NFS: isize change on server for file %s/%ld\n",

fs/nfs/write.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,16 +133,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page)
133133
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
134134
{
135135
struct inode *inode = page->mapping->host;
136-
loff_t end, i_size = i_size_read(inode);
137-
pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
136+
loff_t end, i_size;
137+
pgoff_t end_index;
138138

139+
spin_lock(&inode->i_lock);
140+
i_size = i_size_read(inode);
141+
end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
139142
if (i_size > 0 && page->index < end_index)
140-
return;
143+
goto out;
141144
end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
142145
if (i_size >= end)
143-
return;
144-
nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
146+
goto out;
145147
i_size_write(inode, end);
148+
nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
149+
out:
150+
spin_unlock(&inode->i_lock);
146151
}
147152

148153
/* A writeback failed: mark the page as bad, and invalidate the page cache */

0 commit comments

Comments
 (0)