summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-07-04 19:36:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-07-04 19:36:06 -0700
commit1dc51b8288007753ad7cd7d08bb8fa930fc8bb10 (patch)
tree0616c0ff7d877e64d9c248a6cdff074eae258840 /fs
parent9b284cbdb5de3b8871014f8290d1b540e5181c21 (diff)
parent0f1db7dee200127da4c07928189748918c312031 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull more vfs updates from Al Viro: "Assorted VFS fixes and related cleanups (IMO the most interesting in that part are f_path-related things and Eric's descriptor-related stuff). UFS regression fixes (it got broken last cycle). 9P fixes. fs-cache series, DAX patches, Jan's file_remove_suid() work" [ I'd say this is much more than "fixes and related cleanups". The file_table locking rule change by Eric Dumazet is a rather big and fundamental update even if the patch isn't huge. - Linus ] * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (49 commits) 9p: cope with bogus responses from server in p9_client_{read,write} p9_client_write(): avoid double p9_free_req() 9p: forgetting to cancel request on interrupted zero-copy RPC dax: bdev_direct_access() may sleep block: Add support for DAX reads/writes to block devices dax: Use copy_from_iter_nocache dax: Add block size note to documentation fs/file.c: __fget() and dup2() atomicity rules fs/file.c: don't acquire files->file_lock in fd_install() fs:super:get_anon_bdev: fix race condition could cause dev exceed its upper limitation vfs: avoid creation of inode number 0 in get_next_ino namei: make set_root_rcu() return void make simple_positive() public ufs: use dir_pages instead of ufs_dir_pages() pagemap.h: move dir_pages() over there remove the pointless include of lglock.h fs: cleanup slight list_entry abuse xfs: Correctly lock inode when removing suid and file capabilities fs: Call security_ops->inode_killpriv on truncate fs: Provide function telling whether file_remove_privs() will do anything ...
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/autofs4/autofs_i.h5
-rw-r--r--fs/befs/befs.h2
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/block_dev.c10
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/namei.c33
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/coda/coda_linux.h2
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dax.c8
-rw-r--r--fs/dcache.c5
-rw-r--r--fs/debugfs/inode.c11
-rw-r--r--fs/exofs/dir.c6
-rw-r--r--fs/ext2/dir.c5
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/file.c77
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/freevxfs/vxfs_lookup.c7
-rw-r--r--fs/fscache/cookie.c8
-rw-r--r--fs/fscache/internal.h12
-rw-r--r--fs/fscache/object.c69
-rw-r--r--fs/fscache/operation.c254
-rw-r--r--fs/fscache/page.c86
-rw-r--r--fs/fscache/stats.c14
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/inode.c66
-rw-r--r--fs/internal.h1
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/jfs_incore.h2
-rw-r--r--fs/libfs.c5
-rw-r--r--fs/minix/dir.c5
-rw-r--r--fs/minix/minix.h2
-rw-r--r--fs/namei.c6
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nilfs2/dir.c5
-rw-r--r--fs/nilfs2/inode.c22
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/open.c61
-rw-r--r--fs/overlayfs/inode.c22
-rw-r--r--fs/overlayfs/overlayfs.h1
-rw-r--r--fs/overlayfs/super.c1
-rw-r--r--fs/posix_acl.c46
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/qnx6/dir.c5
-rw-r--r--fs/seq_file.c14
-rw-r--r--fs/squashfs/squashfs_fs_i.h2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysv/dir.c5
-rw-r--r--fs/sysv/sysv.h2
-rw-r--r--fs/tracefs/inode.c11
-rw-r--r--fs/udf/udf_i.h2
-rw-r--r--fs/ufs/balloc.c34
-rw-r--r--fs/ufs/dir.c19
-rw-r--r--fs/ufs/ialloc.c16
-rw-r--r--fs/ufs/inode.c5
-rw-r--r--fs/ufs/namei.c79
-rw-r--r--fs/ufs/super.c11
-rw-r--r--fs/ufs/ufs.h3
-rw-r--r--fs/xfs/xfs_file.c11
69 files changed, 657 insertions, 467 deletions
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index cffe8370fb44..c69a87eaf57d 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -64,7 +64,7 @@ struct affs_inode_info {
/* short cut to get to the affs specific inode data */
static inline struct affs_inode_info *AFFS_I(struct inode *inode)
{
- return list_entry(inode, struct affs_inode_info, vfs_inode);
+ return container_of(inode, struct affs_inode_info, vfs_inode);
}
/*
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 5b700ef1e59d..c37149b929be 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -238,11 +238,6 @@ static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
return d_inode(sbi->sb->s_root)->i_ino;
}
-static inline int simple_positive(struct dentry *dentry)
-{
- return d_really_is_positive(dentry) && !d_unhashed(dentry);
-}
-
static inline void __autofs4_add_expiring(struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index 1fead8d56a98..35d19e8731e3 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -112,7 +112,7 @@ BEFS_SB(const struct super_block *super)
static inline struct befs_inode_info *
BEFS_I(const struct inode *inode)
{
- return list_entry(inode, struct befs_inode_info, vfs_inode);
+ return container_of(inode, struct befs_inode_info, vfs_inode);
}
static inline befs_blocknr_t
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index cd46e4158830..6b659967898e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1530,7 +1530,7 @@ static int fill_files_note(struct memelfnote *note)
file = vma->vm_file;
if (!file)
continue;
- filename = d_path(&file->f_path, name_curpos, remaining);
+ filename = file_path(file, name_curpos, remaining);
if (IS_ERR(filename)) {
if (PTR_ERR(filename) == -ENAMETOOLONG) {
vfree(data);
@@ -1540,7 +1540,7 @@ static int fill_files_note(struct memelfnote *note)
continue;
}
- /* d_path() fills at the end, move name down */
+ /* file_path() fills at the end, move name down */
/* n = strlen(filename) + 1: */
n = (name_curpos + remaining) - filename;
remaining = filename - name_curpos;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4fe10f93db8a..198243717da5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -152,6 +152,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ if (IS_DAX(inode))
+ return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
+ NULL, DIO_SKIP_DIO_COUNT);
return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
blkdev_get_block, NULL, NULL,
DIO_SKIP_DIO_COUNT);
@@ -443,6 +446,12 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector,
long avail;
const struct block_device_operations *ops = bdev->bd_disk->fops;
+ /*
+ * The device driver is allowed to sleep, in order to make the
+ * memory directly accessible.
+ */
+ might_sleep();
+
if (size < 0)
return size;
if (!ops->direct_access)
@@ -1170,6 +1179,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
+ bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
if (!partno) {
ret = -ENXIO;
bdev->bd_part = disk_get_part(disk, partno);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 795d754327a7..b823fac91c92 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1748,7 +1748,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
}
current->backing_dev_info = inode_to_bdi(inode);
- err = file_remove_suid(file);
+ err = file_remove_privs(file);
if (err) {
mutex_unlock(&inode->i_mutex);
goto out;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 8c52472d2efa..aecd0859eacb 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -43,7 +43,6 @@ struct cachefiles_object {
loff_t i_size; /* object size */
unsigned long flags;
#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
-#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */
atomic_t usage; /* object usage count */
uint8_t type; /* object type */
uint8_t new; /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index ab857ab9f40d..fc1056f5c96a 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -97,7 +97,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
* call vfs_unlink(), vfs_rmdir() or vfs_rename()
*/
static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
- struct dentry *dentry)
+ struct dentry *dentry,
+ enum fscache_why_object_killed why)
{
struct cachefiles_object *object;
struct rb_node *p;
@@ -132,8 +133,9 @@ found_dentry:
pr_err("\n");
pr_err("Error: Can't preemptively bury live object\n");
cachefiles_printk_object(object, NULL);
- } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
- pr_err("Error: Object already preemptively buried\n");
+ } else {
+ if (why != FSCACHE_OBJECT_IS_STALE)
+ fscache_object_mark_killed(&object->fscache, why);
}
write_unlock(&cache->active_lock);
@@ -265,7 +267,8 @@ requeue:
static int cachefiles_bury_object(struct cachefiles_cache *cache,
struct dentry *dir,
struct dentry *rep,
- bool preemptive)
+ bool preemptive,
+ enum fscache_why_object_killed why)
{
struct dentry *grave, *trap;
struct path path, path_to_graveyard;
@@ -289,7 +292,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
ret = vfs_unlink(d_inode(dir), rep, NULL);
if (preemptive)
- cachefiles_mark_object_buried(cache, rep);
+ cachefiles_mark_object_buried(cache, rep, why);
}
mutex_unlock(&d_inode(dir)->i_mutex);
@@ -394,7 +397,7 @@ try_again:
"Rename failed with error %d", ret);
if (preemptive)
- cachefiles_mark_object_buried(cache, rep);
+ cachefiles_mark_object_buried(cache, rep, why);
}
unlock_rename(cache->graveyard, dir);
@@ -422,7 +425,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
- if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+ if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {
/* object allocation for the same key preemptively deleted this
* object's file so that it could create its own file */
_debug("object preemptively buried");
@@ -433,7 +436,8 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
* may have been renamed */
if (dir == object->dentry->d_parent) {
ret = cachefiles_bury_object(cache, dir,
- object->dentry, false);
+ object->dentry, false,
+ FSCACHE_OBJECT_WAS_RETIRED);
} else {
/* it got moved, presumably by cachefilesd culling it,
* so it's no longer in the key path and we can ignore
@@ -522,7 +526,7 @@ lookup_again:
if (d_is_negative(next)) {
ret = cachefiles_has_space(cache, 1, 0);
if (ret < 0)
- goto create_error;
+ goto no_space_error;
path.dentry = dir;
ret = security_path_mkdir(&path, next, 0);
@@ -551,7 +555,7 @@ lookup_again:
if (d_is_negative(next)) {
ret = cachefiles_has_space(cache, 1, 0);
if (ret < 0)
- goto create_error;
+ goto no_space_error;
path.dentry = dir;
ret = security_path_mknod(&path, next, S_IFREG, 0);
@@ -602,7 +606,8 @@ lookup_again:
* mutex) */
object->dentry = NULL;
- ret = cachefiles_bury_object(cache, dir, next, true);
+ ret = cachefiles_bury_object(cache, dir, next, true,
+ FSCACHE_OBJECT_IS_STALE);
dput(next);
next = NULL;
@@ -610,6 +615,7 @@ lookup_again:
goto delete_error;
_debug("redo lookup");
+ fscache_object_retrying_stale(&object->fscache);
goto lookup_again;
}
}
@@ -662,6 +668,8 @@ lookup_again:
_leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino);
return 0;
+no_space_error:
+ fscache_object_mark_killed(&object->fscache, FSCACHE_OBJECT_NO_SPACE);
create_error:
_debug("create error %d", ret);
if (ret == -EIO)
@@ -927,7 +935,8 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
/* actually remove the victim (drops the dir mutex) */
_debug("bury");
- ret = cachefiles_bury_object(cache, dir, victim, false);
+ ret = cachefiles_bury_object(cache, dir, victim, false,
+ FSCACHE_OBJECT_WAS_CULLED);
if (ret < 0)
goto error;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index faf92095e105..8b79d87eaf46 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -962,7 +962,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
pos = iocb->ki_pos;
count = iov_iter_count(from);
- err = file_remove_suid(file);
+ err = file_remove_privs(file);
if (err)
goto out;
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index d6f7a76a1f5b..f829fe963f5b 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -79,7 +79,7 @@ void coda_sysctl_clean(void);
static inline struct coda_inode_info *ITOC(struct inode *inode)
{
- return list_entry(inode, struct coda_inode_info, vfs_inode);
+ return container_of(inode, struct coda_inode_info, vfs_inode);
}
static __inline__ struct CodaFid *coda_i2f(struct inode *inode)
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 8d89f5fd0331..eae87575e681 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -236,7 +236,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
if (dentry) {
spin_lock(&dentry->d_lock);
- if (!d_unhashed(dentry) && d_really_is_positive(dentry)) {
+ if (simple_positive(dentry)) {
dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
diff --git a/fs/coredump.c b/fs/coredump.c
index e52e0064feac..c5ecde6f3eed 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -140,7 +140,7 @@ static int cn_print_exe_file(struct core_name *cn)
goto put_exe_file;
}
- path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+ path = file_path(exe_file, pathbuf, PATH_MAX);
if (IS_ERR(path)) {
ret = PTR_ERR(path);
goto free_buf;
diff --git a/fs/dax.c b/fs/dax.c
index 99b5fbc38992..c3e21ccfc358 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -155,7 +155,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
}
if (iov_iter_rw(iter) == WRITE)
- len = copy_from_iter(addr, max - pos, iter);
+ len = copy_from_iter_nocache(addr, max - pos, iter);
else if (!hole)
len = copy_to_iter(addr, max - pos, iter);
else
@@ -209,7 +209,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
}
/* Protects against truncate */
- inode_dio_begin(inode);
+ if (!(flags & DIO_SKIP_DIO_COUNT))
+ inode_dio_begin(inode);
retval = dax_io(inode, iter, pos, end, get_block, &bh);
@@ -219,7 +220,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
if ((retval > 0) && end_io)
end_io(iocb, pos, retval, bh.b_private);
- inode_dio_end(inode);
+ if (!(flags & DIO_SKIP_DIO_COUNT))
+ inode_dio_end(inode);
out:
return retval;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 910968b4b6bf..7a3f3e5f9cea 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1673,7 +1673,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
DCACHE_OP_COMPARE |
DCACHE_OP_REVALIDATE |
DCACHE_OP_WEAK_REVALIDATE |
- DCACHE_OP_DELETE ));
+ DCACHE_OP_DELETE |
+ DCACHE_OP_SELECT_INODE));
dentry->d_op = op;
if (!op)
return;
@@ -1689,6 +1690,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
dentry->d_flags |= DCACHE_OP_DELETE;
if (op->d_prune)
dentry->d_flags |= DCACHE_OP_PRUNE;
+ if (op->d_select_inode)
+ dentry->d_flags |= DCACHE_OP_SELECT_INODE;
}
EXPORT_SYMBOL(d_set_d_op);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d6d1cf004123..c711be8d6a3c 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -44,11 +44,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb)
return inode;
}
-static inline int debugfs_positive(struct dentry *dentry)
-{
- return d_really_is_positive(dentry) && !d_unhashed(dentry);
-}
-
struct debugfs_mount_opts {
kuid_t uid;
kgid_t gid;
@@ -522,7 +517,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
{
int ret = 0;
- if (debugfs_positive(dentry)) {
+ if (simple_positive(dentry)) {
dget(dentry);
if (d_is_dir(dentry))
ret = simple_rmdir(d_inode(parent), dentry);
@@ -602,7 +597,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
*/
spin_lock(&parent->d_lock);
list_for_each_entry(child, &parent->d_subdirs, d_child) {
- if (!debugfs_positive(child))
+ if (!simple_positive(child))
continue;
/* perhaps simple_empty(child) makes more sense */
@@ -623,7 +618,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
* from d_subdirs. When releasing the parent->d_lock we can
* no longer trust that the next pointer is valid.
* Restart the loop. We'll skip this one with the
- * debugfs_positive() check.
+ * simple_positive() check.
*/
goto loop;
}
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 4deb0b05b011..e5bb2abf77f9 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -44,12 +44,6 @@ static inline void exofs_put_page(struct page *page)
page_cache_release(page);
}
-/* Accesses dir's inode->i_size must be called under inode lock */
-static inline unsigned long dir_pages(struct inode *inode)
-{
- return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-}
-
static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)
{
loff_t last_byte = inode->i_size;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 796b491e6978..0c6638b40f21 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -70,11 +70,6 @@ static inline void ext2_put_page(struct page *page)
page_cache_release(page);
}
-static inline unsigned long dir_pages(struct inode *inode)
-{
- return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
-}
-
/*
* Return the offset into page `page_nr' of the last valid
* byte in that page, plus one.
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5c787647afe2..58987b5c514b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -452,7 +452,7 @@ void __ext4_error_file(struct file *file, const char *function,
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
if (ext4_error_ratelimit(inode->i_sb)) {
- path = d_path(&(file->f_path), pathname, sizeof(pathname));
+ path = file_path(file, pathname, sizeof(pathname));
if (IS_ERR(path))
path = "(unknown)";
va_start(args, fmt);
diff --git a/fs/file.c b/fs/file.c
index 93c5f89c248b..6c672ad329e9 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr)
spin_unlock(&files->file_lock);
new_fdt = alloc_fdtable(nr);
+
+ /* make sure all __fd_install() have seen resize_in_progress
+ * or have finished their rcu_read_lock_sched() section.
+ */
+ if (atomic_read(&files->count) > 1)
+ synchronize_sched();
+
spin_lock(&files->file_lock);
if (!new_fdt)
return -ENOMEM;
@@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr)
__free_fdtable(new_fdt);
return -EMFILE;
}
- /*
- * Check again since another task may have expanded the fd table while
- * we dropped the lock
- */
cur_fdt = files_fdtable(files);
- if (nr >= cur_fdt->max_fds) {
- /* Continue as planned */
- copy_fdtable(new_fdt, cur_fdt);
- rcu_assign_pointer(files->fdt, new_fdt);
- if (cur_fdt != &files->fdtab)
- call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
- } else {
- /* Somebody else expanded, so undo our attempt */
- __free_fdtable(new_fdt);
- }
+ BUG_ON(nr < cur_fdt->max_fds);
+ copy_fdtable(new_fdt, cur_fdt);
+ rcu_assign_pointer(files->fdt, new_fdt);
+ if (cur_fdt != &files->fdtab)
+ call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
+ /* coupled with smp_rmb() in __fd_install() */
+ smp_wmb();
return 1;
}
@@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr)
* The files->file_lock should be held on entry, and will be held on exit.
*/
static int expand_files(struct files_struct *files, int nr)
+ __releases(files->file_lock)
+ __acquires(files->file_lock)
{
struct fdtable *fdt;
+ int expanded = 0;
+repeat:
fdt = files_fdtable(files);
/* Do we need to expand? */
if (nr < fdt->max_fds)
- return 0;
+ return expanded;
/* Can we expand? */
if (nr >= sysctl_nr_open)
return -EMFILE;