From f47ec3f28354795f000c14bf18ed967ec81a3ec3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 21 Nov 2011 21:15:42 -0500 Subject: trim fs/internal.h some stuff in there can actually become static; some belongs to pnode.h as it's a private interface between namespace.c and pnode.c... Signed-off-by: Al Viro --- fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index afd0f1ad45e0..66a12f9bfc20 100644 --- a/fs/super.c +++ b/fs/super.c @@ -210,7 +210,7 @@ static inline void destroy_super(struct super_block *s) /* * Drop a superblock's refcount. The caller must hold sb_lock. */ -void __put_super(struct super_block *sb) +static void __put_super(struct super_block *sb) { if (!--sb->s_count) { list_del_init(&sb->s_list); @@ -225,7 +225,7 @@ void __put_super(struct super_block *sb) * Drops a temporary reference, frees superblock if there's no * references left. */ -void put_super(struct super_block *sb) +static void put_super(struct super_block *sb) { spin_lock(&sb_lock); __put_super(sb); -- cgit v1.2.3 From a5166169f9b920cae3c503910cb66a3ac5dd846d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Dec 2011 22:53:00 -0500 Subject: vfs: convert fs_supers to hlist Signed-off-by: Al Viro --- fs/super.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 66a12f9bfc20..bab11bad13ba 100644 --- a/fs/super.c +++ b/fs/super.c @@ -136,7 +136,7 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_LIST_HEAD(&s->s_files); #endif s->s_bdi = &default_backing_dev_info; - INIT_LIST_HEAD(&s->s_instances); + INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); @@ -328,7 +328,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock) bool grab_super_passive(struct super_block *sb) { spin_lock(&sb_lock); 
- if (list_empty(&sb->s_instances)) { + if (hlist_unhashed(&sb->s_instances)) { spin_unlock(&sb_lock); return false; } @@ -400,7 +400,7 @@ void generic_shutdown_super(struct super_block *sb) } spin_lock(&sb_lock); /* should be initialized for __put_super_and_need_restart() */ - list_del_init(&sb->s_instances); + hlist_del_init(&sb->s_instances); spin_unlock(&sb_lock); up_write(&sb->s_umount); } @@ -420,13 +420,14 @@ struct super_block *sget(struct file_system_type *type, void *data) { struct super_block *s = NULL; + struct hlist_node *node; struct super_block *old; int err; retry: spin_lock(&sb_lock); if (test) { - list_for_each_entry(old, &type->fs_supers, s_instances) { + hlist_for_each_entry(old, node, &type->fs_supers, s_instances) { if (!test(old, data)) continue; if (!grab_super(old)) @@ -462,7 +463,7 @@ retry: s->s_type = type; strlcpy(s->s_id, type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); - list_add(&s->s_instances, &type->fs_supers); + hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); register_shrinker(&s->s_shrink); @@ -497,7 +498,7 @@ void sync_supers(void) spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (list_empty(&sb->s_instances)) + if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_op->write_super && sb->s_dirt) { sb->s_count++; @@ -533,7 +534,7 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (list_empty(&sb->s_instances)) + if (hlist_unhashed(&sb->s_instances)) continue; sb->s_count++; spin_unlock(&sb_lock); @@ -566,9 +567,10 @@ void iterate_supers_type(struct file_system_type *type, void (*f)(struct super_block *, void *), void *arg) { struct super_block *sb, *p = NULL; + struct hlist_node *node; spin_lock(&sb_lock); - list_for_each_entry(sb, &type->fs_supers, s_instances) { + hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) { 
sb->s_count++; spin_unlock(&sb_lock); @@ -607,7 +609,7 @@ struct super_block *get_super(struct block_device *bdev) spin_lock(&sb_lock); rescan: list_for_each_entry(sb, &super_blocks, s_list) { - if (list_empty(&sb->s_instances)) + if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { sb->s_count++; @@ -647,7 +649,7 @@ struct super_block *get_active_super(struct block_device *bdev) restart: spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (list_empty(&sb->s_instances)) + if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { if (grab_super(sb)) /* drops sb_lock */ @@ -667,7 +669,7 @@ struct super_block *user_get_super(dev_t dev) spin_lock(&sb_lock); rescan: list_for_each_entry(sb, &super_blocks, s_list) { - if (list_empty(&sb->s_instances)) + if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_dev == dev) { sb->s_count++; @@ -756,7 +758,7 @@ static void do_emergency_remount(struct work_struct *work) spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (list_empty(&sb->s_instances)) + if (hlist_unhashed(&sb->s_instances)) continue; sb->s_count++; spin_unlock(&sb_lock); -- cgit v1.2.3 From dabe0dc194d5d56d379a8994fff47392744b6491 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 3 Jan 2012 21:01:29 -0500 Subject: vfs: fix the rest of sget() races unfortunately, just checking MS_BORN after having grabbed ->s_umount in sget() is not enough; places that pick superblock from a list and grab s_umount shared need the same check in addition to checking for ->s_root; otherwise three-way race between failing mount, sget() and such list-walker can leave us with list-walker coming *second*, when temporary active ref grabbed by sget() (to be dropped when sget() notices that original mount has failed by checking MS_BORN) has led to deactivate_locked_super() from failing ->mount() *not* doing ->kill_sb() and just releasing ->s_umount. 
Once sget() gets through and notices that MS_BORN had never been set it will drop the active ref and fs will be shut down and kicked out of all lists, but it's too late for something like sync_supers(). Signed-off-by: Al Viro --- fs/super.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index bab11bad13ba..0413f51a9f0f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -337,7 +337,7 @@ bool grab_super_passive(struct super_block *sb) spin_unlock(&sb_lock); if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) + if (sb->s_root && (sb->s_flags & MS_BORN)) return true; up_read(&sb->s_umount); } @@ -505,7 +505,7 @@ void sync_supers(void) spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root && sb->s_dirt) + if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN)) sb->s_op->write_super(sb); up_read(&sb->s_umount); @@ -540,7 +540,7 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root) + if (sb->s_root && (sb->s_flags & MS_BORN)) f(sb, arg); up_read(&sb->s_umount); @@ -575,7 +575,7 @@ void iterate_supers_type(struct file_system_type *type, spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root) + if (sb->s_root && (sb->s_flags & MS_BORN)) f(sb, arg); up_read(&sb->s_umount); @@ -616,7 +616,7 @@ rescan: spin_unlock(&sb_lock); down_read(&sb->s_umount); /* still alive? */ - if (sb->s_root) + if (sb->s_root && (sb->s_flags & MS_BORN)) return sb; up_read(&sb->s_umount); /* nope, got unmounted */ @@ -676,7 +676,7 @@ rescan: spin_unlock(&sb_lock); down_read(&sb->s_umount); /* still alive? 
*/ - if (sb->s_root) + if (sb->s_root && (sb->s_flags & MS_BORN)) return sb; up_read(&sb->s_umount); /* nope, got unmounted */ @@ -763,7 +763,8 @@ static void do_emergency_remount(struct work_struct *work) sb->s_count++; spin_unlock(&sb_lock); down_write(&sb->s_umount); - if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { + if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) && + !(sb->s_flags & MS_RDONLY)) { /* * What lock protects sb->s_flags?? */ @@ -1146,6 +1147,11 @@ int freeze_super(struct super_block *sb) return -EBUSY; } + if (!(sb->s_flags & MS_BORN)) { + up_write(&sb->s_umount); + return 0; /* sic - it's "nothing to do" */ + } + if (sb->s_flags & MS_RDONLY) { sb->s_frozen = SB_FREEZE_TRANS; smp_wmb(); -- cgit v1.2.3 From 39f7c4db1d2d9e2e2a90abdf34811783089d217d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Nov 2011 12:11:30 +0100 Subject: vfs: keep list of mounts for each superblock Keep track of vfsmounts belonging to a superblock. List is protected by vfsmount_lock. 
Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Al Viro --- fs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 0413f51a9f0f..993ca8f128d6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -142,6 +142,7 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_LIST_HEAD(&s->s_dentry_lru); INIT_LIST_HEAD(&s->s_inode_lru); spin_lock_init(&s->s_inode_lru_lock); + INIT_LIST_HEAD(&s->s_mounts); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -200,6 +201,7 @@ static inline void destroy_super(struct super_block *s) free_percpu(s->s_files); #endif security_sb_free(s); + WARN_ON(!list_empty(&s->s_mounts)); kfree(s->s_subtype); kfree(s->s_options); kfree(s); -- cgit v1.2.3 From 4ed5e82fe77f4147cf386327c9a63a2dd7eff518 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Nov 2011 12:11:31 +0100 Subject: vfs: protect remounting superblock read-only Currently remounting superblock read-only is racy in a major way. With the per mount read-only infrastructure it is now possible to prevent most races, which this patch attempts. Before starting the remount read-only, iterate through all mounts belonging to the superblock and if none of them have any pending writes, set sb->s_readonly_remount. This indicates that remount is in progress and no further write requests are allowed. If the remount succeeds set MS_RDONLY and reset s_readonly_remount. If the remounting is unsuccessful just reset s_readonly_remount. This can result in transient EROFS errors, despite the fact the remount failed. Unfortunately holding off writes is difficult as remount itself may touch the filesystem (e.g. through load_nls()) which would deadlock. A later patch deals with delayed writes due to nlink going to zero. 
Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Al Viro --- fs/super.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 993ca8f128d6..6acc02237e3e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -723,23 +723,33 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ if (remount_ro) { - if (force) + if (force) { mark_files_ro(sb); - else if (!fs_may_remount_ro(sb)) - return -EBUSY; + } else { + retval = sb_prepare_remount_readonly(sb); + if (retval) + return retval; + + retval = -EBUSY; + if (!fs_may_remount_ro(sb)) + goto cancel_readonly; + } } if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); if (retval) { if (!force) - return retval; + goto cancel_readonly; /* If forced remount, go ahead despite any errors */ WARN(1, "forced remount of a %s fs returned %i\n", sb->s_type->name, retval); } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); + /* Needs to be ordered wrt mnt_is_readonly() */ + smp_wmb(); + sb->s_readonly_remount = 0; /* * Some filesystems modify their metadata via some other path than the @@ -752,6 +762,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (remount_ro && sb->s_bdev) invalidate_bdev(sb->s_bdev); return 0; + +cancel_readonly: + sb->s_readonly_remount = 0; + return retval; } static void do_emergency_remount(struct work_struct *work) -- cgit v1.2.3 From 8e8b87964bc8dc5c14b6543fc933b7725f07d3ac Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Nov 2011 12:11:33 +0100 Subject: vfs: prevent remount read-only if pending removes If there are any inodes on the super block that have been unlinked (i_nlink == 0) but have not yet been deleted then prevent remounting the super block read-only. 
Reported-by: Toshiyuki Okajima Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Al Viro --- fs/super.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 6acc02237e3e..de41e1e46f09 100644 --- a/fs/super.c +++ b/fs/super.c @@ -729,10 +729,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) retval = sb_prepare_remount_readonly(sb); if (retval) return retval; - - retval = -EBUSY; - if (!fs_may_remount_ro(sb)) - goto cancel_readonly; } } -- cgit v1.2.3