From 3b4798cbc13dd8d1150aa6377f97f0e11450a67d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 15 Dec 2009 16:45:32 -0800 Subject: oom-kill: show virtual size and rss information of the killed process In a typical oom analysis scenario, we frequently want to know whether the killed process has a memory leak or not at the first step. This patch adds vsz and rss information to the oom log to help this analysis. To save time for the debugging. example: =================================================================== rsyslogd invoked oom-killer: gfp_mask=0x201da, order=0, oom_adj=0 Pid: 1308, comm: rsyslogd Not tainted 2.6.32-rc6 #24 Call Trace: [] ?_spin_unlock+0x2b/0x40 [] oom_kill_process+0xbe/0x2b0 (snip) 492283 pages non-shared Out of memory: kill process 2341 (memhog) score 527276 or a child Killed process 2341 (memhog) vsz:1054552kB, anon-rss:970588kB, file-rss:4kB =========================================================================== ^ | here [rientjes@google.com: fix race, add pid & comm to message] Signed-off-by: KOSAKI Motohiro Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 492c98624fc1..6bb8a7a7ec9a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -352,6 +352,8 @@ static void dump_header(gfp_t gfp_mask, int order, struct mem_cgroup *mem) dump_tasks(mem); } +#define K(x) ((x) << (PAGE_SHIFT-10)) + /* * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO @@ -365,15 +367,23 @@ static void __oom_kill_task(struct task_struct *p, int verbose) return; } + task_lock(p); if (!p->mm) { WARN_ON(1); - printk(KERN_WARNING "tried to kill an mm-less task!\n"); + printk(KERN_WARNING "tried to kill an mm-less task %d (%s)!\n", + task_pid_nr(p), p->comm); + task_unlock(p); return; } if 
(verbose) - printk(KERN_ERR "Killed process %d (%s)\n", - task_pid_nr(p), p->comm); + printk(KERN_ERR "Killed process %d (%s) " + "vsz:%lukB, anon-rss:%lukB, file-rss:%lukB\n", + task_pid_nr(p), p->comm, + K(p->mm->total_vm), + K(get_mm_counter(p->mm, anon_rss)), + K(get_mm_counter(p->mm, file_rss))); + task_unlock(p); /* * We give our sacrificial lamb high priority and access to -- cgit v1.2.3 From 4365a5676fa3aa1d5ae6c90c22a0044f09ba584e Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 15 Dec 2009 16:45:33 -0800 Subject: oom-kill: fix NUMA constraint check with nodemask Fix node-oriented allocation handling in oom-kill.c I myself think of this as a bugfix not as an enhancement. In these days, things are changed as - alloc_pages() eats nodemask as its arguments, __alloc_pages_nodemask(). - mempolicy don't maintain its own private zonelists. (And cpuset doesn't use nodemask for __alloc_pages_nodemask()) So, current oom-killer's check function is wrong. This patch does - check nodemask, if nodemask && nodemask doesn't cover all node_states[N_HIGH_MEMORY], this is CONSTRAINT_MEMORY_POLICY. - Scan all zonelist under nodemask, if it hits cpuset's wall this failure is from cpuset. And - modifies the caller of out_of_memory not to call oom if __GFP_THISNODE. This doesn't change "current" behavior. If callers use __GFP_THISNODE it should handle "page allocation failure" by itself. - handle __GFP_NOFAIL+__GFP_THISNODE path. This is something like a FIXME but this gfpmask is not used now.
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KAMEZAWA Hiroyuki Acked-by: David Rientjes Cc: Daisuke Nishimura Cc: KOSAKI Motohiro Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sysrq.c | 2 +- include/linux/oom.h | 4 +++- mm/oom_kill.c | 46 +++++++++++++++++++++++++++++++++------------- mm/page_alloc.c | 22 ++++++++++++++++------ 4 files changed, 53 insertions(+), 21 deletions(-) diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 44203ff599da..1ae2de7d8b4f 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -339,7 +339,7 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(struct work_struct *ignored) { - out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0); + out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0, NULL); } static DECLARE_WORK(moom_work, moom_callback); diff --git a/include/linux/oom.h b/include/linux/oom.h index 6aac5fe4f6f1..537662315627 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include +#include struct zonelist; struct notifier_block; @@ -26,7 +27,8 @@ enum oom_constraint { extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags); extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); -extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); +extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, + int order, nodemask_t *mask); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6bb8a7a7ec9a..25c679e0288a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -196,27 +196,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) /* * Determine the type of allocation constraint. 
*/ -static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, - gfp_t gfp_mask) -{ #ifdef CONFIG_NUMA +static enum oom_constraint constrained_alloc(struct zonelist *zonelist, + gfp_t gfp_mask, nodemask_t *nodemask) +{ struct zone *zone; struct zoneref *z; enum zone_type high_zoneidx = gfp_zone(gfp_mask); - nodemask_t nodes = node_states[N_HIGH_MEMORY]; - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) - if (cpuset_zone_allowed_softwall(zone, gfp_mask)) - node_clear(zone_to_nid(zone), nodes); - else - return CONSTRAINT_CPUSET; + /* + * Reach here only when __GFP_NOFAIL is used. So, we should avoid + * to kill current.We have to random task kill in this case. + * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. + */ + if (gfp_mask & __GFP_THISNODE) + return CONSTRAINT_NONE; - if (!nodes_empty(nodes)) + /* + * The nodemask here is a nodemask passed to alloc_pages(). Now, + * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy + * feature. mempolicy is an only user of nodemask here. + * check mempolicy's nodemask contains all N_HIGH_MEMORY + */ + if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) return CONSTRAINT_MEMORY_POLICY; -#endif + + /* Check this allocation failure is caused by cpuset's wall function */ + for_each_zone_zonelist_nodemask(zone, z, zonelist, + high_zoneidx, nodemask) + if (!cpuset_zone_allowed_softwall(zone, gfp_mask)) + return CONSTRAINT_CPUSET; return CONSTRAINT_NONE; } +#else +static enum oom_constraint constrained_alloc(struct zonelist *zonelist, + gfp_t gfp_mask, nodemask_t *nodemask) +{ + return CONSTRAINT_NONE; +} +#endif /* * Simple selection loop. We chose the process with the highest @@ -613,7 +632,8 @@ rest_and_return: * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. 
*/ -void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) +void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, + int order, nodemask_t *nodemask) { unsigned long freed = 0; enum oom_constraint constraint; @@ -632,7 +652,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) * Check if there were limitations on the allocation (only relevant for * NUMA) that may require different handling. */ - constraint = constrained_alloc(zonelist, gfp_mask); + constraint = constrained_alloc(zonelist, gfp_mask, nodemask); read_lock(&tasklist_lock); switch (constraint) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 59d2e88fb47c..850c4a7e2fe5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1654,12 +1654,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, if (page) goto out; - /* The OOM killer will not help higher order allocs */ - if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL)) - goto out; - + if (!(gfp_mask & __GFP_NOFAIL)) { + /* The OOM killer will not help higher order allocs */ + if (order > PAGE_ALLOC_COSTLY_ORDER) + goto out; + /* + * GFP_THISNODE contains __GFP_NORETRY and we never hit this. + * Sanity check for bare calls of __GFP_THISNODE, not real OOM. + * The caller should handle page allocation failure by itself if + * it specifies __GFP_THISNODE. + * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER. 
+ */ + if (gfp_mask & __GFP_THISNODE) + goto out; + } /* Exhausted what can be done so it's blamo time */ - out_of_memory(zonelist, gfp_mask, order); + out_of_memory(zonelist, gfp_mask, order, nodemask); out: clear_zonelist_oom(zonelist, gfp_mask); @@ -3123,7 +3133,7 @@ static int __cpuinit process_zones(int cpu) if (percpu_pagelist_fraction) setup_pagelist_highmark(zone_pcp(zone, cpu), - (zone->present_pages / percpu_pagelist_fraction)); + (zone->present_pages / percpu_pagelist_fraction)); } return 0; -- cgit v1.2.3 From f065f41f48569122b5bcddbd1ba2354f7cc29fdc Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Tue, 15 Dec 2009 16:45:34 -0800 Subject: timecompare: fix half-Y2K38 problem in timecompare_update while calculating offset ktime will overflow from 03:14:07 UTC on Tuesday, 19 January 2038, ktime_add() in timecompare_update() will overflow a half earlier. As a result, wrong offset will be gotten, then cause some strange problems. Signed-off-by: Barry Song <21cnbao@gmail.com> Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Patrick Ohly Cc: David S. 
Miller Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timecompare.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index 96ff643a5a59..12f5c55090be 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c @@ -89,7 +89,7 @@ int timecompare_offset(struct timecompare *sync, * source time */ sample.offset = - ktime_to_ns(ktime_add(end, start)) / 2 - + (ktime_to_ns(end) + ktime_to_ns(start)) / 2 - ts; /* simple insertion sort based on duration */ -- cgit v1.2.3 From 135d5655dc58a24eda64e3f6c192d7d605e10050 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 15 Dec 2009 16:45:39 -0800 Subject: proc: rename de_get() to pde_get() and inline it * de_get() is trivial -- make inline, save a few bits of code, drop "refcount is 0" check -- it should be done in some generic refcount code, don't recall it was helpful * rename GET and PUT functions to pde_get(), pde_put() for cool prefix! * remove obvious and incorrect comments * in remove_proc_entry() use pde_put(), when I fixed PDE refcounting to be normal one, remove_proc_entry() was supposed to do "-1" and code now reflects that.
Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 21 +++++++++++++-------- fs/proc/inode.c | 31 ++++--------------------------- fs/proc/internal.h | 10 ++++++---- 3 files changed, 23 insertions(+), 39 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index fa678abc9db1..480cb1065eec 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -429,7 +429,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, unsigned int ino; ino = de->low_ino; - de_get(de); + pde_get(de); spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); @@ -445,7 +445,7 @@ out_unlock: return NULL; } if (de) - de_put(de); + pde_put(de); return ERR_PTR(error); } @@ -509,17 +509,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, struct proc_dir_entry *next; /* filldir passes info to user space */ - de_get(de); + pde_get(de); spin_unlock(&proc_subdir_lock); if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) { - de_put(de); + pde_put(de); goto out; } spin_lock(&proc_subdir_lock); filp->f_pos++; next = de->next; - de_put(de); + pde_put(de); de = next; } while (de); spin_unlock(&proc_subdir_lock); @@ -763,7 +763,7 @@ out: return NULL; } -void free_proc_entry(struct proc_dir_entry *de) +static void free_proc_entry(struct proc_dir_entry *de) { unsigned int ino = de->low_ino; @@ -777,6 +777,12 @@ void free_proc_entry(struct proc_dir_entry *de) kfree(de); } +void pde_put(struct proc_dir_entry *pde) +{ + if (atomic_dec_and_test(&pde->count)) + free_proc_entry(pde); +} + /* * Remove a /proc entry and free it if it's not currently in use. 
*/ @@ -845,6 +851,5 @@ continue_removing: WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " "'%s/%s', leaking at least '%s'\n", __func__, de->parent->name, de->name, de->subdir->name); - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); + pde_put(de); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d78ade305541..445a02bcaab3 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -24,29 +24,6 @@ #include "internal.h" -struct proc_dir_entry *de_get(struct proc_dir_entry *de) -{ - atomic_inc(&de->count); - return de; -} - -/* - * Decrements the use count and checks for deferred deletion. - */ -void de_put(struct proc_dir_entry *de) -{ - if (!atomic_read(&de->count)) { - printk("de_put: entry %s already free!\n", de->name); - return; - } - - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); -} - -/* - * Decrement the use count of the proc_dir_entry. - */ static void proc_delete_inode(struct inode *inode) { struct proc_dir_entry *de; @@ -59,7 +36,7 @@ static void proc_delete_inode(struct inode *inode) /* Let go of any associated proc directory entry */ de = PROC_I(inode)->pde; if (de) - de_put(de); + pde_put(de); if (PROC_I(inode)->sysctl) sysctl_head_put(PROC_I(inode)->sysctl); clear_inode(inode); @@ -480,7 +457,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, } unlock_new_inode(inode); } else - de_put(de); + pde_put(de); return inode; } @@ -495,7 +472,7 @@ int proc_fill_super(struct super_block *s) s->s_op = &proc_sops; s->s_time_gran = 1; - de_get(&proc_root); + pde_get(&proc_root); root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); if (!root_inode) goto out_no_root; @@ -509,6 +486,6 @@ int proc_fill_super(struct super_block *s) out_no_root: printk("proc_read_super: get root inode failed\n"); iput(root_inode); - de_put(&proc_root); + pde_put(&proc_root); return -ENOMEM; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 753ca37002c8..1f24a3eddd12 100644 --- a/fs/proc/internal.h +++ 
b/fs/proc/internal.h @@ -61,8 +61,6 @@ extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; -void free_proc_entry(struct proc_dir_entry *de); - void proc_init_inodecache(void); static inline struct pid *proc_pid(struct inode *inode) @@ -101,8 +99,12 @@ unsigned long task_vsize(struct mm_struct *); int task_statm(struct mm_struct *, int *, int *, int *, int *); void task_mem(struct seq_file *, struct mm_struct *); -struct proc_dir_entry *de_get(struct proc_dir_entry *de); -void de_put(struct proc_dir_entry *de); +static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) +{ + atomic_inc(&pde->count); + return pde; +} +void pde_put(struct proc_dir_entry *pde); extern struct vfsmount *proc_mnt; int proc_fill_super(struct super_block *); -- cgit v1.2.3 From 9ea9a886b0e8630e12cff515955e7f0f5be32cb1 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 15 Dec 2009 16:45:39 -0800 Subject: vt: make the default cursor shape configurable For embedded systems, the blinking cursor at startup time can be annoying and unintended. Add a new kernel parameter to change the default cursor shape. Signed-off-by: Clemens Ladisch Cc: Daniel Mack Acked-by: Pavel Machek Cc: David Newall Cc: Alan Cox Cc: Greg Kroah-Hartman Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ drivers/char/vt.c | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ab95d3ada5c7..c309515ae959 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2729,6 +2729,11 @@ and is between 256 and 4096 characters. It is defined in the file vmpoff= [KNL,S390] Perform z/VM CP command after power off. Format: + vt.cur_default= [VT] Default cursor shape. 
+ Format: 0xCCBBAA, where AA, BB, and CC are the same as + the parameters of the [?A;B;Cc escape sequence; + see VGA-softcursor.txt. Default: 2 = underline. + vt.default_blu= [VT] Format: ,,,..., Change the default blue palette of the console. diff --git a/drivers/char/vt.c b/drivers/char/vt.c index e43fbc66aef0..50faa1fb0f06 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -164,6 +164,9 @@ module_param(default_utf8, int, S_IRUGO | S_IWUSR); int global_cursor_default = -1; module_param(global_cursor_default, int, S_IRUGO | S_IWUSR); +static int cur_default = CUR_DEFAULT; +module_param(cur_default, int, S_IRUGO | S_IWUSR); + /* * ignore_poke: don't unblank the screen when things are typed. This is * mainly for the privacy of braille terminal users. @@ -1636,7 +1639,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) /* do not do set_leds here because this causes an endless tasklet loop when the keyboard hasn't been initialized yet */ - vc->vc_cursor_type = CUR_DEFAULT; + vc->vc_cursor_type = cur_default; vc->vc_complement_mask = vc->vc_s_complement_mask; default_attr(vc); @@ -1838,7 +1841,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) if (vc->vc_par[0]) vc->vc_cursor_type = vc->vc_par[0] | (vc->vc_par[1] << 8) | (vc->vc_par[2] << 16); else - vc->vc_cursor_type = CUR_DEFAULT; + vc->vc_cursor_type = cur_default; return; } break; -- cgit v1.2.3 From 4f8427d1903148f9753eb35c5f51a8a865457329 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:42 -0800 Subject: autofs4: use helper functions for active list handling Define some simple helper functions for adding and deleting entries on the active (and unhashed) dentry list. 
Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 2 ++ fs/autofs4/inode.c | 1 + fs/autofs4/root.c | 46 +++++++++++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 8f7cdde41733..f3cf151a59e3 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -75,6 +75,8 @@ struct autofs_info { struct completion expire_complete; struct list_head active; + int active_count; + struct list_head expiring; struct autofs_sb_info *sbi; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 69c8142da838..4670a7818eac 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -49,6 +49,7 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, ino->dentry = NULL; ino->size = 0; INIT_LIST_HEAD(&ino->active); + ino->active_count = 0; INIT_LIST_HEAD(&ino->expiring); atomic_set(&ino->count, 0); } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index b96a3c57359d..67d8d962168d 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -72,6 +72,38 @@ const struct inode_operations autofs4_dir_inode_operations = { .rmdir = autofs4_dir_rmdir, }; +static void autofs4_add_active(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { + spin_lock(&sbi->lookup_lock); + if (!ino->active_count) { + if (list_empty(&ino->active)) + list_add(&ino->active, &sbi->active_list); + } + ino->active_count++; + spin_unlock(&sbi->lookup_lock); + } + return; +} + +static void autofs4_del_active(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { + spin_lock(&sbi->lookup_lock); + ino->active_count--; + if (!ino->active_count) { + if 
(!list_empty(&ino->active)) + list_del_init(&ino->active); + } + spin_unlock(&sbi->lookup_lock); + } + return; +} + static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_path.dentry; @@ -513,9 +545,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s dentry->d_fsdata = ino; ino->dentry = dentry; - spin_lock(&sbi->lookup_lock); - list_add(&ino->active, &sbi->active_list); - spin_unlock(&sbi->lookup_lock); + autofs4_add_active(dentry); d_instantiate(dentry, NULL); } @@ -624,10 +654,7 @@ static int autofs4_dir_symlink(struct inode *dir, if (!ino) return -ENOMEM; - spin_lock(&sbi->lookup_lock); - if (!list_empty(&ino->active)) - list_del_init(&ino->active); - spin_unlock(&sbi->lookup_lock); + autofs4_del_active(dentry); ino->size = strlen(symname); cp = kmalloc(ino->size + 1, GFP_KERNEL); @@ -775,10 +802,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (!ino) return -ENOMEM; - spin_lock(&sbi->lookup_lock); - if (!list_empty(&ino->active)) - list_del_init(&ino->active); - spin_unlock(&sbi->lookup_lock); + autofs4_del_active(dentry); inode = autofs4_get_inode(dir->i_sb, ino); if (!inode) { -- cgit v1.2.3 From c4cd70b3e3e95cc2201a00edf6deb52327d73c6b Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:43 -0800 Subject: autofs4: use helper functions for expiring list Define some simple helper functions for adding and deleting entries on the expiring dentry list. 
Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 26 ++++++++++++++++++++++++++ fs/autofs4/root.c | 15 +++------------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index f3cf151a59e3..fe9fc235ee75 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -266,5 +266,31 @@ out: return ret; } +static inline void autofs4_add_expiring(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { + spin_lock(&sbi->lookup_lock); + if (list_empty(&ino->expiring)) + list_add(&ino->expiring, &sbi->expiring_list); + spin_unlock(&sbi->lookup_lock); + } + return; +} + +static inline void autofs4_del_expiring(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino) { + spin_lock(&sbi->lookup_lock); + if (!list_empty(&ino->expiring)) + list_del_init(&ino->expiring); + spin_unlock(&sbi->lookup_lock); + } + return; +} + void autofs4_dentry_release(struct dentry *); extern void autofs4_kill_sb(struct super_block *); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 67d8d962168d..2954ac5fec75 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -563,10 +563,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s */ ino = autofs4_dentry_ino(expiring); autofs4_expire_wait(expiring); - spin_lock(&sbi->lookup_lock); - if (!list_empty(&ino->expiring)) - list_del_init(&ino->expiring); - spin_unlock(&sbi->lookup_lock); + autofs4_del_expiring(expiring); dput(expiring); } @@ -732,10 +729,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dir->i_mtime = CURRENT_TIME; spin_lock(&dcache_lock); - spin_lock(&sbi->lookup_lock); - if 
(list_empty(&ino->expiring)) - list_add(&ino->expiring, &sbi->expiring_list); - spin_unlock(&sbi->lookup_lock); + autofs4_add_expiring(dentry); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -761,10 +755,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) spin_unlock(&dcache_lock); return -ENOTEMPTY; } - spin_lock(&sbi->lookup_lock); - if (list_empty(&ino->expiring)) - list_add(&ino->expiring, &sbi->expiring_list); - spin_unlock(&sbi->lookup_lock); + autofs4_add_expiring(dentry); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); -- cgit v1.2.3 From 36b6413ef301d30f60037e497ecb902897895473 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:44 -0800 Subject: autofs4: use helper function for need mount check Define simple helper function for checking if we need to trigger a mount. Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2954ac5fec75..f6e8ca9ea56a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -104,6 +104,14 @@ static void autofs4_del_active(struct dentry *dentry) return; } +static unsigned int autofs4_need_mount(unsigned int flags) +{ + unsigned int res = 0; + if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS)) + res = 1; + return res; +} + static int autofs4_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_path.dentry; @@ -168,7 +176,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) } /* Trigger mount for path component or follow link */ } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING || - flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) || + autofs4_need_mount(flags) || current->link_count) { DPRINTK("waiting for mount name=%.*s", 
dentry->d_name.len, dentry->d_name.name); @@ -234,7 +242,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) autofs4_expire_wait(dentry); /* We trigger a mount for almost all flags */ - lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS); + lookup_type = autofs4_need_mount(nd->flags); if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING)) goto follow; -- cgit v1.2.3 From aa952eb26d4344fdad44c104f3c298d3130c53da Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:45 -0800 Subject: autofs4: use autofs_info for pending flag Eliminate the use of the d_lock spin lock by using the autofs super block info spin lock. This reduces the number of spin locks we use by one and makes the code for the following patch (to redirect ->d_revalidate() to ->lookup()) a little simpler. Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 3 ++- fs/autofs4/expire.c | 2 +- fs/autofs4/root.c | 58 ++++++++++++++++++++++++++++----------------------- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index fe9fc235ee75..3d283abf67d7 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -97,6 +97,7 @@ struct autofs_info { #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ #define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ +#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ struct autofs_wait_queue { wait_queue_head_t queue; @@ -163,7 +164,7 @@ static inline int autofs4_ispending(struct dentry *dentry) { struct autofs_info *inf = autofs4_dentry_ino(dentry); - if (dentry->d_flags & DCACHE_AUTOFS_PENDING) + if (inf->flags & AUTOFS_INF_PENDING) return 1; if (inf->flags & AUTOFS_INF_EXPIRING) diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 
3da18d453488..a796c9417fb1 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -27,7 +27,7 @@ static inline int autofs4_can_expire(struct dentry *dentry, return 0; /* No point expiring a pending mount */ - if (dentry->d_flags & DCACHE_AUTOFS_PENDING) + if (ino->flags & AUTOFS_INF_PENDING) return 0; if (!do_now) { diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f6e8ca9ea56a..305136ba74b6 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -166,32 +166,32 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) /* Turn this into a real negative dentry? */ if (status == -ENOENT) { - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; - spin_unlock(&dentry->d_lock); + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); return status; } else if (status) { /* Return a negative dentry, but leave it "pending" */ return status; } /* Trigger mount for path component or follow link */ - } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING || + } else if (ino->flags & AUTOFS_INF_PENDING || autofs4_need_mount(flags) || current->link_count) { DPRINTK("waiting for mount name=%.*s", dentry->d_name.len, dentry->d_name.name); - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_AUTOFS_PENDING; - spin_unlock(&dentry->d_lock); + spin_lock(&sbi->fs_lock); + ino->flags |= AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); status = autofs4_wait(sbi, dentry, NFY_MOUNT); DPRINTK("mount done status=%d", status); if (status) { - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; - spin_unlock(&dentry->d_lock); + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); return status; } } @@ -200,9 +200,9 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) if (ino) ino->last_used = jiffies; - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; - spin_unlock(&dentry->d_lock); + spin_lock(&sbi->fs_lock); + 
ino->flags &= ~AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); return 0; } @@ -243,18 +243,23 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) /* We trigger a mount for almost all flags */ lookup_type = autofs4_need_mount(nd->flags); - if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING)) + spin_lock(&sbi->fs_lock); + spin_lock(&dcache_lock); + if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { + spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); goto follow; + } /* * If the dentry contains directories then it is an autofs * multi-mount with no root mount offset. So don't try to * mount it again. */ - spin_lock(&dcache_lock); - if (dentry->d_flags & DCACHE_AUTOFS_PENDING || + if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && __simple_empty(dentry))) { spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); status = try_to_fill_dentry(dentry, 0); if (status) @@ -263,6 +268,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) goto follow; } spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); follow: /* * If there is no root mount it must be an autofs @@ -525,9 +531,10 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); - if (unhashed) + if (unhashed) { dentry = unhashed; - else { + ino = autofs4_dentry_ino(dentry); + } else { /* * Mark the dentry incomplete but don't hash it. We do this * to serialize our inode creation operations (symlink and @@ -569,15 +576,14 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * be quite complete but the directory has been removed * so it must have been successful, so just wait for it. 
*/ - ino = autofs4_dentry_ino(expiring); autofs4_expire_wait(expiring); autofs4_del_expiring(expiring); dput(expiring); } - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_AUTOFS_PENDING; - spin_unlock(&dentry->d_lock); + spin_lock(&sbi->fs_lock); + ino->flags |= AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); if (dentry->d_op && dentry->d_op->d_revalidate) (dentry->d_op->d_revalidate)(dentry, nd); mutex_lock(&dir->i_mutex); @@ -587,7 +593,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * If we are still pending, check if we had to handle * a signal. If so we can force a restart.. */ - if (dentry->d_flags & DCACHE_AUTOFS_PENDING) { + if (ino->flags & AUTOFS_INF_PENDING) { /* See if we were interrupted */ if (signal_pending(current)) { sigset_t *sigset = &current->pending.signal; @@ -600,9 +606,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s } } if (!oz_mode) { - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; - spin_unlock(&dentry->d_lock); + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); } } -- cgit v1.2.3 From 90387c9c1d5787aeb7dfdfc90c8f8aeaeed7ad0e Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:46 -0800 Subject: autofs4: renamer unhashed to active in autofs4_lookup() Rename the variable unhashed to active in autofs4_lookup() to better reflect its usage.
Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 305136ba74b6..961ff377db03 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -514,7 +514,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s { struct autofs_sb_info *sbi; struct autofs_info *ino; - struct dentry *expiring, *unhashed; + struct dentry *expiring, *active; int oz_mode; DPRINTK("name = %.*s", @@ -530,9 +530,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); - unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); - if (unhashed) { - dentry = unhashed; + active = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); + if (active) { + dentry = active; ino = autofs4_dentry_ino(dentry); } else { /* @@ -600,8 +600,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s if (sigismember (sigset, SIGKILL) || sigismember (sigset, SIGQUIT) || sigismember (sigset, SIGINT)) { - if (unhashed) - dput(unhashed); + if (active) + dput(active); return ERR_PTR(-ERESTARTNOINTR); } } @@ -633,14 +633,14 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s else dentry = ERR_PTR(-ENOENT); - if (unhashed) - dput(unhashed); + if (active) + dput(active); return dentry; } - if (unhashed) - return unhashed; + if (active) + return active; return NULL; } -- cgit v1.2.3 From 6510c9d8595adcee2b0dc86408bc432a8dd7d652 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:47 -0800 Subject: autofs4: cleanup active and expire lookup The lookup functions for active and 
expiring dentrys use parameters that can be easily obtained on entry so we change the call to take just the dentry. This makes the subsequent change, to send all lookups to ->lookup(), a bit cleaner. Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 961ff377db03..81700f4a19da 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -405,8 +405,11 @@ static const struct dentry_operations autofs4_dentry_operations = { .d_release = autofs4_dentry_release, }; -static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) +static struct dentry *autofs4_lookup_active(struct dentry *dentry) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *parent = dentry->d_parent; + struct qstr *name = &dentry->d_name; unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; @@ -457,8 +460,11 @@ next: return NULL; } -static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) +static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct dentry *parent = dentry->d_parent; + struct qstr *name = &dentry->d_name; unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; @@ -530,7 +536,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); - active = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); + active = autofs4_lookup_active(dentry); if (active) { dentry =
active; ino = autofs4_dentry_ino(dentry); @@ -567,9 +573,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s if (!oz_mode) { mutex_unlock(&dir->i_mutex); - expiring = autofs4_lookup_expiring(sbi, - dentry->d_parent, - &dentry->d_name); + expiring = autofs4_lookup_expiring(dentry); if (expiring) { /* * If we are racing with expire the request might not -- cgit v1.2.3 From c42c7f7e698fa888abbd50eb9c8e328fff68914f Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:48 -0800 Subject: autofs4: eliminate d_unhashed in path walk checks We unhash the dentry (in a subsequent patch) in ->d_revalidate() in order to send mount requests to ->lookup(). But then we cannot rely on d_unhashed() to give reliable results because it may be called at any time by any code path. The d_unhashed() function is used by __simple_empty() in the path walking callbacks but autofs mount point dentrys should have no directories at all so a list_empty() on d_subdirs should be (and is) sufficient. Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 81700f4a19da..b6530f321adb 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -133,7 +133,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file) * it. */ spin_lock(&dcache_lock); - if (!d_mountpoint(dentry) && __simple_empty(dentry)) { + if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { spin_unlock(&dcache_lock); return -ENOENT; } @@ -257,7 +257,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) * mount it again.
*/ if (ino->flags & AUTOFS_INF_PENDING || - (!d_mountpoint(dentry) && __simple_empty(dentry))) { + (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); @@ -340,8 +340,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) /* Check for a non-mountpoint directory with no contents */ spin_lock(&dcache_lock); if (S_ISDIR(dentry->d_inode->i_mode) && - !d_mountpoint(dentry) && - __simple_empty(dentry)) { + !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); spin_unlock(&dcache_lock); -- cgit v1.2.3 From e4d5ade7b54cf74efcf53ff3dcb09454c29d70cf Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:49 -0800 Subject: autofs4: rename dentry to active in autofs4_lookup_active() In autofs4_lookup_active() a declaration within the list traversal loop uses a declaration that has the same name as the function parameter. Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index b6530f321adb..e8a8881c3f45 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -419,23 +419,23 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) head = &sbi->active_list; list_for_each(p, head) { struct autofs_info *ino; - struct dentry *dentry; + struct dentry *active; struct qstr *qstr; ino = list_entry(p, struct autofs_info, active); - dentry = ino->dentry; + active = ino->dentry; - spin_lock(&dentry->d_lock); + spin_lock(&active->d_lock); /* Already gone? 
*/ - if (atomic_read(&dentry->d_count) == 0) + if (atomic_read(&active->d_count) == 0) goto next; - qstr = &dentry->d_name; + qstr = &active->d_name; - if (dentry->d_name.hash != hash) + if (active->d_name.hash != hash) goto next; - if (dentry->d_parent != parent) + if (active->d_parent != parent) goto next; if (qstr->len != len) @@ -443,15 +443,15 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) if (memcmp(qstr->name, str, len)) goto next; - if (d_unhashed(dentry)) { - dget(dentry); - spin_unlock(&dentry->d_lock); + if (d_unhashed(active)) { + dget(active); + spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); - return dentry; + return active; } next: - spin_unlock(&dentry->d_lock); + spin_unlock(&active->d_lock); } spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); -- cgit v1.2.3 From cb4b492ac7595aad10756fe0b04691f0965e0cfc Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:50 -0800 Subject: autofs4: rename dentry to expiring in autofs4_lookup_expiring() In autofs4_lookup_expiring() a declaration within the list traversal loop uses a declaration that has the same name as the function parameter. 
Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/root.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e8a8881c3f45..a015b49891df 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -474,23 +474,23 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) head = &sbi->expiring_list; list_for_each(p, head) { struct autofs_info *ino; - struct dentry *dentry; + struct dentry *expiring; struct qstr *qstr; ino = list_entry(p, struct autofs_info, expiring); - dentry = ino->dentry; + expiring = ino->dentry; - spin_lock(&dentry->d_lock); + spin_lock(&expiring->d_lock); /* Bad luck, we've already been dentry_iput */ - if (!dentry->d_inode) + if (!expiring->d_inode) goto next; - qstr = &dentry->d_name; + qstr = &expiring->d_name; - if (dentry->d_name.hash != hash) + if (expiring->d_name.hash != hash) goto next; - if (dentry->d_parent != parent) + if (expiring->d_parent != parent) goto next; if (qstr->len != len) @@ -498,15 +498,15 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) if (memcmp(qstr->name, str, len)) goto next; - if (d_unhashed(dentry)) { - dget(dentry); - spin_unlock(&dentry->d_lock); + if (d_unhashed(expiring)) { + dget(expiring); + spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); - return dentry; + return expiring; } next: - spin_unlock(&dentry->d_lock); + spin_unlock(&expiring->d_lock); } spin_unlock(&sbi->lookup_lock); spin_unlock(&dcache_lock); -- cgit v1.2.3 From 213614d583748d00967a91cacd656f417efb36ce Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 15 Dec 2009 16:45:51 -0800 Subject: autofs4: always use lookup for lookup We need to be able to cope with the directory mutex being held during ->d_revalidate() in some cases, but not all cases, and not 
necessarily by us. Because we need to release the mutex when we call back to the daemon to perform a mount we must be sure that it is us who holds the mutex so we must redirect mount requests to ->lookup() if the mutex is held. Signed-off-by: Ian Kent Cc: Sage Weil Cc: Al Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Yehuda Saheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/autofs_i.h | 7 + fs/autofs4/expire.c | 6 +- fs/autofs4/inode.c | 1 + fs/autofs4/root.c | 474 +++++++++++++++++++++++++++++++++----------------- 4 files changed, 330 insertions(+), 158 deletions(-) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 3d283abf67d7..0118d67221b2 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -60,6 +60,11 @@ do { \ current->pid, __func__, ##args); \ } while (0) +struct rehash_entry { + struct task_struct *task; + struct list_head list; +}; + /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure.
It holds a reference to the dentry, so dentries are never @@ -76,6 +81,7 @@ struct autofs_info { struct list_head active; int active_count; + struct list_head rehash_list; struct list_head expiring; @@ -98,6 +104,7 @@ struct autofs_info { #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ #define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ +#define AUTOFS_INF_REHASH (1<<3) /* dentry in transit to ->lookup() */ struct autofs_wait_queue { wait_queue_head_t queue; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index a796c9417fb1..74bc9aa6df31 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -279,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, root->d_mounted--; } ino->flags |= AUTOFS_INF_EXPIRING; + autofs4_add_expiring(root); init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); return root; @@ -406,6 +407,7 @@ found: expired, (int)expired->d_name.len, expired->d_name.name); ino = autofs4_dentry_ino(expired); ino->flags |= AUTOFS_INF_EXPIRING; + autofs4_add_expiring(expired); init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); spin_lock(&dcache_lock); @@ -433,7 +435,7 @@ int autofs4_expire_wait(struct dentry *dentry) DPRINTK("expire done status=%d", status); - if (d_unhashed(dentry)) + if (d_unhashed(dentry) && IS_DEADDIR(dentry->d_inode)) return -EAGAIN; return status; @@ -473,6 +475,7 @@ int autofs4_expire_run(struct super_block *sb, spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); ino->flags &= ~AUTOFS_INF_EXPIRING; + autofs4_del_expiring(dentry); complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); @@ -503,6 +506,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, ino->flags &= ~AUTOFS_INF_MOUNTPOINT; } ino->flags &= ~AUTOFS_INF_EXPIRING; + autofs4_del_expiring(dentry); complete_all(&ino->expire_complete); 
spin_unlock(&sbi->fs_lock); dput(dentry); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 4670a7818eac..d0a3de247458 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -49,6 +49,7 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, ino->dentry = NULL; ino->size = 0; INIT_LIST_HEAD(&ino->active); + INIT_LIST_HEAD(&ino->rehash_list); ino->active_count = 0; INIT_LIST_HEAD(&ino->expiring); atomic_set(&ino->count, 0); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index a015b49891df..30cc9ddf4b70 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -104,6 +104,99 @@ static void autofs4_del_active(struct dentry *dentry) return; } +static void autofs4_add_rehash_entry(struct autofs_info *ino, + struct rehash_entry *entry) +{ + entry->task = current; + INIT_LIST_HEAD(&entry->list); + list_add(&entry->list, &ino->rehash_list); + return; +} + +static void autofs4_remove_rehash_entry(struct autofs_info *ino) +{ + struct list_head *head = &ino->rehash_list; + struct rehash_entry *entry; + list_for_each_entry(entry, head, list) { + if (entry->task == current) { + list_del(&entry->list); + kfree(entry); + break; + } + } + return; +} + +static void autofs4_remove_rehash_entrys(struct autofs_info *ino) +{ + struct autofs_sb_info *sbi = ino->sbi; + struct rehash_entry *entry, *next; + struct list_head *head; + + spin_lock(&sbi->fs_lock); + spin_lock(&sbi->lookup_lock); + if (!(ino->flags & AUTOFS_INF_REHASH)) { + spin_unlock(&sbi->lookup_lock); + spin_unlock(&sbi->fs_lock); + return; + } + ino->flags &= ~AUTOFS_INF_REHASH; + head = &ino->rehash_list; + list_for_each_entry_safe(entry, next, head, list) { + list_del(&entry->list); + kfree(entry); + } + spin_unlock(&sbi->lookup_lock); + spin_unlock(&sbi->fs_lock); + dput(ino->dentry); + + return; +} + +static void autofs4_revalidate_drop(struct dentry *dentry, + struct rehash_entry *entry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = 
autofs4_dentry_ino(dentry); + /* + * Add to the active list so we can pick this up in + * ->lookup(). Also add an entry to a rehash list so + * we know when there are no dentrys in flight so we + * know when we can rehash the dentry. + */ + spin_lock(&sbi->lookup_lock); + if (list_empty(&ino->active)) + list_add(&ino->active, &sbi->active_list); + autofs4_add_rehash_entry(ino, entry); + spin_unlock(&sbi->lookup_lock); + if (!(ino->flags & AUTOFS_INF_REHASH)) { + ino->flags |= AUTOFS_INF_REHASH; + dget(dentry); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + } + return; +} + +static void autofs4_revalidate_rehash(struct dentry *dentry) +{ + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino->flags & AUTOFS_INF_REHASH) { + spin_lock(&sbi->lookup_lock); + autofs4_remove_rehash_entry(ino); + if (list_empty(&ino->rehash_list)) { + spin_unlock(&sbi->lookup_lock); + ino->flags &= ~AUTOFS_INF_REHASH; + d_rehash(dentry); + dput(ino->dentry); + } else + spin_unlock(&sbi->lookup_lock); + } + return; +} + static unsigned int autofs4_need_mount(unsigned int flags) { unsigned int res = 0; @@ -143,7 +236,7 @@ out: return dcache_dir_open(inode, file); } -static int try_to_fill_dentry(struct dentry *dentry, int flags) +static int try_to_fill_dentry(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); @@ -156,55 +249,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) * Wait for a pending mount, triggering one if there * isn't one already */ - if (dentry->d_inode == NULL) { - DPRINTK("waiting for mount name=%.*s", - dentry->d_name.len, dentry->d_name.name); + DPRINTK("waiting for mount name=%.*s", + dentry->d_name.len, dentry->d_name.name); - status = autofs4_wait(sbi, dentry, NFY_MOUNT); + status = autofs4_wait(sbi, dentry, NFY_MOUNT); - DPRINTK("mount done status=%d", status); 
- - /* Turn this into a real negative dentry? */ - if (status == -ENOENT) { - spin_lock(&sbi->fs_lock); - ino->flags &= ~AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - return status; - } else if (status) { - /* Return a negative dentry, but leave it "pending" */ - return status; - } - /* Trigger mount for path component or follow link */ - } else if (ino->flags & AUTOFS_INF_PENDING || - autofs4_need_mount(flags) || - current->link_count) { - DPRINTK("waiting for mount name=%.*s", - dentry->d_name.len, dentry->d_name.name); + DPRINTK("mount done status=%d", status); - spin_lock(&sbi->fs_lock); - ino->flags |= AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - status = autofs4_wait(sbi, dentry, NFY_MOUNT); + /* Update expiry counter */ + ino->last_used = jiffies; - DPRINTK("mount done status=%d", status); - - if (status) { - spin_lock(&sbi->fs_lock); - ino->flags &= ~AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - return status; - } - } - - /* Initialize expiry counter after successful mount */ - if (ino) - ino->last_used = jiffies; - - spin_lock(&sbi->fs_lock); - ino->flags &= ~AUTOFS_INF_PENDING; - spin_unlock(&sbi->fs_lock); - - return 0; + return status; } /* For autofs direct mounts the follow link triggers the mount */ @@ -258,10 +313,16 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) */ if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { + ino->flags |= AUTOFS_INF_PENDING; spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); - status = try_to_fill_dentry(dentry, 0); + status = try_to_fill_dentry(dentry); + + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); + if (status) goto out_error; @@ -300,18 +361,47 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *dir = dentry->d_parent->d_inode; struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); - int oz_mode = autofs4_oz_mode(sbi); + 
struct autofs_info *ino = autofs4_dentry_ino(dentry); + struct rehash_entry *entry; int flags = nd ? nd->flags : 0; - int status = 1; + unsigned int mutex_aquired; + + DPRINTK("name = %.*s oz_mode = %d", + dentry->d_name.len, dentry->d_name.name, oz_mode); + + /* Daemon never causes a mount to trigger */ + if (autofs4_oz_mode(sbi)) + return 1; + + entry = kmalloc(sizeof(struct rehash_entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + mutex_aquired = mutex_trylock(&dir->i_mutex); - /* Pending dentry */ spin_lock(&sbi->fs_lock); + spin_lock(&dcache_lock); + /* Pending dentry */ if (autofs4_ispending(dentry)) { - /* The daemon never causes a mount to trigger */ - spin_unlock(&sbi->fs_lock); + int status; - if (oz_mode) - return 1; + /* + * We can only unhash and send this to ->lookup() if + * the directory mutex is held over d_revalidate() and + * ->lookup(). This prevents the VFS from incorrectly + * seeing the dentry as non-existent. + */ + ino->flags |= AUTOFS_INF_PENDING; + if (!mutex_aquired) { + autofs4_revalidate_drop(dentry, entry); + spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); + return 0; + } + spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); + mutex_unlock(&dir->i_mutex); + kfree(entry); /* * If the directory has gone away due to an expire @@ -325,45 +415,82 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) * A zero status is success otherwise we have a * negative error code. */ - status = try_to_fill_dentry(dentry, flags); + status = try_to_fill_dentry(dentry); + + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); + if (status == 0) return 1; return status; } - spin_unlock(&sbi->fs_lock); - - /* Negative dentry.. 
invalidate if "old" */ - if (dentry->d_inode == NULL) - return 0; /* Check for a non-mountpoint directory with no contents */ - spin_lock(&dcache_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); - spin_unlock(&dcache_lock); - /* The daemon never causes a mount to trigger */ - if (oz_mode) - return 1; + if (autofs4_need_mount(flags) || current->link_count) { + int status; - /* - * A zero status is success otherwise we have a - * negative error code. - */ - status = try_to_fill_dentry(dentry, flags); - if (status == 0) - return 1; + /* + * We can only unhash and send this to ->lookup() if + * the directory mutex is held over d_revalidate() and + * ->lookup(). This prevents the VFS from incorrectly + * seeing the dentry as non-existent. + */ + ino->flags |= AUTOFS_INF_PENDING; + if (!mutex_aquired) { + autofs4_revalidate_drop(dentry, entry); + spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); + return 0; + } + spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); + mutex_unlock(&dir->i_mutex); + kfree(entry); - return status; + /* + * A zero status is success otherwise we have a + * negative error code. 
+ */ + status = try_to_fill_dentry(dentry); + + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); + + if (status == 0) + return 1; + + return status; + } } spin_unlock(&dcache_lock); + spin_unlock(&sbi->fs_lock); + + if (mutex_aquired) + mutex_unlock(&dir->i_mutex); + + kfree(entry); return 1; } +static void autofs4_free_rehash_entrys(struct autofs_info *inf) +{ + struct list_head *head = &inf->rehash_list; + struct rehash_entry *entry, *next; + list_for_each_entry_safe(entry, next, head, list) { + list_del(&entry->list); + kfree(entry); + } +} + void autofs4_dentry_release(struct dentry *de) { struct autofs_info *inf; @@ -382,6 +509,8 @@ void autofs4_dentry_release(struct dentry *de) list_del(&inf->active); if (!list_empty(&inf->expiring)) list_del(&inf->expiring); + if (!list_empty(&inf->rehash_list)) + autofs4_free_rehash_entrys(inf); spin_unlock(&sbi->lookup_lock); } @@ -414,6 +543,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; +restart: spin_lock(&dcache_lock); spin_lock(&sbi->lookup_lock); head = &sbi->active_list; @@ -431,6 +561,19 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) if (atomic_read(&active->d_count) == 0) goto next; + if (active->d_inode && IS_DEADDIR(active->d_inode)) { + if (!list_empty(&ino->rehash_list)) { + dget(active); + spin_unlock(&active->d_lock); + spin_unlock(&sbi->lookup_lock); + spin_unlock(&dcache_lock); + autofs4_remove_rehash_entrys(ino); + dput(active); + goto restart; + } + goto next; + } + qstr = &active->d_name; if (active->d_name.hash != hash) @@ -443,13 +586,11 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) if (memcmp(qstr->name, str, len)) goto next; - if (d_unhashed(active)) { - dget(active); - spin_unlock(&active->d_lock); - spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); - return active; - } + dget(active); + 
spin_unlock(&active->d_lock); + spin_unlock(&sbi->lookup_lock); + spin_unlock(&dcache_lock); + return active; next: spin_unlock(&active->d_lock); } @@ -498,13 +639,11 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) if (memcmp(qstr->name, str, len)) goto next; - if (d_unhashed(expiring)) { - dget(expiring); - spin_unlock(&expiring->d_lock); - spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); - return expiring; - } + dget(expiring); + spin_unlock(&expiring->d_lock); + spin_unlock(&sbi->lookup_lock); + spin_unlock(&dcache_lock); + return expiring; next: spin_unlock(&expiring->d_lock); } @@ -514,6 +653,48 @@ next: return NULL; } +static struct autofs_info *init_new_dentry(struct autofs_sb_info *sbi, + struct dentry *dentry, int oz_mode) +{ + struct autofs_info *ino; + + /* + * Mark the dentry incomplete but don't hash it. We do this + * to serialize our inode creation operations (symlink and + * mkdir) which prevents deadlock during the callback to + * the daemon. Subsequent user space lookups for the same + * dentry are placed on the wait queue while the daemon + * itself is allowed passage unresticted so the create + * operation itself can then hash the dentry. Finally, + * we check for the hashed dentry and return the newly + * hashed dentry. + */ + dentry->d_op = &autofs4_root_dentry_operations; + + /* +