diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-04 12:05:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-04 12:05:25 -0700 |
commit | 2e08edc5c50a01dc52c005fd939c24476eaf55ef (patch) | |
tree | e5d91ecfc9d966dd4cc006cf005732bd569be5c5 | |
parent | 17dec0a949153d9ac00760ba2f5b78cb583e995f (diff) | |
parent | 04bbc9795d2e89c79edf48fb1303ace2e8c90a60 (diff) |
Merge branch 'work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs dcache updates from Al Viro:
"Part of this is what the trylock loop elimination series has turned
into, part making d_move() preserve the parent (and thus the path) of
victim, plus some general cleanups"
* 'work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (22 commits)
d_genocide: move export to definition
fold dentry_lock_for_move() into its sole caller and clean it up
make non-exchanging __d_move() copy ->d_parent rather than swap them
oprofilefs: don't oops on allocation failure
lustre: get rid of pointless casts to struct dentry *
debugfs_lookup(): switch to lookup_one_len_unlocked()
fold lookup_real() into __lookup_hash()
take out orphan externs (empty_string/slash_string)
split d_path() and friends into a separate file
dcache.c: trim includes
fs/dcache: Avoid a try_lock loop in shrink_dentry_list()
get rid of trylock loop around dentry_kill()
handle move to LRU in retain_dentry()
dput(): consolidate the "do we need to retain it?" into an inlined helper
split the slow part of lock_parent() off
now lock_parent() can't run into killed dentry
get rid of trylock loop in locking dentries on shrink list
d_delete(): get rid of trylock loop
fs/dcache: Move dentry_kill() below lock_parent()
fs/dcache: Remove stale comment from dentry_kill()
...
-rw-r--r-- | drivers/oprofile/oprofilefs.c | 3 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/dcache.c | 6 | ||||
-rw-r--r-- | fs/Makefile | 2 | ||||
-rw-r--r-- | fs/d_path.c | 470 | ||||
-rw-r--r-- | fs/dcache.c | 966 | ||||
-rw-r--r-- | fs/debugfs/inode.c | 5 | ||||
-rw-r--r-- | fs/namei.c | 41 | ||||
-rw-r--r-- | include/linux/dcache.h | 6 |
8 files changed, 735 insertions, 764 deletions
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index d77ebbfc67c9..4ea08979312c 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -138,6 +138,9 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name, struct dentry *dentry; struct inode *inode; + if (!root) + return -ENOMEM; + inode_lock(d_inode(root)); dentry = d_alloc_name(root, name); if (!dentry) { diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 549369739d80..6cd0318062e8 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -90,7 +90,7 @@ static int ll_dcompare(const struct dentry *dentry, d_count(dentry)); /* mountpoint is always valid */ - if (d_mountpoint((struct dentry *)dentry)) + if (d_mountpoint(dentry)) return 0; if (d_lustre_invalid(dentry)) @@ -111,7 +111,7 @@ static int ll_ddelete(const struct dentry *de) LASSERT(de); CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n", - d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping", + d_lustre_invalid(de) ? "deleting" : "keeping", de, de, de->d_parent, d_inode(de), d_unhashed(de) ? "" : "hashed,", list_empty(&de->d_subdirs) ? "" : "subdirs"); @@ -119,7 +119,7 @@ static int ll_ddelete(const struct dentry *de) /* kernel >= 2.6.38 last refcount is decreased after this function. */ LASSERT(d_count(de) == 1); - if (d_lustre_invalid((struct dentry *)de)) + if (d_lustre_invalid(de)) return 1; return 0; } diff --git a/fs/Makefile b/fs/Makefile index add789ea270a..c9375fd2c8c4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ ioctl.o readdir.o select.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ - pnode.o splice.o sync.o utimes.o \ + pnode.o splice.o sync.o utimes.o d_path.o \ stack.o fs_struct.o statfs.o fs_pin.o nsfs.o ifeq ($(CONFIG_BLOCK),y) diff --git a/fs/d_path.c b/fs/d_path.c new file mode 100644 index 000000000000..e8fce6b1174f --- /dev/null +++ b/fs/d_path.c @@ -0,0 +1,470 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/syscalls.h> +#include <linux/export.h> +#include <linux/uaccess.h> +#include <linux/fs_struct.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/prefetch.h> +#include "mount.h" + +static int prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +/** + * prepend_name - prepend a pathname in front of current buffer pointer + * @buffer: buffer pointer + * @buflen: allocated length of the buffer + * @name: name string and length qstr structure + * + * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to + * make sure that either the old or the new name pointer and length are + * fetched. However, there may be mismatch between length and pointer. + * The length cannot be trusted, we need to copy it byte-by-byte until + * the length is reached or a null byte is found. It also prepends "/" at + * the beginning of the name. The sequence number check at the caller will + * retry it again when a d_move() does happen. So any garbage in the buffer + * due to mismatched pointer and length will be discarded. + * + * Load acquire is needed to make sure that we see that terminating NUL. + */ +static int prepend_name(char **buffer, int *buflen, const struct qstr *name) +{ + const char *dname = smp_load_acquire(&name->name); /* ^^^ */ + u32 dlen = READ_ONCE(name->len); + char *p; + + *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + if (!c) + break; + *p++ = c; + } + return 0; +} + +/** + * prepend_path - Prepend path string to a buffer + * @path: the dentry/vfsmount to report + * @root: root vfsmnt/dentry + * @buffer: pointer to the end of the buffer + * @buflen: pointer to buffer length + * + * The function will first try to write out the pathname without taking any + * lock other than the RCU read lock to make sure that dentries won't go away. + * It only checks the sequence number of the global rename_lock as any change + * in the dentry's d_seq will be preceded by changes in the rename_lock + * sequence number. If the sequence number had been changed, it will restart + * the whole pathname back-tracing sequence again by taking the rename_lock. + * In this case, there is no need to take the RCU read lock as the recursive + * parent pointer references will keep the dentry chain alive as long as no + * rename operation is performed. + */ +static int prepend_path(const struct path *path, + const struct path *root, + char **buffer, int *buflen) +{ + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; + int error = 0; + unsigned seq, m_seq = 0; + char *bptr; + int blen; + + rcu_read_lock(); +restart_mnt: + read_seqbegin_or_lock(&mount_lock, &m_seq); + seq = 0; + rcu_read_lock(); +restart: + bptr = *buffer; + blen = *buflen; + error = 0; + dentry = path->dentry; + vfsmnt = path->mnt; + mnt = real_mount(vfsmnt); + read_seqbegin_or_lock(&rename_lock, &seq); + while (dentry != root->dentry || vfsmnt != root->mnt) { + struct dentry * parent; + + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + struct mount *parent = READ_ONCE(mnt->mnt_parent); + /* Escaped? */ + if (dentry != vfsmnt->mnt_root) { + bptr = *buffer; + blen = *buflen; + error = 3; + break; + } + /* Global root? */ + if (mnt != parent) { + dentry = READ_ONCE(mnt->mnt_mountpoint); + mnt = parent; + vfsmnt = &mnt->mnt; + continue; + } + if (!error) + error = is_mounted(vfsmnt) ? 1 : 2; + break; + } + parent = dentry->d_parent; + prefetch(parent); + error = prepend_name(&bptr, &blen, &dentry->d_name); + if (error) + break; + + dentry = parent; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + + if (!(m_seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&mount_lock, m_seq)) { + m_seq = 1; + goto restart_mnt; + } + done_seqretry(&mount_lock, m_seq); + + if (error >= 0 && bptr == *buffer) { + if (--blen < 0) + error = -ENAMETOOLONG; + else + *--bptr = '/'; + } + *buffer = bptr; + *buflen = blen; + return error; +} + +/** + * __d_path - return the path of a dentry + * @path: the dentry/vfsmount to report + * @root: root vfsmnt/dentry + * @buf: buffer to return value in + * @buflen: buffer length + * + * Convert a dentry into an ASCII path name. + * + * Returns a pointer into the buffer or an error code if the + * path was too long. + * + * "buflen" should be positive. + * + * If the path is not reachable from the supplied root, return %NULL. + */ +char *__d_path(const struct path *path, + const struct path *root, + char *buf, int buflen) +{ + char *res = buf + buflen; + int error; + + prepend(&res, &buflen, "\0", 1); + error = prepend_path(path, root, &res, &buflen); + + if (error < 0) + return ERR_PTR(error); + if (error > 0) + return NULL; + return res; +} + +char *d_absolute_path(const struct path *path, + char *buf, int buflen) +{ + struct path root = {}; + char *res = buf + buflen; + int error; + + prepend(&res, &buflen, "\0", 1); + error = prepend_path(path, &root, &res, &buflen); + + if (error > 1) + error = -EINVAL; + if (error < 0) + return ERR_PTR(error); + return res; +} + +/* + * same as __d_path but appends "(deleted)" for unlinked files. + */ +static int path_with_deleted(const struct path *path, + const struct path *root, + char **buf, int *buflen) +{ + prepend(buf, buflen, "\0", 1); + if (d_unlinked(path->dentry)) { + int error = prepend(buf, buflen, " (deleted)", 10); + if (error) + return error; + } + + return prepend_path(path, root, buf, buflen); +} + +static int prepend_unreachable(char **buffer, int *buflen) +{ + return prepend(buffer, buflen, "(unreachable)", 13); +} + +static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + } while (read_seqcount_retry(&fs->seq, seq)); +} + +/** + * d_path - return the path of a dentry + * @path: path to report + * @buf: buffer to return value in + * @buflen: buffer length + * + * Convert a dentry into an ASCII path name. If the entry has been deleted + * the string " (deleted)" is appended. Note that this is ambiguous. + * + * Returns a pointer into the buffer or an error code if the path was + * too long. Note: Callers should use the returned pointer, not the passed + * in buffer, to use the name! The implementation often starts at an offset + * into the buffer, and may leave 0 bytes at the start. + * + * "buflen" should be positive. + */ +char *d_path(const struct path *path, char *buf, int buflen) +{ + char *res = buf + buflen; + struct path root; + int error; + + /* + * We have various synthetic filesystems that never get mounted. On + * these filesystems dentries are never used for lookup purposes, and + * thus don't need to be hashed. They also don't need a name until a + * user wants to identify the object in /proc/pid/fd/. The little hack + * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. + */ + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) + return path->dentry->d_op->d_dname(path->dentry, buf, buflen); + + rcu_read_lock(); + get_fs_root_rcu(current->fs, &root); + error = path_with_deleted(path, &root, &res, &buflen); + rcu_read_unlock(); + + if (error < 0) + res = ERR_PTR(error); + return res; +} +EXPORT_SYMBOL(d_path); + +/* + * Helper function for dentry_operations.d_dname() members + */ +char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, + const char *fmt, ...) +{ + va_list args; + char temp[64]; + int sz; + + va_start(args, fmt); + sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; + va_end(args); + + if (sz > sizeof(temp) || sz > buflen) + return ERR_PTR(-ENAMETOOLONG); + + buffer += buflen - sz; + return memcpy(buffer, temp, sz); +} + +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +{ + char *end = buffer + buflen; + /* these dentries are never renamed, so d_lock is not needed */ + if (prepend(&end, &buflen, " (deleted)", 11) || + prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || + prepend(&end, &buflen, "/", 1)) + end = ERR_PTR(-ENAMETOOLONG); + return end; +} +EXPORT_SYMBOL(simple_dname); + +/* + * Write full pathname from the root of the filesystem into the buffer. + */ +static char *__dentry_path(struct dentry *d, char *buf, int buflen) +{ + struct dentry *dentry; + char *end, *retval; + int len, seq = 0; + int error = 0; + + if (buflen < 2) + goto Elong; + + rcu_read_lock(); +restart: + dentry = d; + end = buf + buflen; + len = buflen; + prepend(&end, &len, "\0", 1); + /* Get '/' right */ + retval = end-1; + *retval = '/'; + read_seqbegin_or_lock(&rename_lock, &seq); + while (!IS_ROOT(dentry)) { + struct dentry *parent = dentry->d_parent; + + prefetch(parent); + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; + + retval = end; + dentry = parent; + } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) +{ + return __dentry_path(dentry, buf, buflen); +} +EXPORT_SYMBOL(dentry_path_raw); + +char *dentry_path(struct dentry *dentry, char *buf, int buflen) +{ + char *p = NULL; + char *retval; + + if (d_unlinked(dentry)) { + p = buf + buflen; + if (prepend(&p, &buflen, "//deleted", 10) != 0) + goto Elong; + buflen++; + } + retval = __dentry_path(dentry, buf, buflen); + if (!IS_ERR(retval) && p) + *p = '/'; /* restore '/' overriden with '\0' */ + return retval; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, + struct path *pwd) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + *pwd = fs->pwd; + } while (read_seqcount_retry(&fs->seq, seq)); +} + +/* + * NOTE! The user-level library version returns a + * character pointer. The kernel system call just + * returns the length of the buffer filled (which + * includes the ending '\0' character), or a negative + * error value. So libc would do something like + * + * char *getcwd(char * buf, size_t size) + * { + * int retval; + * + * retval = sys_getcwd(buf, size); + * if (retval >= 0) + * return buf; + * errno = -retval; + * return NULL; + * } + */ +SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) +{ + int error; + struct path pwd, root; + char *page = __getname(); + + if (!page) + return -ENOMEM; + + rcu_read_lock(); + get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); + + error = -ENOENT; + if (!d_unlinked(pwd.dentry)) { + unsigned long len; + char *cwd = page + PATH_MAX; + int buflen = PATH_MAX; + + prepend(&cwd, &buflen, "\0", 1); + error = prepend_path(&pwd, &root, &cwd, &buflen); + rcu_read_unlock(); + + if (error < 0) + goto out; + + /* Unreachable from current root */ + if (error > 0) { + error = prepend_unreachable(&cwd, &buflen); + if (error) + goto out; + } + + error = -ERANGE; + len = PATH_MAX + page - cwd; + if (len <= size) { + error = len; + if (copy_to_user(buf, cwd, len)) + error = -EFAULT; + } + } else { + rcu_read_unlock(); + } + +out: + __putname(page); + return error; +} diff --git a/fs/dcache.c b/fs/dcache.c index 8945e6cabd93..593079176123 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -14,7 +14,7 @@ * the dcache entry is deleted or garbage collected. */ -#include <linux/syscalls.h> +#include <linux/ratelimit.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/fs.h> @@ -24,18 +24,11 @@ #include <linux/hash.h> #include <linux/cache.h> #include <linux/export.h> -#include <linux/mount.h> -#include <linux/file.h> -#include <linux/uaccess.h> #include <linux/security.h> #include <linux/seqlock.h> -#include <linux/swap.h> #include <linux/bootmem.h> -#include <linux/fs_struct.h> #include <linux/bit_spinlock.h> #include <linux/rculist_bl.h> -#include <linux/prefetch.h> -#include <linux/ratelimit.h> #include <linux/list_lru.h> #include "internal.h" #include "mount.h" @@ -74,9 +67,7 @@ * dentry->d_lock * * If no ancestor relationship: - * if (dentry1 < dentry2) - * dentry1->d_lock - * dentry2->d_lock + * arbitrary, since it's serialized on rename_lock */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); @@ -440,17 +431,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, list_lru_isolate_move(lru, &dentry->d_lru, list); } -/* - * dentry_lru_(add|del)_list) must be called with d_lock held. - */ -static void dentry_lru_add(struct dentry *dentry) -{ - if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) - d_lru_add(dentry); - else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) - dentry->d_flags |= DCACHE_REFERENCED; -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -470,30 +450,29 @@ static void dentry_lru_add(struct dentry *dentry) */ static void ___d_drop(struct dentry *dentry) { - if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - /* - * Hashed dentries are normally on the dentry hashtable, - * with the exception of those newly allocated by - * d_obtain_root, which are always IS_ROOT: - */ - if (unlikely(IS_ROOT(dentry))) - b = &dentry->d_sb->s_roots; - else - b = d_hash(dentry->d_name.hash); + struct hlist_bl_head *b; + /* + * Hashed dentries are normally on the dentry hashtable, + * with the exception of those newly allocated by + * d_obtain_root, which are always IS_ROOT: + */ + if (unlikely(IS_ROOT(dentry))) + b = &dentry->d_sb->s_roots; + else + b = d_hash(dentry->d_name.hash); - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - hlist_bl_unlock(b); - /* After this call, in-progress rcu-walk path lookup will fail. */ - write_seqcount_invalidate(&dentry->d_seq); - } + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + hlist_bl_unlock(b); } void __d_drop(struct dentry *dentry) { - ___d_drop(dentry); - dentry->d_hash.pprev = NULL; + if (!d_unhashed(dentry)) { + ___d_drop(dentry); + dentry->d_hash.pprev = NULL; + write_seqcount_invalidate(&dentry->d_seq); + } } EXPORT_SYMBOL(__d_drop); @@ -589,10 +568,71 @@ static void __dentry_kill(struct dentry *dentry) dentry_free(dentry); } +static struct dentry *__lock_parent(struct dentry *dentry) +{ + struct dentry *parent; + rcu_read_lock(); + spin_unlock(&dentry->d_lock); +again: + parent = READ_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + /* + * We can't blindly lock dentry until we are sure + * that we won't violate the locking order. + * Any changes of dentry->d_parent must have + * been done with parent->d_lock held, so + * spin_lock() above is enough of a barrier + * for checking if it's still our child. + */ + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + else + parent = NULL; + return parent; +} + +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + return __lock_parent(dentry); +} + +static inline bool retain_dentry(struct dentry *dentry) +{ + WARN_ON(d_in_lookup(dentry)); + + /* Unreachable? Get rid of it */ + if (unlikely(d_unhashed(dentry))) + return false; + + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + return false; + + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { + if (dentry->d_op->d_delete(dentry)) + return false; + } + /* retain; LRU fodder */ + dentry->d_lockref.count--; + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) + d_lru_add(dentry); + else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) + dentry->d_flags |= DCACHE_REFERENCED; + return true; +} + /* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ static struct dentry *dentry_kill(struct dentry *dentry) @@ -602,62 +642,43 @@ static struct dentry *dentry_kill(struct dentry *dentry) struct dentry *parent = NULL; if (inode && unlikely(!spin_trylock(&inode->i_lock))) - goto failed; + goto slow_positive; if (!IS_ROOT(dentry)) { parent = dentry->d_parent; if (unlikely(!spin_trylock(&parent->d_lock))) { - if (inode) - spin_unlock(&inode->i_lock); - goto failed; + parent = __lock_parent(dentry); + if (likely(inode || !dentry->d_inode)) + goto got_locks; + /* negative that became positive */ + if (parent) + spin_unlock(&parent->d_lock); + inode = dentry->d_inode; + goto slow_positive; } } - __dentry_kill(dentry); return parent; -failed: +slow_positive: spin_unlock(&dentry->d_lock); - return dentry; /* try again with same dentry */ -} - -static inline struct dentry *lock_parent(struct dentry *dentry) -{ - struct dentry *parent = dentry->d_parent; - if (IS_ROOT(dentry)) - return NULL; - if (unlikely(dentry->d_lockref.count < 0)) - return NULL; - if (likely(spin_trylock(&parent->d_lock))) + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); + parent = lock_parent(dentry); +got_locks: + if (unlikely(dentry->d_lockref.count != 1)) { + dentry->d_lockref.count--; + } else if (likely(!retain_dentry(dentry))) { + __dentry_kill(dentry); return parent; - rcu_read_lock(); - spin_unlock(&dentry->d_lock); -again: - parent = READ_ONCE(dentry->d_parent); - spin_lock(&parent->d_lock); - /* - * We can't blindly lock dentry until we are sure - * that we won't violate the locking order. - * Any changes of dentry->d_parent must have - * been done with parent->d_lock held, so - * spin_lock() above is enough of a barrier - * for checking if it's still our child. - */ - if (unlikely(parent != dentry->d_parent)) { - spin_unlock(&parent->d_lock); - goto again; } - if (parent != dentry) { - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (unlikely(dentry->d_lockref.count < 0)) { - spin_unlock(&parent->d_lock); - parent = NULL; - } - } else { - parent = NULL; - } - rcu_read_unlock(); - return parent; + /* we are keeping it, after all */ + if (inode) + spin_unlock(&inode->i_lock); + if (parent) + spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); + return NULL; } /* @@ -807,27 +828,11 @@ repeat: /* Slow case: now with the dentry lock held */ rcu_read_unlock(); - WARN_ON(d_in_lookup(dentry)); - - /* Unreachable? Get rid of it */ - if (unlikely(d_unhashed(dentry))) - goto kill_it; - - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - goto kill_it; - - if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { - if (dentry->d_op->d_delete(dentry)) - goto kill_it; + if (likely(retain_dentry(dentry))) { + spin_unlock(&dentry->d_lock); + return; } - dentry_lru_add(dentry); - - dentry->d_lockref.count--; - spin_unlock(&dentry->d_lock); - return; - -kill_it: dentry = dentry_kill(dentry); if (dentry) { cond_resched(); @@ -976,56 +981,83 @@ restart: } EXPORT_SYMBOL(d_prune_aliases); -static void shrink_dentry_list(struct list_head *list) +/* + * Lock a dentry from shrink list. + * Called under rcu_read_lock() and dentry->d_lock; the former + * guarantees that nothing we access will be freed under us. + * Note that dentry is *not* protected from concurrent dentry_kill(), + * d_delete(), etc. + * + * Return false if dentry has been disrupted or grabbed, leaving + * the caller to kick it off-list. Otherwise, return true and have + * that dentry's inode and parent both locked. + */ +static bool shrink_lock_dentry(struct dentry *dentry) { - struct dentry *dentry, *parent; + struct inode *inode; + struct dentry *parent; - while (!list_empty(list)) { - struct inode *inode; - dentry = list_entry(list->prev, struct dentry, d_lru); + if (dentry->d_lockref.count) + return false; + + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { + spin_unlock(&dentry->d_lock); + spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); - parent = lock_parent(dentry); + if (unlikely(dentry->d_lockref.count)) + goto out; + /* changed inode means that somebody had grabbed it */ + if (unlikely(inode != dentry->d_inode)) + goto out; + } - /* - * The dispose list is isolated and dentries are not accounted - * to the LRU here, so we can simply remove it from the list - * here regardless of whether it is referenced or not. - */ - d_shrink_del(dentry); + parent = dentry->d_parent; + if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock))) + return true; - /* - * We found an inuse dentry which was not removed from - * the LRU because of laziness during lookup. Do not free it. - */ - if (dentry->d_lockref.count > 0) { - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - continue; - } + spin_unlock(&dentry->d_lock); + spin_lock(&parent->d_lock); + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + spin_lock(&dentry->d_lock); + goto out; + } + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (likely(!dentry->d_lockref.count)) + return true; + spin_unlock(&parent->d_lock); +out: + if (inode) + spin_unlock(&inode->i_lock); + return false; +} +static void shrink_dentry_list(struct list_head *list) +{ + while (!list_empty(list)) { + struct dentry *dentry, *parent; - if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { - bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + dentry = list_entry(list->prev, struct dentry, d_lru); + spin_lock(&dentry->d_lock); + rcu_read_lock(); + if (!shrink_lock_dentry(dentry)) { + bool can_free = false; + rcu_read_unlock(); + d_shrink_del(dentry); + if (dentry->d_lockref.count < 0) + can_free = dentry->d_flags & DCACHE_MAY_FREE; spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); if (can_free) dentry_free(dentry); continue; } - - inode = dentry->d_inode; - if (inode && unlikely(!spin_trylock(&inode->i_lock))) { - d_shrink_add(dentry, list); - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - continue; - } - + rcu_read_unlock(); + d_shrink_del(dentry); + parent = dentry->d_parent; __dentry_kill(dentry); - + if (parent == dentry) + continue; /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also @@ -1033,26 +1065,8 @@ static void shrink_dentry_list(struct list_head *list) * fragmentation. */ dentry = parent; - while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { - parent = lock_parent(dentry); - if (dentry->d_lockref.count != 1) { - dentry->d_lockref.count--; - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - break; - } - inode = dentry->d_inode; /* can't be NULL */ - if (unlikely(!spin_trylock(&inode->i_lock))) { - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - cpu_relax(); - continue; - } - __dentry_kill(dentry); - dentry = parent; - } + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) + dentry = dentry_kill(dentry); } } @@ -2379,32 +2393,22 @@ EXPORT_SYMBOL(d_hash_and_lookup); void d_delete(struct dentry * dentry) { - struct inode *inode; - int isdir = 0; + struct inode *inode = dentry->d_inode; + int isdir = d_is_dir(dentry); + + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); /* * Are we the only user? */ -again: - spin_lock(&dentry->d_lock); - inode = dentry->d_inode; - isdir = S_ISDIR(inode->i_mode); if (dentry->d_lockref.count == 1) { - if (!spin_trylock(&inode->i_lock)) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - goto again; - } dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_unlink_inode(dentry); - fsnotify_nameremove(dentry, isdir); - return; - } - - if (!d_unhashed(dentry)) + } else { __d_drop(dentry); - - spin_unlock(&dentry->d_lock); - + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + } fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); @@ -2769,57 +2773,6 @@ static void copy_name(struct dentry *dentry, struct dentry *target) kfree_rcu(old_name, u.head); } -static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) -{ - /* - * XXXX: do we really need to take target->d_lock? - */ - if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) - spin_lock(&target->d_parent->d_lock); - else { - if (d_ancestor(dentry->d_parent, target->d_parent)) { - spin_lock(&dentry->d_parent->d_lock); - spin_lock_nested(&target->d_parent->d_lock, - DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&target->d_parent->d_lock); - spin_lock_nested(&dentry->d_parent->d_lock, - DENTRY_D_LOCK_NESTED); - } - } - if (target < dentry) { - spin_lock_nested(&target->d_lock, 2); - spin_lock_nested(&dentry->d_lock, 3); - } else { - spin_lock_nested(&dentry->d_lock, 2); - spin_lock_nested(&target->d_lock, 3); - } -} - -static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target) -{ - if (target->d_parent != dentry->d_parent) - spin_unlock(&dentry->d_parent->d_lock); - if (target->d_parent != target) - spin_unlock(&target->d_parent->d_lock); - spin_unlock(&target->d_lock); - spin_unlock(&dentry->d_lock); -} - -/* - * When switching names, the actual string doesn't strictly have to - * be preserved in the target - because we're dropping the target - * anyway. As such, we can just do a simple memcpy() to copy over - * the new name before we switch, unless we are going to rehash - * it. Note that if we *do* unhash the target, we are not allowed - * to rehash it without giving it a new name/hash key - whether - * we swap or overwrite the names here, resulting name won't match - * the reality in filesystem; it's only there for d_path() purposes. - * Note that all of this is happening under rename_lock, so the - * any hash lookup seeing it in the middle of manipulations will - * be discarded anyway. So we do not care what happens to the hash< |