summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-02 12:30:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-02 12:30:08 -0700
commit9c577491b985e1b27995abe69b32b041893798cf (patch)
treea7c9c8f29310b76368c94b4cd05c762464b1658a /fs
parentd987ca1c6b7e22fbd30664111e85cec7aa66000d (diff)
parent99a4a90c8e9337e364136393286544e3753673c3 (diff)
Merge branch 'work.dotdot1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs pathwalk sanitizing from Al Viro: "Massive pathwalk rewrite and cleanups. Several iterations have been posted; hopefully this thing is getting readable and understandable now. Pretty much all parts of pathname resolutions are affected... The branch is identical to what has sat in -next, except for commit message in "lift all calls of step_into() out of follow_dotdot/ follow_dotdot_rcu", crediting Qian Cai for reporting the bug; only commit message changed there." * 'work.dotdot1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (69 commits) lookup_open(): don't bother with fallbacks to lookup+create atomic_open(): no need to pass struct open_flags anymore open_last_lookups(): move complete_walk() into do_open() open_last_lookups(): lift O_EXCL|O_CREAT handling into do_open() open_last_lookups(): don't abuse complete_walk() when all we want is unlazy open_last_lookups(): consolidate fsnotify_create() calls take post-lookup part of do_last() out of loop link_path_walk(): sample parent's i_uid and i_mode for the last component __nd_alloc_stack(): make it return bool reserve_stack(): switch to __nd_alloc_stack() pick_link(): take reserving space on stack into a new helper pick_link(): more straightforward handling of allocation failures fold path_to_nameidata() into its only remaining caller pick_link(): pass it struct path already with normal refcounting rules fs/namei.c: kill follow_mount() non-RCU analogue of the previous commit helper for mount rootwards traversal follow_dotdot(): be lazy about changing nd->path follow_dotdot_rcu(): be lazy about changing nd->path follow_dotdot{,_rcu}(): massage loops ...
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs/dev-ioctl.c6
-rw-r--r--fs/internal.h1
-rw-r--r--fs/namei.c1474
-rw-r--r--fs/namespace.c96
-rw-r--r--fs/open.c4
5 files changed, 676 insertions, 905 deletions
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index a3cdb0036c5d..f3a0f412b43b 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -186,7 +186,7 @@ static int find_autofs_mount(const char *pathname,
struct path path;
int err;
- err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0);
+ err = kern_path(pathname, LOOKUP_MOUNTPOINT, &path);
if (err)
return err;
err = -ENOENT;
@@ -519,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
if (!fp || param->ioctlfd == -1) {
if (autofs_type_any(type))
- err = kern_path_mountpoint(AT_FDCWD,
- name, &path, LOOKUP_FOLLOW);
+ err = kern_path(name, LOOKUP_FOLLOW | LOOKUP_MOUNTPOINT,
+ &path);
else
err = find_autofs_mount(name, &path,
test_by_type, &type);
diff --git a/fs/internal.h b/fs/internal.h
index 4d37912a5587..aa5d45524e87 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -60,7 +60,6 @@ extern int finish_clean_context(struct fs_context *fc);
*/
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root);
-extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
long do_mknodat(int dfd, const char __user *filename, umode_t mode,
diff --git a/fs/namei.c b/fs/namei.c
index db6565c99825..61fdb77a7d58 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -503,9 +503,10 @@ struct nameidata {
} *stack, internal[EMBEDDED_LEVELS];
struct filename *name;
struct nameidata *saved;
- struct inode *link_inode;
unsigned root_seq;
int dfd;
+ kuid_t dir_uid;
+ umode_t dir_mode;
} __randomize_layout;
static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
@@ -530,52 +531,34 @@ static void restore_nameidata(void)
kfree(now->stack);
}
-static int __nd_alloc_stack(struct nameidata *nd)
+static bool nd_alloc_stack(struct nameidata *nd)
{
struct saved *p;
- if (nd->flags & LOOKUP_RCU) {
- p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
- GFP_ATOMIC);
- if (unlikely(!p))
- return -ECHILD;
- } else {
- p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
- GFP_KERNEL);
- if (unlikely(!p))
- return -ENOMEM;
- }
+ p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
+ nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
+ if (unlikely(!p))
+ return false;
memcpy(p, nd->internal, sizeof(nd->internal));
nd->stack = p;
- return 0;
+ return true;
}
/**
- * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
- * @path: nameidate to verify
+ * path_connected - Verify that a dentry is below mnt.mnt_root
*
* Rename can sometimes move a file or directory outside of a bind
* mount, path_connected allows those cases to be detected.
*/
-static bool path_connected(const struct path *path)
+static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
{
- struct vfsmount *mnt = path->mnt;
struct super_block *sb = mnt->mnt_sb;
/* Bind mounts and multi-root filesystems can have disconnected paths */
if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
return true;
- return is_subdir(path->dentry, mnt->mnt_root);
-}
-
-static inline int nd_alloc_stack(struct nameidata *nd)
-{
- if (likely(nd->depth != EMBEDDED_LEVELS))
- return 0;
- if (likely(nd->stack != nd->internal))
- return 0;
- return __nd_alloc_stack(nd);
+ return is_subdir(dentry, mnt->mnt_root);
}
static void drop_links(struct nameidata *nd)
@@ -608,10 +591,9 @@ static void terminate_walk(struct nameidata *nd)
}
/* path_put is needed afterwards regardless of success or failure */
-static bool legitimize_path(struct nameidata *nd,
- struct path *path, unsigned seq)
+static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
{
- int res = __legitimize_mnt(path->mnt, nd->m_seq);
+ int res = __legitimize_mnt(path->mnt, mseq);
if (unlikely(res)) {
if (res > 0)
path->mnt = NULL;
@@ -625,6 +607,12 @@ static bool legitimize_path(struct nameidata *nd,
return !read_seqcount_retry(&path->dentry->d_seq, seq);
}
+static inline bool legitimize_path(struct nameidata *nd,
+ struct path *path, unsigned seq)
+{
+ return __legitimize_path(path, nd->m_seq, seq);
+}
+
static bool legitimize_links(struct nameidata *nd)
{
int i;
@@ -858,25 +846,6 @@ static int set_root(struct nameidata *nd)
return 0;
}
-static void path_put_conditional(struct path *path, struct nameidata *nd)
-{
- dput(path->dentry);
- if (path->mnt != nd->path.mnt)
- mntput(path->mnt);
-}
-
-static inline void path_to_nameidata(const struct path *path,
- struct nameidata *nd)
-{
- if (!(nd->flags & LOOKUP_RCU)) {
- dput(nd->path.dentry);
- if (nd->path.mnt != path->mnt)
- mntput(nd->path.mnt);
- }
- nd->path.mnt = path->mnt;
- nd->path.dentry = path->dentry;
-}
-
static int nd_jump_root(struct nameidata *nd)
{
if (unlikely(nd->flags & LOOKUP_BENEATH))
@@ -969,28 +938,21 @@ int sysctl_protected_regular __read_mostly;
*
* Returns 0 if following the symlink is allowed, -ve on error.
*/
-static inline int may_follow_link(struct nameidata *nd)
+static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
{
- const struct inode *inode;
- const struct inode *parent;
- kuid_t puid;
-
if (!sysctl_protected_symlinks)
return 0;
/* Allowed if owner and follower match. */
- inode = nd->link_inode;
if (uid_eq(current_cred()->fsuid, inode->i_uid))
return 0;
/* Allowed if parent directory not sticky and world-writable. */
- parent = nd->inode;
- if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
+ if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
return 0;
/* Allowed if parent directory and link owner match. */
- puid = parent->i_uid;
- if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
+ if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
return 0;
if (nd->flags & LOOKUP_RCU)
@@ -1113,63 +1075,6 @@ static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
return 0;
}
-static __always_inline
-const char *get_link(struct nameidata *nd)
-{
- struct saved *last = nd->stack + nd->depth - 1;
- struct dentry *dentry = last->link.dentry;
- struct inode *inode = nd->link_inode;
- int error;
- const char *res;
-
- if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
- return ERR_PTR(-ELOOP);
-
- if (!(nd->flags & LOOKUP_RCU)) {
- touch_atime(&last->link);
- cond_resched();
- } else if (atime_needs_update(&last->link, inode)) {
- if (unlikely(unlazy_walk(nd)))
- return ERR_PTR(-ECHILD);
- touch_atime(&last->link);
- }
-
- error = security_inode_follow_link(dentry, inode,
- nd->flags & LOOKUP_RCU);
- if (unlikely(error))
- return ERR_PTR(error);
-
- nd->last_type = LAST_BIND;
- res = READ_ONCE(inode->i_link);
- if (!res) {
- const char * (*get)(struct dentry *, struct inode *,
- struct delayed_call *);
- get = inode->i_op->get_link;
- if (nd->flags & LOOKUP_RCU) {
- res = get(NULL, inode, &last->done);
- if (res == ERR_PTR(-ECHILD)) {
- if (unlikely(unlazy_walk(nd)))
- return ERR_PTR(-ECHILD);
- res = get(dentry, inode, &last->done);
- }
- } else {
- res = get(dentry, inode, &last->done);
- }
- if (IS_ERR_OR_NULL(res))
- return res;
- }
- if (*res == '/') {
- error = nd_jump_root(nd);
- if (unlikely(error))
- return ERR_PTR(error);
- while (unlikely(*++res == '/'))
- ;
- }
- if (!*res)
- res = NULL;
- return res;
-}
-
/*
* follow_up - Find the mountpoint of path's vfsmount
*
@@ -1203,19 +1108,59 @@ int follow_up(struct path *path)
}
EXPORT_SYMBOL(follow_up);
+static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
+ struct path *path, unsigned *seqp)
+{
+ while (mnt_has_parent(m)) {
+ struct dentry *mountpoint = m->mnt_mountpoint;
+
+ m = m->mnt_parent;
+ if (unlikely(root->dentry == mountpoint &&
+ root->mnt == &m->mnt))
+ break;
+ if (mountpoint != m->mnt.mnt_root) {
+ path->mnt = &m->mnt;
+ path->dentry = mountpoint;
+ *seqp = read_seqcount_begin(&mountpoint->d_seq);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool choose_mountpoint(struct mount *m, const struct path *root,
+ struct path *path)
+{
+ bool found;
+
+ rcu_read_lock();
+ while (1) {
+ unsigned seq, mseq = read_seqbegin(&mount_lock);
+
+ found = choose_mountpoint_rcu(m, root, path, &seq);
+ if (unlikely(!found)) {
+ if (!read_seqretry(&mount_lock, mseq))
+ break;
+ } else {
+ if (likely(__legitimize_path(path, seq, mseq)))
+ break;
+ rcu_read_unlock();
+ path_put(path);
+ rcu_read_lock();
+ }
+ }
+ rcu_read_unlock();
+ return found;
+}
+
/*
* Perform an automount
* - return -EISDIR to tell follow_managed() to stop and return the path we
* were called with.
*/
-static int follow_automount(struct path *path, struct nameidata *nd,
- bool *need_mntput)
+static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
{
- struct vfsmount *mnt;
- int err;
-
- if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
- return -EREMOTE;
+ struct dentry *dentry = path->dentry;
/* We don't want to mount if someone's just doing a stat -
* unless they're stat'ing a directory and appended a '/' to
@@ -1228,138 +1173,91 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* as being automount points. These will need the attentions
* of the daemon to instantiate them before they can be used.
*/
- if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
+ if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
- path->dentry->d_inode)
+ dentry->d_inode)
return -EISDIR;
- nd->total_link_count++;
- if (nd->total_link_count >= 40)
+ if (count && (*count)++ >= MAXSYMLINKS)
return -ELOOP;
- mnt = path->dentry->d_op->d_automount(path);
- if (IS_ERR(mnt)) {
- /*
- * The filesystem is allowed to return -EISDIR here to indicate
- * it doesn't want to automount. For instance, autofs would do
- * this so that its userspace daemon can mount on this dentry.
- *
- * However, we can only permit this if it's a terminal point in
- * the path being looked up; if it wasn't then the remainder of
- * the path is inaccessible and we should say so.
- */
- if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT))
- return -EREMOTE;
- return PTR_ERR(mnt);
- }
-
- if (!mnt) /* mount collision */
- return 0;
-
- if (!*need_mntput) {
- /* lock_mount() may release path->mnt on error */
- mntget(path->mnt);
- *need_mntput = true;
- }
- err = finish_automount(mnt, path);
-
- switch (err) {
- case -EBUSY:
- /* Someone else made a mount here whilst we were busy */
- return 0;
- case 0:
- path_put(path);
- path->mnt = mnt;
- path->dentry = dget(mnt->mnt_root);
- return 0;
- default:
- return err;
- }
-
+ return finish_automount(dentry->d_op->d_automount(path), path);
}
/*
- * Handle a dentry that is managed in some way.
- * - Flagged for transit management (autofs)
- * - Flagged as mountpoint
- * - Flagged as automount point
- *
- * This may only be called in refwalk mode.
- * On success path->dentry is known positive.
- *
- * Serialization is taken care of in namespace.c
+ * mount traversal - out-of-line part. One note on ->d_flags accesses -
+ * dentries are pinned but not locked here, so negative dentry can go
+ * positive right under us. Use of smp_load_acquire() provides a barrier
+ * sufficient for ->d_inode and ->d_flags consistency.
*/
-static int follow_managed(struct path *path, struct nameidata *nd)
+static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
+ int *count, unsigned lookup_flags)
{
- struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
- unsigned flags;
+ struct vfsmount *mnt = path->mnt;
bool need_mntput = false;
int ret = 0;
- /* Given that we're not holding a lock here, we retain the value in a
- * local variable for each dentry as we look at it so that we don't see
- * the components of that value change under us */
- while (flags = smp_load_acquire(&path->dentry->d_flags),
- unlikely(flags & DCACHE_MANAGED_DENTRY)) {
+ while (flags & DCACHE_MANAGED_DENTRY) {
/* Allow the filesystem to manage the transit without i_mutex
* being held. */
if (flags & DCACHE_MANAGE_TRANSIT) {
- BUG_ON(!path->dentry->d_op);
- BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path, false);
flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0)
break;
}
- /* Transit to a mounted filesystem. */
- if (flags & DCACHE_MOUNTED) {
+ if (flags & DCACHE_MOUNTED) { // something's mounted on it..
struct vfsmount *mounted = lookup_mnt(path);
- if (mounted) {
+ if (mounted) { // ... in our namespace
dput(path->dentry);
if (need_mntput)
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
+ // here we know it's positive
+ flags = path->dentry->d_flags;
need_mntput = true;
continue;
}
-
- /* Something is mounted on this dentry in another
- * namespace and/or whatever was mounted there in this
- * namespace got unmounted before lookup_mnt() could
- * get it */
}
- /* Handle an automount point */
- if (flags & DCACHE_NEED_AUTOMOUNT) {
- ret = follow_automount(path, nd, &need_mntput);
- if (ret < 0)
- break;
- continue;
- }
+ if (!(flags & DCACHE_NEED_AUTOMOUNT))
+ break;
- /* We didn't change the current path point */
- break;
+ // uncovered automount point
+ ret = follow_automount(path, count, lookup_flags);
+ flags = smp_load_acquire(&path->dentry->d_flags);
+ if (ret < 0)
+ break;
}
- if (need_mntput) {
- if (path->mnt == mnt)
- mntput(path->mnt);
- if (unlikely(nd->flags & LOOKUP_NO_XDEV))
- ret = -EXDEV;
- else
- nd->flags |= LOOKUP_JUMPED;
- }
- if (ret == -EISDIR || !ret)
- ret = 1;
- if (ret > 0 && unlikely(d_flags_negative(flags)))
+ if (ret == -EISDIR)
+ ret = 0;
+ // possible if you race with several mount --move
+ if (need_mntput && path->mnt == mnt)
+ mntput(path->mnt);
+ if (!ret && unlikely(d_flags_negative(flags)))
ret = -ENOENT;
- if (unlikely(ret < 0))
- path_put_conditional(path, nd);
+ *jumped = need_mntput;
return ret;
}
+static inline int traverse_mounts(struct path *path, bool *jumped,
+ int *count, unsigned lookup_flags)
+{
+ unsigned flags = smp_load_acquire(&path->dentry->d_flags);
+
+ /* fastpath */
+ if (likely(!(flags & DCACHE_MANAGED_DENTRY))) {
+ *jumped = false;
+ if (unlikely(d_flags_negative(flags)))
+ return -ENOENT;
+ return 0;
+ }
+ return __traverse_mounts(path, flags, jumped, count, lookup_flags);
+}
+
int follow_down_one(struct path *path)
{
struct vfsmount *mounted;
@@ -1376,11 +1274,22 @@ int follow_down_one(struct path *path)
}
EXPORT_SYMBOL(follow_down_one);
-static inline int managed_dentry_rcu(const struct path *path)
+/*
+ * Follow down to the covering mount currently visible to userspace. At each
+ * point, the filesystem owning that dentry may be queried as to whether the
+ * caller is permitted to proceed or not.
+ */
+int follow_down(struct path *path)
{
- return (path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
- path->dentry->d_op->d_manage(path, true) : 0;
+ struct vfsmount *mnt = path->mnt;
+ bool jumped;
+ int ret = traverse_mounts(path, &jumped, NULL, 0);
+
+ if (path->mnt != mnt)
+ mntput(mnt);
+ return ret;
}
+EXPORT_SYMBOL(follow_down);
/*
* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
@@ -1389,204 +1298,88 @@ static inline int managed_dentry_rcu(const struct path *path)
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
struct inode **inode, unsigned *seqp)
{
+ struct dentry *dentry = path->dentry;
+ unsigned int flags = dentry->d_flags;
+
+ if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
+ return true;
+
+ if (unlikely(nd->flags & LOOKUP_NO_XDEV))
+ return false;
+
for (;;) {
- struct mount *mounted;
/*
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
*/
- switch (managed_dentry_rcu(path)) {
- case -ECHILD:
- default:
- return false;
- case -EISDIR:
- return true;
- case 0:
- break;
- }
-
- if (!d_mountpoint(path->dentry))
- return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
-
- mounted = __lookup_mnt(path->mnt, path->dentry);
- if (!mounted)
- break;
- if (unlikely(nd->flags & LOOKUP_NO_XDEV))
- return false;
- path->mnt = &mounted->mnt;
- path->dentry = mounted->mnt.mnt_root;
- nd->flags |= LOOKUP_JUMPED;
- *seqp = read_seqcount_begin(&path->dentry->d_seq);
- /*
- * Update the inode too. We don't need to re-check the
- * dentry sequence number here after this d_inode read,
- * because a mount-point is always pinned.
- */
- *inode = path->dentry->d_inode;
- }
- return !read_seqretry(&mount_lock, nd->m_seq) &&
- !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
-}
-
-static int follow_dotdot_rcu(struct nameidata *nd)
-{
- struct inode *inode = nd->inode;
-
- while (1) {
- if (path_equal(&nd->path, &nd->root)) {
- if (unlikely(nd->flags & LOOKUP_BENEATH))
- return -ECHILD;
- break;
+ if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
+ int res = dentry->d_op->d_manage(path, true);
+ if (res)
+ return res == -EISDIR;
+ flags = dentry->d_flags;
}
- if (nd->path.dentry != nd->path.mnt->mnt_root) {
- struct dentry *old = nd->path.dentry;
- struct dentry *parent = old->d_parent;
- unsigned seq;
- inode = parent->d_inode;
- seq = read_seqcount_begin(&parent->d_seq);
- if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
- return -ECHILD;
- nd->path.dentry = parent;
- nd->seq = seq;
- if (unlikely(!path_connected(&nd->path)))
- return -ECHILD;
- break;
- } else {
- struct mount *mnt = real_mount(nd->path.mnt);
- struct mount *mparent = mnt->mnt_parent;
- struct dentry *mountpoint = mnt->mnt_mountpoint;
- struct inode *inode2 = mountpoint->d_inode;
- unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
- if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
- return -ECHILD;
- if (&mparent->mnt == nd->path.mnt)
- break;
- if (unlikely(nd->flags & LOOKUP_NO_XDEV))
- return -ECHILD;
- /* we know that mountpoint was pinned */
- nd->path.dentry = mountpoint;
- nd->path.mnt = &mparent->mnt;
- inode = inode2;
- nd->seq = seq;
+ if (flags & DCACHE_MOUNTED) {
+ struct mount *mounted = __lookup_mnt(path->mnt, dentry);
+ if (mounted) {
+ path->mnt = &mounted->mnt;
+ dentry = path->dentry = mounted->mnt.mnt_root;
+ nd->flags |= LOOKUP_JUMPED;
+ *seqp = read_seqcount_begin(&dentry->d_seq);
+ *inode = dentry->d_inode;
+ /*
+ * We don't need to re-check ->d_seq after this
+ * ->d_inode read - there will be an RCU delay
+ * between mount hash removal and ->mnt_root
+ * becoming unpinned.
+ */
+ flags = dentry->d_flags;
+ continue;
+ }
+ if (read_seqretry(&mount_lock, nd->m_seq))
+ return false;
}
+ return !(flags & DCACHE_NEED_AUTOMOUNT);
}
- while (unlikely(d_mountpoint(nd->path.dentry))) {
- struct mount *mounted;
- mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
- if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
- return -ECHILD;
- if (!mounted)
- break;
- if (unlikely(nd->flags & LOOKUP_NO_XDEV))
- return -ECHILD;
- nd->path.mnt = &mounted->mnt;
- nd->path.dentry = mounted->mnt.mnt_root;
- inode = nd->path.dentry->d_inode;
- nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
- }
- nd->inode = inode;
- return 0;
}
-/*
- * Follow down to the covering mount currently visible to userspace. At each
- * point, the filesystem owning that dentry may be queried as to whether the
- * caller is permitted to proceed or not.
- */
-int follow_down(struct path *path)
+static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
+ struct path *path, struct inode **inode,
+ unsigned int *seqp)
{
- unsigned managed;
+ bool jumped;
int ret;
- while (managed = READ_ONCE(path->dentry->d_flags),
- unlikely(managed & DCACHE_MANAGED_DENTRY)) {
- /* Allow the filesystem to manage the transit without i_mutex
- * being held.
- *
- * We indicate to the filesystem if someone is trying to mount
- * something here. This gives autofs the chance to deny anyone
- * other than its daemon the right to mount on its
- * superstructure.
- *
- * The filesystem may sleep at this point.
- */
- if (managed & DCACHE_MANAGE_TRANSIT) {
- BUG_ON(!path->dentry->d_op);
- BUG_ON(!path->dentry->d_op->d_manage);
- ret = path->dentry->d_op->d_manage(path, false);
- if (ret < 0)
- return ret == -EISDIR ? 0 : ret;
- }
-
- /* Transit to a mounted filesystem. */
- if (managed & DCACHE_MOUNTED) {
- struct vfsmount *mounted = lookup_mnt(path);
- if (!mounted)
- break;
- dput(path->dentry);
- mntput(path->mnt);
- path->mnt = mounted;
- path->dentry = dget(mounted->mnt_root);
- continue;
- }
-
- /* Don't handle automount points here */
- break;
- }
- return 0;
-}
-EXPORT_SYMBOL(follow_down);
-
-/*
- * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
- */
-static void follow_mount(struct path *path)
-{
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted = lookup_mnt(path);
- if (!mounted)
- break;
- dput(path->dentry);
- mntput(path->mnt);
- path->mnt = mounted;
- path->dentry = dget(mounted->mnt_root);
+ path->mnt = nd->path.mnt;
+ path->dentry = dentry;
+ if (nd->flags & LOOKUP_RCU) {
+ unsigned int seq = *seqp;
+ if (unlikely(!*inode))
+ return -ENOENT;
+ if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
+ return 0;
+ if (unlazy_child(nd, dentry, seq))
+ return -ECHILD;
+ // *path might've been clobbered by __follow_mount_rcu()
+ path->mnt = nd->path.mnt;
+ path->dentry = dentry;
}
-}
-
-static int path_parent_directory(struct path *path)
-{
- struct dentry *old = path->dentry;
- /* rare case of legitimate dget_parent()... */
- path->dentry = dget_parent(path->dentry);
- dput(old);
- if (unlikely(!path_connected(path)))
- return -ENOENT;
- return 0;
-}
-
-static int follow_dotdot(struct nameidata *nd)
-{
- while (1) {
- if (path_equal(&nd->path, &nd->root)) {
- if (unlikely(nd->flags & LOOKUP_BENEATH))
- return -EXDEV;
- break;
- }
- if (nd->path.dentry != nd->path.mnt->mnt_root) {
- int ret = path_parent_directory(&nd->path);
- if (ret)
- return ret;
- break;
- }
- if (!follow_up(&nd->path))
- break;
+ ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
+ if (jumped) {
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
- return -EXDEV;
+ ret = -EXDEV;
+ else
+ nd->flags |= LOOKUP_JUMPED;
}
- follow_mount(&nd->path);
- nd->inode = nd->path.dentry->d_inode;
- return 0;
+ if (unlikely(ret)) {
+ dput(path->dentry);
+ if (path->mnt != nd->path.mnt)
+ mntput(path->mnt);
+ } else {
+ *inode = d_backing_inode(path->dentry);
+ *seqp = 0; /* out of RCU mode, so the value doesn't matter */
+ }
+ return ret;
}
/*
@@ -1643,14 +1436,12 @@ static struct dentry *__lookup_hash(const struct qstr *name,
return dentry;
}
-static int lookup_fast(struct nameidata *nd,
- struct path *path, struct inode **inode,
- unsigned *seqp)
+static struct dentry *lookup_fast(struct nameidata *nd,
+ struct inode **inode,
+ unsigned *seqp)
{
- struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
int status = 1;
- int err;
/*
* Rename seqlock is not required here because in the off chance
@@ -1659,12 +1450,11 @@ static int lookup_fast(struct nameidata *nd,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
- bool negative;
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
if (unlikely(!dentry)) {
if (unlazy_walk(nd))
- return -ECHILD;
- return 0;
+ return ERR_PTR(-ECHILD);
+ return NULL;
}
/*
@@ -1672,9 +1462,8 @@ static int lookup_fast(struct nameidata *nd,
* the dentry name information from lookup.
*/
*inode = d_backing_inode(dentry);
- negative = d_is_negative(dentry);
if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
- return -ECHILD;
+ return ERR_PTR(-ECHILD);
/*
* This sequence count validates that the parent had no
@@ -1684,46 +1473,30 @@ static int lookup_fast(struct nameidata *nd,
* enough, we can use __read_seqcount_retry here.
*/
if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
- return -ECHILD;
+ return ERR_PTR(-ECHILD);
*seqp = seq;
status = d_revalidate(dentry, nd->flags);
- if (likely(status > 0)) {
- /*
- * Note: do negative dentry check after revalidation in
- * case that drops it.
- */
- if (unlikely(negative))
- return -ENOENT;
- path->mnt = mnt;
- path->dentry = dentry;
- if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
- return 1;
- }
+ if (likely(status > 0))
+ return dentry;
if (unlazy_child(nd, dentry, seq))
- return -ECHILD;
+ return ERR_PTR(-ECHILD);
if (unlikely(status == -ECHILD))
/* we'd been told to redo it in non-rcu mode */
status = d_revalidate(dentry, nd->flags);
} else {
dentry = __d_lookup(parent, &nd->last);
if (unlikely(!dentry))
- return 0;
+ return NULL;
status = d_revalidate(dentry, nd->flags);
}
if (unlikely(status <= 0)) {
if (!status)
d_invalidate(dentry);
dput(dentry);
- return status;
+ return ERR_PTR(status);
}
-
- path->mnt = mnt;
- path->dentry = dentry;
- err = follow_managed(path, nd);
- if (likely(err > 0))
- *inode = d_backing_inode(path->dentry);
- return err;
+ return dentry;
}
/* Fast lookup failed, do it the slow way */
@@ -1788,81 +1561,107 @@ static inline int may_lookup(struct nameidata *nd)
return inode_permission(nd->inode, MAY_EXEC);
}
-static inline int handle_dots(struct nameidata *nd, int type)
+static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
{
- if (type == LAST_DOTDOT) {
- int error = 0;
+ if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
+ return -ELOOP;
- if (!nd->root.mnt) {
- error = set_root(nd);
- if (error)
- return error;
- }
- if (nd->flags & LOOKUP_RCU)
- error = follow_dotdot_rcu(nd);
- else
- error = follow_dotdot(nd);
- if (error)
- return error;
+ if (likely(nd->depth != EMBEDDED_LEVELS))
+ return 0;
+ if (likely(nd->stack != nd->internal))
+ return 0;
+ if (likely(nd_alloc_stack(nd)))
+ return 0;
- if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
- /*
- * If there was a racing rename or mount along our
- * path, then we can't be sure that ".." hasn't jumped
- * above nd->root (and so userspace should retry or use
- * some fallback).
- */
- smp_rmb();
- if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
- return -EAGAIN;
- if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
- return -EAGAIN;
- }
+ if (nd->flags & LOOKUP_RCU) {
+ // we need to grab link before we do unlazy. And we can't skip
+ // unlazy even if we fail to grab the link - cleanup needs it
+ bool grabbed_link = legitimize_path(nd, link, seq);
+
+ if (unlazy_walk(nd) != 0 || !grabbed_link)
+ return -ECHILD;
+
+ if (nd_alloc_stack(nd))
+ return 0;
}
- return 0;
+ return -ENOMEM;
}
-static int pick_link(struct nameidata *nd, struct path *link,
- struct inode *inode, unsigned seq)
+enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
+
+static const char *pick_link(struct nameidata *nd, struct path *link,
+ struct inode *inode, unsigned seq, int flags)
{
- int error;
struct saved *last;
- if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) {
- path_to_nameidata(link, nd);
- return -ELOOP;
- }
- if (!(nd->flags & LOOKUP_RCU)) {
- if (link->mnt == nd->path.mnt)
- mntget(link->mnt);
- }
- error = nd_alloc_stack(nd);
+ const char *res;
+ int error = reserve_stack(nd, link, seq);
+
if (unlikely(error)) {
- if (error == -ECHILD) {
- if (unlikely(!legitimize_path(nd, link, seq))) {
- drop_links(nd);
- nd->depth = 0;
- nd->flags &= ~LOOKUP_RCU;
- nd->path.mnt = NULL;
- nd->path.dentry = NULL;
- rcu_read_unlock();
- } else if (likely(unlazy_walk(nd)) == 0)
- error = nd_alloc_stack(nd);
- }
- if (error) {
+ if (!(nd->flags & LOOKUP_RCU))
path_put(link);
- return error;
- }
+ return ERR_PTR(error);
}
-
last = nd->stack + nd->depth++;
last->link = *link;
clear_delayed_call(&last->done);
- nd->link_inode = inode;
last->seq = seq;
- return 1;
-}
-enum {WALK_FOLLOW = 1, WALK_MORE = 2};
+ if (flags & WALK_TRAILING) {
+ error = may_follow_link(nd, inode);
+ if (unlikely(error))
+ return ERR_PTR(error);
+ }
+
+ if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
+ return ERR_PTR(-ELOOP);
+
+ if (!(nd->flags & LOOKUP_RCU)) {
+ touch_atime(&last->link);
+ cond_resched();
+ } else if (atime_needs_update(&last->link, inode)) {
+ if (unlikely(unlazy_walk(nd)))
+ return ERR_PTR(-ECHILD);
+ touch_atime(&last->link);
+ }
+
+ error = security_inode_follow_link(link->dentry, inode,
+ nd->flags & LOOKUP_RCU);
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ res = READ_ONCE(inode->i_link);
+ if (!res) {
+ const char * (*get)(struct dentry *, struct inode *,
+ struct delayed_call *);
+ get = inode->i_op->get_link;
+ if (nd->flags & LOOKUP_RCU) {
+ res = get(NULL, inode, &last->done);
+ if (res == ERR_PTR(-ECHILD)) {
+ if (unlikely(unlazy_walk(nd)))
+ return ERR_PTR(-ECHILD);
+ res = get(link->dentry, inode, &last->done);
+ }
+ } else {
+ res = get(link->dentry, inode, &last->done);
+ }
+ if (!res)
+ goto all_done;
+ if (IS_ERR(res))
+ return res;
+ }
+ if (*res == '/') {
+ error = nd_jump_root(nd);
+ if (unlikely(error))
+ return ERR_PTR(error);
+ while (unlikely(*++res == '/'))
+ ;
+ }
+ if (*res)
+ return res;
+all_done: // pure jump
+ put_link(nd);
+ return NULL;