summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-12 09:28:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-12 09:28:55 -0700
commit6b1c776d3efbda31085b6a9f3bc7f774511fafd9 (patch)
treed306d6c2841b88b0a78ccdaf3532bb1e1f8260a0
parent58c7ffc0747a3a9145629d4966291f0586703767 (diff)
parentf4439de118283159ff165e52036134a278ebf990 (diff)
Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs updates from Miklos Szeredi:
 "This work from Amir introduces the inodes index feature, which
  provides:

   - hardlinks are not broken on copy up

   - infrastructure for overlayfs NFS export

  This also fixes constant st_ino for samefs case for lower hardlinks"

* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (33 commits)
  ovl: mark parent impure and restore timestamp on ovl_link_up()
  ovl: document copying layers restrictions with inodes index
  ovl: cleanup orphan index entries
  ovl: persistent overlay inode nlink for indexed inodes
  ovl: implement index dir copy up
  ovl: move copy up lock out
  ovl: rearrange copy up
  ovl: add flag for upper in ovl_entry
  ovl: use struct copy_up_ctx as function argument
  ovl: base tmpfile in workdir too
  ovl: factor out ovl_copy_up_inode() helper
  ovl: extract helper to get temp file in copy up
  ovl: defer upper dir lock to tempfile link
  ovl: hash overlay non-dir inodes by copy up origin
  ovl: cleanup bad and stale index entries on mount
  ovl: lookup index entry for copy up origin
  ovl: verify index dir matches upper dir
  ovl: verify upper root dir matches lower root dir
  ovl: introduce the inodes index dir feature
  ovl: generalize ovl_create_workdir()
  ...
-rw-r--r--Documentation/filesystems/overlayfs.txt34
-rw-r--r--fs/overlayfs/Kconfig20
-rw-r--r--fs/overlayfs/copy_up.c410
-rw-r--r--fs/overlayfs/dir.c52
-rw-r--r--fs/overlayfs/inode.c215
-rw-r--r--fs/overlayfs/namei.c368
-rw-r--r--fs/overlayfs/overlayfs.h58
-rw-r--r--fs/overlayfs/ovl_entry.h36
-rw-r--r--fs/overlayfs/readdir.c50
-rw-r--r--fs/overlayfs/super.c247
-rw-r--r--fs/overlayfs/util.c345
-rw-r--r--include/linux/fs.h4
12 files changed, 1456 insertions, 383 deletions
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index c9e884b52698..36f528a7fdd6 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -201,6 +201,40 @@ rightmost one and going left. In the above example lower1 will be the
top, lower2 the middle and lower3 the bottom layer.
+Sharing and copying layers
+--------------------------
+
+Lower layers may be shared among several overlay mounts and that is indeed
+a very common practice. An overlay mount may use the same lower layer
+path as another overlay mount and it may use a lower layer path that is
+beneath or above the path of another overlay lower layer path.
+
+Using an upper layer path and/or a workdir path that are already used by
+another overlay mount is not allowed and will fail with EBUSY. Using
+partially overlapping paths is not allowed but will not fail with EBUSY.
+
+Mounting an overlay using an upper layer path, where the upper layer path
+was previously used by another mounted overlay in combination with a
+different lower layer path, is allowed, unless the "inodes index" feature
+is enabled.
+
+With the "inodes index" feature, on the first mount, an NFS file
+handle of the lower layer root directory, along with the UUID of the lower
+filesystem, are encoded and stored in the "trusted.overlay.origin" extended
+attribute on the upper layer root directory. On subsequent mount attempts,
+the lower root directory file handle and lower filesystem UUID are compared
+to the stored origin in the upper root directory. On failure to verify the
+lower root origin, mount will fail with ESTALE. An overlayfs mount with
+"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem
+does not support NFS export or does not have a valid UUID, or if the
+upper filesystem does not support extended attributes.
+
+It is quite a common practice to copy overlay layers to a different
+directory tree on the same or different underlying filesystem, and even
+to a different machine. With the "inodes index" feature, trying to mount
+the copied layers will fail the verification of the lower root file handle.
+
+
Non-standard behavior
---------------------
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index c0c9683934b7..cbfc196e5dc5 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -23,3 +23,23 @@ config OVERLAY_FS_REDIRECT_DIR
Note, that redirects are not backward compatible. That is, mounting
an overlay which has redirects on a kernel that doesn't support this
feature will have unexpected results.
+
+config OVERLAY_FS_INDEX
+ bool "Overlayfs: turn on inodes index feature by default"
+ depends on OVERLAY_FS
+ help
+ If this config option is enabled then overlay filesystems will use
+ the inodes index dir to map lower inodes to upper inodes by default.
+ In this case it is still possible to turn off index globally with the
+ "index=off" module option or on a filesystem instance basis with the
+ "index=off" mount option.
+
+ The inodes index feature prevents breaking of lower hardlinks on copy
+ up.
+
+ Note that the inodes index feature is read-only backward compatible.
+ That is, mounting an overlay which has an index dir read-only on a
+ kernel that doesn't support this feature will not have any negative
+ outcomes. However, mounting the same overlay read-write with an old
+ kernel and then mounting it again with a new kernel will have
+ unexpected results.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index e5869f91b3ab..acb6f97deb97 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -233,12 +233,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
{
struct ovl_fh *fh;
int fh_type, fh_len, dwords;
void *buf;
int buflen = MAX_HANDLE_SZ;
+ uuid_t *uuid = &lower->d_sb->s_uuid;
buf = kmalloc(buflen, GFP_TEMPORARY);
if (!buf)
@@ -271,6 +272,14 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
fh->magic = OVL_FH_MAGIC;
fh->type = fh_type;
fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ /*
+ * When we will want to decode an overlay dentry from this handle
+ * and all layers are on the same fs, if we get a disconnected real
+ * dentry when we decode fid, the only way to tell if we should assign
+ * it to upperdentry or to lowerstack is by checking this flag.
+ */
+ if (is_upper)
+ fh->flags |= OVL_FH_FLAG_PATH_UPPER;
fh->len = fh_len;
fh->uuid = *uuid;
memcpy(fh->fid, buf, buflen);
@@ -283,7 +292,6 @@ out:
static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper)
{
- struct super_block *sb = lower->d_sb;
const struct ovl_fh *fh = NULL;
int err;
@@ -292,9 +300,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
* so we can use the overlay.origin xattr to distinguish between a copy
* up and a pure upper inode.
*/
- if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
- !uuid_is_null(&sb->s_uuid)) {
- fh = ovl_encode_fh(lower, &sb->s_uuid);
+ if (ovl_can_decode_fh(lower->d_sb)) {
+ fh = ovl_encode_fh(lower, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
@@ -309,84 +316,156 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
return err;
}
-static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
- struct dentry *dentry, struct path *lowerpath,
- struct kstat *stat, const char *link,
- struct kstat *pstat, bool tmpfile)
+struct ovl_copy_up_ctx {
+ struct dentry *parent;
+ struct dentry *dentry;
+ struct path lowerpath;
+ struct kstat stat;
+ struct kstat pstat;
+ const char *link;
+ struct dentry *destdir;
+ struct qstr destname;
+ struct dentry *workdir;
+ bool tmpfile;
+ bool origin;
+};
+
+static int ovl_link_up(struct ovl_copy_up_ctx *c)
+{
+ int err;
+ struct dentry *upper;
+ struct dentry *upperdir = ovl_dentry_upper(c->parent);
+ struct inode *udir = d_inode(upperdir);
+
+ /* Mark parent "impure" because it may now contain non-pure upper */
+ err = ovl_set_impure(c->parent, upperdir);
+ if (err)
+ return err;
+
+ err = ovl_set_nlink_lower(c->dentry);
+ if (err)
+ return err;
+
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ upper = lookup_one_len(c->dentry->d_name.name, upperdir,
+ c->dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (!IS_ERR(upper)) {
+ err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
+ true);
+ dput(upper);
+
+ if (!err) {
+ /* Restore timestamps on parent (best effort) */
+ ovl_set_timestamps(upperdir, &c->pstat);
+ ovl_dentry_set_upper_alias(c->dentry);
+ }
+ }
+ inode_unlock(udir);
+ ovl_set_nlink_upper(c->dentry);
+
+ return err;
+}
+
+static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
+ struct dentry **newdentry)
{
- struct inode *wdir = workdir->d_inode;
- struct inode *udir = upperdir->d_inode;
- struct dentry *newdentry = NULL;
- struct dentry *upper = NULL;
- struct dentry *temp = NULL;
int err;
+ struct dentry *upper;
+ struct inode *udir = d_inode(c->destdir);
+
+ upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ if (IS_ERR(upper))
+ return PTR_ERR(upper);
+
+ if (c->tmpfile)
+ err = ovl_do_link(temp, udir, upper, true);
+ else
+ err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
+
+ if (!err)
+ *newdentry = dget(c->tmpfile ? upper : temp);
+ dput(upper);
+
+ return err;
+}
+
+static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
+{
+ int err;
+ struct dentry *temp;
const struct cred *old_creds = NULL;
struct cred *new_creds = NULL;
struct cattr cattr = {
/* Can't properly set mode on creation because of the umask */
- .mode = stat->mode & S_IFMT,
- .rdev = stat->rdev,
- .link = link
+ .mode = c->stat.mode & S_IFMT,
+ .rdev = c->stat.rdev,
+ .link = c->link
};
- err = security_inode_copy_up(dentry, &new_creds);
+ err = security_inode_copy_up(c->dentry, &new_creds);
if (err < 0)
goto out;
if (new_creds)
old_creds = override_creds(new_creds);
- if (tmpfile)
- temp = ovl_do_tmpfile(upperdir, stat->mode);
- else
- temp = ovl_lookup_temp(workdir);
- err = 0;
- if (IS_ERR(temp)) {
- err = PTR_ERR(temp);
- temp = NULL;
+ if (c->tmpfile) {
+ temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+ if (IS_ERR(temp))
+ goto temp_err;
+ } else {
+ temp = ovl_lookup_temp(c->workdir);
+ if (IS_ERR(temp))
+ goto temp_err;
+
+ err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
+ NULL, true);
+ if (err) {
+ dput(temp);
+ goto out;
+ }
}
-
- if (!err && !tmpfile)
- err = ovl_create_real(wdir, temp, &cattr, NULL, true);
-
+ err = 0;
+ *tempp = temp;
+out:
if (new_creds) {
revert_creds(old_creds);
put_cred(new_creds);
}
- if (err)
- goto out;
+ return err;
- if (S_ISREG(stat->mode)) {
+temp_err:
+ err = PTR_ERR(temp);
+ goto out;
+}
+
+static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+ int err;
+
+ if (S_ISREG(c->stat.mode)) {
struct path upperpath;
- ovl_path_upper(dentry, &upperpath);
+ ovl_path_upper(c->dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
upperpath.dentry = temp;
- if (tmpfile) {
- inode_unlock(udir);
- err = ovl_copy_up_data(lowerpath, &upperpath,
- stat->size);
- inode_lock_nested(udir, I_MUTEX_PARENT);
- } else {
- err = ovl_copy_up_data(lowerpath, &upperpath,
- stat->size);
- }
-
+ err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
if (err)
- goto out_cleanup;
+ return err;
}
- err = ovl_copy_xattr(lowerpath->dentry, temp);
+ err = ovl_copy_xattr(c->lowerpath.dentry, temp);
if (err)
- goto out_cleanup;
+ return err;
inode_lock(temp->d_inode);
- err = ovl_set_attr(temp, stat);
+ err = ovl_set_attr(temp, &c->stat);
inode_unlock(temp->d_inode);
if (err)
- goto out_cleanup;
+ return err;
/*
* Store identifier of lower inode in upper inode xattr to
@@ -395,41 +474,48 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
* Don't set origin when we are breaking the association with a lower
* hard link.
*/
- if (S_ISDIR(stat->mode) || stat->nlink == 1) {
- err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+ if (c->origin) {
+ err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
if (err)
- goto out_cleanup;
+ return err;
}
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- if (IS_ERR(upper)) {
- err = PTR_ERR(upper);
- upper = NULL;
- goto out_cleanup;
- }
+ return 0;
+}
- if (tmpfile)
- err = ovl_do_link(temp, udir, upper, true);
- else
- err = ovl_do_rename(wdir, temp, udir, upper, 0);
+static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+{
+ struct inode *udir = c->destdir->d_inode;
+ struct dentry *newdentry = NULL;
+ struct dentry *temp = NULL;
+ int err;
+
+ err = ovl_get_tmpfile(c, &temp);
+ if (err)
+ goto out;
+
+ err = ovl_copy_up_inode(c, temp);
if (err)
goto out_cleanup;
- newdentry = dget(tmpfile ? upper : temp);
- ovl_dentry_update(dentry, newdentry);
- ovl_inode_update(d_inode(dentry), d_inode(newdentry));
+ if (c->tmpfile) {
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ err = ovl_install_temp(c, temp, &newdentry);
+ inode_unlock(udir);
+ } else {
+ err = ovl_install_temp(c, temp, &newdentry);
+ }
+ if (err)
+ goto out_cleanup;
- /* Restore timestamps on parent (best effort) */
- ovl_set_timestamps(upperdir, pstat);
+ ovl_inode_update(d_inode(c->dentry), newdentry);
out:
dput(temp);
- dput(upper);
return err;
out_cleanup:
- if (!tmpfile)
- ovl_cleanup(wdir, temp);
+ if (!c->tmpfile)
+ ovl_cleanup(d_inode(c->workdir), temp);
goto out;
}
@@ -442,78 +528,119 @@ out_cleanup:
* is possible that the copy up will lock the old parent. At that point
* the file will have already been copied up anyway.
*/
+static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+{
+ int err;
+ struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+ bool indexed = false;
+
+ if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
+ c->stat.nlink > 1)
+ indexed = true;
+
+ if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
+ c->origin = true;
+
+ if (indexed) {
+ c->destdir = ovl_indexdir(c->dentry->d_sb);
+ err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+ if (err)
+ return err;
+ } else {
+ /*
+ * Mark parent "impure" because it may now contain non-pure
+ * upper
+ */
+ err = ovl_set_impure(c->parent, c->destdir);
+ if (err)
+ return err;
+ }
+
+ /* Should we copyup with O_TMPFILE or with workdir? */
+ if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
+ c->tmpfile = true;
+ err = ovl_copy_up_locked(c);
+ } else {
+ err = -EIO;
+ if (lock_rename(c->workdir, c->destdir) != NULL) {
+ pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ } else {
+ err = ovl_copy_up_locked(c);
+ unlock_rename(c->workdir, c->destdir);
+ }
+ }
+
+ if (indexed) {
+ if (!err)
+ ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+ kfree(c->destname.name);
+ } else if (!err) {
+ struct inode *udir = d_inode(c->destdir);
+
+ /* Restore timestamps on parent (best effort) */
+ inode_lock(udir);
+ ovl_set_timestamps(c->destdir, &c->pstat);
+ inode_unlock(udir);
+
+ ovl_dentry_set_upper_alias(c->dentry);
+ }
+
+ return err;
+}
+
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
- struct path *lowerpath, struct kstat *stat)
+ int flags)
{
- DEFINE_DELAYED_CALL(done);
- struct dentry *workdir = ovl_workdir(dentry);
int err;
- struct kstat pstat;
+ DEFINE_DELAYED_CALL(done);
struct path parentpath;
- struct dentry *lowerdentry = lowerpath->dentry;
- struct dentry *upperdir;
- const char *link = NULL;
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct ovl_copy_up_ctx ctx = {
+ .parent = parent,
+ .dentry = dentry,
+ .workdir = ovl_workdir(dentry),
+ };
- if (WARN_ON(!workdir))
+ if (WARN_ON(!ctx.workdir))
return -EROFS;
- ovl_do_check_copy_up(lowerdentry);
-
- ovl_path_upper(parent, &parentpath);
- upperdir = parentpath.dentry;
-
- /* Mark parent "impure" because it may now contain non-pure upper */
- err = ovl_set_impure(parent, upperdir);
+ ovl_path_lower(dentry, &ctx.lowerpath);
+ err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
+ STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
- err = vfs_getattr(&parentpath, &pstat,
+ ovl_path_upper(parent, &parentpath);
+ ctx.destdir = parentpath.dentry;
+ ctx.destname = dentry->d_name;
+
+ err = vfs_getattr(&parentpath, &ctx.pstat,
STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
- if (S_ISLNK(stat->mode)) {
- link = vfs_get_link(lowerdentry, &done);
- if (IS_ERR(link))
- return PTR_ERR(link);
- }
-
- /* Should we copyup with O_TMPFILE or with workdir? */
- if (S_ISREG(stat->mode) && ofs->tmpfile) {
- err = ovl_copy_up_start(dentry);
- /* err < 0: interrupted, err > 0: raced with another copy-up */
- if (unlikely(err)) {
- pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
- if (err > 0)
- err = 0;
- goto out_done;
- }
-
- inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
- err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, link, &pstat, true);
- inode_unlock(upperdir->d_inode);
- ovl_copy_up_end(dentry);
- goto out_done;
- }
+ /* maybe truncate regular file. this has no effect on dirs */
+ if (flags & O_TRUNC)
+ ctx.stat.size = 0;
- err = -EIO;
- if (lock_rename(workdir, upperdir) != NULL) {
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
- goto out_unlock;
+ if (S_ISLNK(ctx.stat.mode)) {
+ ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
+ if (IS_ERR(ctx.link))
+ return PTR_ERR(ctx.link);
}
- if (ovl_dentry_upper(dentry)) {
- /* Raced with another copy-up? Nothing to do, then... */
- err = 0;
- goto out_unlock;
+ ovl_do_check_copy_up(ctx.lowerpath.dentry);
+
+ err = ovl_copy_up_start(dentry);
+ /* err < 0: interrupted, err > 0: raced with another copy-up */
+ if (unlikely(err)) {
+ if (err > 0)
+ err = 0;
+ } else {
+ if (!ovl_dentry_upper(dentry))
+ err = ovl_do_copy_up(&ctx);
+ if (!err && !ovl_dentry_has_upper_alias(dentry))
+ err = ovl_link_up(&ctx);
+ ovl_copy_up_end(dentry);
}
-
- err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, link, &pstat, false);
-out_unlock:
- unlock_rename(workdir, upperdir);
-out_done:
do_delayed_call(&done);
return err;
@@ -527,11 +654,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
while (!err) {
struct dentry *next;
struct dentry *parent;
- struct path lowerpath;
- struct kstat stat;
- enum ovl_path_type type = ovl_path_type(dentry);
- if (OVL_TYPE_UPPER(type))
+ /*
+ * Check if copy-up has happened and that the upper alias (in
+ * case of hard links) is there.
+ *
+ * Both checks are lockless:
+ * - false negatives: will recheck under oi->lock
+ * - false positives:
+ * + ovl_dentry_upper() uses memory barriers to ensure the
+ * upper dentry is up-to-date
+ * + ovl_dentry_has_upper_alias() relies on locking of
+ * upper parent i_rwsem to prevent reordering copy-up
+ * with rename.
+ */
+ if (ovl_dentry_upper(dentry) &&
+ ovl_dentry_has_upper_alias(dentry))
break;
next = dget(dentry);
@@ -539,22 +677,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
for (;;) {
parent = dget_parent(next);
- type = ovl_path_type(parent);
- if (OVL_TYPE_UPPER(type))
+ if (ovl_dentry_upper(parent))
break;
dput(next);
next = parent;
}
- ovl_path_lower(next, &lowerpath);
- err = vfs_getattr(&lowerpath, &stat,
- STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
- /* maybe truncate regular file. this has no effect on dirs */
- if (flags & O_TRUNC)
- stat.size = 0;
- if (!err)
- err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
+ err = ovl_copy_up_one(parent, next, flags);
dput(parent);
dput(next);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a63a71656e9b..641d9ee97f91 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(ovl_redirect_max,
"Maximum length of absolute redirect xattr value");
-void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
int err;
@@ -39,6 +39,8 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
wdentry, err);
}
+
+ return err;
}
struct dentry *ovl_lookup_temp(struct dentry *workdir)
@@ -154,12 +156,13 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
{
ovl_dentry_version_inc(dentry->d_parent);
- ovl_dentry_update(dentry, newdentry);
+ ovl_dentry_set_upper_alias(dentry);
if (!hardlink) {
- ovl_inode_update(inode, d_inode(newdentry));
+ ovl_inode_update(inode, newdentry);
ovl_copyattr(newdentry->d_inode, inode);
} else {
- WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
+ WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
+ dput(newdentry);
inc_nlink(inode);
}
d_instantiate(dentry, inode);
@@ -588,6 +591,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
struct dentry *new)
{
int err;
+ bool locked = false;
struct inode *inode;
err = ovl_want_write(old);
@@ -598,6 +602,10 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
goto out_drop_write;
+ err = ovl_nlink_start(old, &locked);
+ if (err)
+ goto out_drop_write;
+
inode = d_inode(old);
ihold(inode);
@@ -605,12 +613,18 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
iput(inode);
+ ovl_nlink_end(old, locked);
out_drop_write:
ovl_drop_write(old);
out:
return err;
}
+static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
+{
+ return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
+}
+
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
struct dentry *workdir = ovl_workdir(dentry);
@@ -646,7 +660,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && ovl_dentry_upper(dentry) &&
- upper != ovl_dentry_upper(dentry))) {
+ !ovl_matches_upper(dentry, upper))) {
goto out_dput_upper;
}
@@ -707,7 +721,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
- (!opaquedir && upper != ovl_dentry_upper(dentry)))
+ (!opaquedir && !ovl_matches_upper(dentry, upper)))
goto out_dput_upper;
if (is_dir)
@@ -735,8 +749,8 @@ out:
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
- enum ovl_path_type type;
int err;
+ bool locked = false;
const struct cred *old_cred;
err = ovl_want_write(dentry);
@@ -747,7 +761,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (err)
goto out_drop_write;
- type = ovl_path_type(dentry);
+ err = ovl_nlink_start(dentry, &locked);
+ if (err)
+ goto out_drop_write;
old_cred = ovl_override_creds(dentry->d_sb);
if (!ovl_lower_positive(dentry))
@@ -761,6 +777,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
else
drop_nlink(dentry->d_inode);
}
+ ovl_nlink_end(dentry, locked);
out_drop_write:
ovl_drop_write(dentry);
out:
@@ -883,6 +900,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
unsigned int flags)
{
int err;
+ bool locked = false;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
struct dentry *olddentry;
@@ -926,6 +944,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
err = ovl_copy_up(new);
if (err)
goto out_drop_write;
+ } else {
+ err = ovl_nlink_start(new, &locked);
+ if (err)
+ goto out_drop_write;
}
old_cred = ovl_override_creds(old->d_sb);
@@ -985,7 +1007,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_unlock;
err = -ESTALE;
- if (olddentry != ovl_dentry_upper(old))
+ if (!ovl_matches_upper(old, olddentry))
goto out_dput_old;
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
@@ -998,12 +1020,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
new_opaque = ovl_dentry_is_opaque(new);
err = -ESTALE;
- if (ovl_dentry_upper(new)) {
+ if (d_inode(new) && ovl_dentry_upper(new)) {
if (opaquedir) {
if (newdentry != opaquedir)
goto out_dput;
} else {
- if (newdentry != ovl_dentry_upper(new))
+ if (!ovl_matches_upper(new, newdentry))
goto out_dput;
}
} else {
@@ -1046,6 +1068,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (cleanup_whiteout)
ovl_cleanup(old_upperdir->d_inode, newdentry);
+ if (overwrite && d_inode(new)) {
+ if (new_is_dir)
+ clear_nlink(d_inode(new));
+ else
+ drop_nlink(d_inode(new));
+ }
+
ovl_dentry_version_inc(old->d_parent);
ovl_dentry_version_inc(new->d_parent);
@@ -1057,6 +1086,7 @@ out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
revert_creds(old_cred);
+ ovl_nlink_end(new, locked);
out_drop_write:
ovl_drop_write(old);
out:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index d613e2c41242..69f4fc26ee39 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -12,6 +12,7 @@
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
+#include <linux/ratelimit.h>
#include "overlayfs.h"
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
@@ -96,11 +97,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
WARN_ON_ONCE(stat->dev != lowerstat.dev);
/*
- * Lower hardlinks are broken on copy up to different
+ * Lower hardlinks may be broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
* for those different files, even for the same fs case.
+ * With inodes index enabled, it is safe to use st_ino
+ * of an indexed hardlinked origin. The index validates
+ * that the upper hardlink is not broken.
*/
- if (is_dir || lowerstat.nlink == 1)
+ if (is_dir || lowerstat.nlink == 1 ||
+ ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->ino = lowerstat.ino;
}
stat->dev = dentry->d_sb->s_dev;
@@ -126,6 +131,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (is_dir && OVL_TYPE_MERGE(type))
stat->nlink = 1;
+ /*
+ * Return the overlay inode nlinks for indexed upper inodes.
+ * Overlay inode nlink counts the union of the upper hardlinks
+ * and non-covered lower hardlinks. It does not include the upper
+ * index hardlink.
+ */
+ if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ stat->nlink = dentry->d_inode->i_nlink;
+
out:
revert_creds(old_cred);
@@ -134,8 +148,8 @@ out:
int ovl_permission(struct inode *inode, int mask)
{
- bool is_upper;
- struct inode *realinode = ovl_inode_real(inode, &is_upper);
+ struct inode *upperinode = ovl_inode_upper(inode);
+ struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
const struct cred *old_cred;
int err;
@@ -154,7 +168,8 @@ int ovl_permission(struct inode *inode, int mask)
return err;
old_cred = ovl_override_creds(inode->i_sb);
- if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+ if (!upperinode &&
+ !special_file(realinode->i_mode) && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
@@ -286,7 +301,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
- struct inode *realinode = ovl_inode_real(inode, NULL);
+ struct inode *realinode = ovl_inode_real(inode);
const struct cred *old_cred;
struct posix_acl *acl;
@@ -300,13 +315,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
return acl;
}
-static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
- struct dentry *realdentry)
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
- if (OVL_TYPE_UPPER(type))
+ if (ovl_dentry_upper(dentry) &&
+ ovl_dentry_has_upper_alias(dentry))
return false;
- if (special_file(realdentry->d_inode->i_mode))
+ if (special_file(d_inode(dentry)->i_mode))
return false;
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
@@ -318,11 +333,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
{
int err = 0;
- struct path realpath;
- enum ovl_path_type type;
- type = ovl_path_real(dentry, &realpath);
- if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
+ if (ovl_open_need_copy_up(dentry, file_flags)) {
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up_flags(dentry, file_flags);
@@ -440,6 +452,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
}
}
+/*
+ * With inodes index enabled, an overlay inode nlink counts the union of upper
+ * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
+ * upper inode, the following nlink modifying operations can happen:
+ *
+ * 1. Lower hardlink copy up
+ * 2. Upper hardlink created, unlinked or renamed over
+ * 3. Lower hardlink whiteout or renamed over
+ *
+ * For the first, copy up case, the union nlink does not change, whether the
+ * operation succeeds or fails, but the upper inode nlink may change.
+ * Therefore, before copy up, we store the union nlink value relative to the
+ * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ *
+ * For the second, upper hardlink case, the union nlink should be incremented
+ * or decremented IFF the operation succeeds, aligned with nlink change of the
+ * upper inode. Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in the index inode.
+ *
+ * For the last, lower cover up case, we simplify things by preceding the
+ * whiteout or cover up with copy up. This makes sure that there is an index
+ * upper inode where the nlink xattr can be stored before the copied up upper
+ * entry is unlinked.
+ */
+#define OVL_NLINK_ADD_UPPER (1 << 0)
+
+/*
+ * On-disk format for indexed nlink:
+ *
+ * nlink relative to the upper inode - "U[+-]NUM"
+ * nlink relative to the lower inode - "L[+-]NUM"
+ */
+
+static int ovl_set_nlink_common(struct dentry *dentry,
+ struct dentry *realdentry, const char *format)
+{
+ struct inode *inode = d_inode(dentry);
+ struct inode *realinode = d_inode(realdentry);
+ char buf[13];
+ int len;
+
+ len = snprintf(buf, sizeof(buf), format,
+ (int) (inode->i_nlink - realinode->i_nlink));
+
+ return ovl_do_setxattr(ovl_dentry_upper(dentry),
+ OVL_XATTR_NLINK, buf, len, 0);
+}
+
+int ovl_set_nlink_upper(struct dentry *dentry)
+{
+ return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
+}
+
+int ovl_set_nlink_lower(struct dentry *dentry)
+{
+ return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
+}
+
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+ struct dentry *upperdentry,
+ unsigned int fallback)
+{
+ int nlink_diff;
+ int nlink;
+ char buf[13];
+ int err;
+
+ if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
+ return fallback;
+
+ err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);