From e7f52429b4a5b2e3224948d1737eb264c8f7e15f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:53 +0200 Subject: ovl: check if upperdir fs supports O_TMPFILE This is needed for choosing between concurrent copyup using O_TMPFILE and legacy copyup using workdir+rename. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 9 +++++++++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/super.c | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8af450b0e57a..3822a909ce1f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -127,6 +127,15 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) return err; } +static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) +{ + struct dentry *ret = vfs_tmpfile(dentry, mode, 0); + int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; + + pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); + return ret; +} + static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) { unsigned long x = (unsigned long) READ_ONCE(inode->i_private); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index d14bca1850d9..65f240001aa6 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -27,6 +27,7 @@ struct ovl_fs { struct ovl_config config; /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; + bool tmpfile; }; /* private information held for every overlayfs dentry */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 20f48abbb82f..ff05065b510f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -825,6 +825,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) * creation of workdir in previous step. */ if (ufs->workdir) { + struct dentry *temp; + err = ovl_check_d_type_supported(&workpath); if (err < 0) goto out_put_workdir; @@ -836,6 +838,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) */ if (!err) pr_warn("overlayfs: upper fs needs to support d_type.\n"); + + /* Check if upper/work fs supports O_TMPFILE */ + temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); + ufs->tmpfile = !IS_ERR(temp); + if (ufs->tmpfile) + dput(temp); + else + pr_warn("overlayfs: upper fs does not support tmpfile.\n"); } } -- cgit v1.2.3 From 42f269b925405d9dd45014823e5057786d6ca452 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:54 +0200 Subject: ovl: rearrange code in ovl_copy_up_locked() As preparation to implementing copy up with O_TMPFILE, name the variable for dentry before final rename 'temp' and assign it to 'newdentry' only after rename. Also lookup upper dentry before looking up temp dentry and move ovl_set_timestamps() into ovl_copy_up_locked(), because that is going to be more convenient for upcoming change. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace62..01e332725e94 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -232,12 +232,14 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, const char *link) + struct kstat *stat, const char *link, + struct kstat *pstat) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; struct dentry *newdentry = NULL; struct dentry *upper = NULL; + struct dentry *temp = NULL; int err; const struct cred *old_creds = NULL; struct cred *new_creds = NULL; @@ -248,25 +250,25 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, .link = link }; - newdentry = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(newdentry); - if (IS_ERR(newdentry)) - goto out; - upper = lookup_one_len(dentry->d_name.name, upperdir, dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out1; + goto out; err = security_inode_copy_up(dentry, &new_creds); if (err < 0) - goto out2; + goto out1; if (new_creds) old_creds = override_creds(new_creds); - err = ovl_create_real(wdir, newdentry, &cattr, NULL, true); + temp = ovl_lookup_temp(workdir, dentry); + err = PTR_ERR(temp); + if (IS_ERR(temp)) + goto out1; + + err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { revert_creds(old_creds); @@ -281,39 +283,42 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, ovl_path_upper(dentry, &upperpath); BUG_ON(upperpath.dentry != NULL); - upperpath.dentry = newdentry; + upperpath.dentry = temp; err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); if (err) goto out_cleanup; } - err = ovl_copy_xattr(lowerpath->dentry, newdentry); + err = ovl_copy_xattr(lowerpath->dentry, temp); if (err) goto out_cleanup; - inode_lock(newdentry->d_inode); - err = ovl_set_attr(newdentry, stat); - inode_unlock(newdentry->d_inode); + inode_lock(temp->d_inode); + err = ovl_set_attr(temp, stat); + inode_unlock(temp->d_inode); if (err) goto out_cleanup; - err = ovl_do_rename(wdir, newdentry, udir, upper, 0); + err = ovl_do_rename(wdir, temp, udir, upper, 0); if (err) goto out_cleanup; + newdentry = dget(temp); ovl_dentry_update(dentry, newdentry); ovl_inode_update(d_inode(dentry), d_inode(newdentry)); - newdentry = NULL; + + /* Restore timestamps on parent (best effort) */ + ovl_set_timestamps(upperdir, pstat); out2: - dput(upper); + dput(temp); out1: - dput(newdentry); + dput(upper); out: return err; out_cleanup: - ovl_cleanup(wdir, newdentry); + ovl_cleanup(wdir, temp); goto out2; } @@ -368,11 +373,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link); - if (!err) { - /* Restore timestamps on parent (best effort) */ - ovl_set_timestamps(upperdir, &pstat); - } + stat, link, &pstat); out_unlock: unlock_rename(workdir, upperdir); do_delayed_call(&done); -- cgit v1.2.3 From d8514d8edb5b045cf7f708e14f888ce760d60f0b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:55 +0200 Subject: ovl: copy up regular file using O_TMPFILE In preparation for concurrent copy up, implement copy up of regular file as O_TMPFILE that is linked to upperdir instead of a file in workdir that is moved to upperdir. Suggested-by: Al Viro Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 01e332725e94..6e39e90b5605 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -20,6 +20,7 @@ #include #include #include "overlayfs.h" +#include "ovl_entry.h" #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) @@ -233,7 +234,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, struct kstat *stat, const char *link, - struct kstat *pstat) + struct kstat *pstat, bool tmpfile) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; @@ -263,12 +264,17 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (new_creds) old_creds = override_creds(new_creds); - temp = ovl_lookup_temp(workdir, dentry); + if (tmpfile) + temp = ovl_do_tmpfile(upperdir, stat->mode); + else + temp = ovl_lookup_temp(workdir, dentry); err = PTR_ERR(temp); if (IS_ERR(temp)) goto out1; - err = ovl_create_real(wdir, temp, &cattr, NULL, true); + err = 0; + if (!tmpfile) + err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { revert_creds(old_creds); @@ -300,11 +306,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (err) goto out_cleanup; - err = ovl_do_rename(wdir, temp, udir, upper, 0); + if (tmpfile) + err = ovl_do_link(temp, udir, upper, true); + else + err = ovl_do_rename(wdir, temp, udir, upper, 0); if (err) goto out_cleanup; - newdentry = dget(temp); + newdentry = dget(tmpfile ? upper : temp); ovl_dentry_update(dentry, newdentry); ovl_inode_update(d_inode(dentry), d_inode(newdentry)); @@ -318,7 +327,8 @@ out: return err; out_cleanup: - ovl_cleanup(wdir, temp); + if (!tmpfile) + ovl_cleanup(wdir, temp); goto out2; } @@ -342,6 +352,9 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *lowerdentry = lowerpath->dentry; struct dentry *upperdir; const char *link = NULL; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + /* Should we copyup with O_TMPFILE or with workdir? */ + bool tmpfile = S_ISREG(stat->mode) && ofs->tmpfile; if (WARN_ON(!workdir)) return -EROFS; @@ -373,7 +386,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat); + stat, link, &pstat, tmpfile); out_unlock: unlock_rename(workdir, upperdir); do_delayed_call(&done); -- cgit v1.2.3 From 39d3d60a54df05a1a32fa71159d7a26a530dee6c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:56 +0200 Subject: ovl: introduce copy up waitqueue The overlay sb 'copyup_wq' and overlay inode 'copying' condition variable are about to replace the upper sb rename_lock, as finer grained synchronization objects for concurrent copy up. Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/ovl_entry.h | 2 ++ fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3822a909ce1f..741dc0b6931f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -178,6 +178,8 @@ void ovl_dentry_version_inc(struct dentry *dentry); u64 ovl_dentry_version_get(struct dentry *dentry); bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); +int ovl_copy_up_start(struct dentry *dentry); +void ovl_copy_up_end(struct dentry *dentry); /* namei.c */ int ovl_path_next(int idx, struct dentry *dentry, struct path *path); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 65f240001aa6..59614faa14c3 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -28,6 +28,7 @@ struct ovl_fs { /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; bool tmpfile; + wait_queue_head_t copyup_wq; }; /* private information held for every overlayfs dentry */ @@ -39,6 +40,7 @@ struct ovl_entry { u64 version; const char *redirect; bool opaque; + bool copying; }; struct rcu_head rcu; }; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ff05065b510f..6792bb70b4ac 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -708,6 +708,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs) goto out; + init_waitqueue_head(&ufs->copyup_wq); ufs->config.redirect_dir = ovl_redirect_dir_def; err = ovl_parse_opt((char *) data, &ufs->config); if (err) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 952286f4826c..01157d6e8cfe 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -263,3 +263,33 @@ struct file *ovl_path_open(struct path *path, int flags) { return dentry_open(path, flags | O_NOATIME, current_cred()); } + +int ovl_copy_up_start(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + int err; + + spin_lock(&ofs->copyup_wq.lock); + err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying); + if (!err) { + if (oe->__upperdentry) + err = 1; /* Already copied up */ + else + oe->copying = true; + } + spin_unlock(&ofs->copyup_wq.lock); + + return err; +} + +void ovl_copy_up_end(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + + spin_lock(&ofs->copyup_wq.lock); + oe->copying = false; + wake_up_locked(&ofs->copyup_wq); + spin_unlock(&ofs->copyup_wq.lock); +} -- cgit v1.2.3 From 01ad3eb8a07385bc8849f0ee7c800e7c8bd7287e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:57 +0200 Subject: ovl: concurrent copy up of regular files Now that copy up of regular file is done using O_TMPFILE, we don't need to hold rename_lock throughout copy up. Use the copy up waitqueue to synchronize concurrent copy up of the same file. Different regular files can be copied up concurrently. The upper dir inode_lock is taken instead of rename_lock, because it is needed for lookup and later for linking the temp file, but it is released while copying up data. Suggested-by: Al Viro Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 6e39e90b5605..48eb8812ac5b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -291,7 +291,16 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, BUG_ON(upperpath.dentry != NULL); upperpath.dentry = temp; - err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); + if (tmpfile) { + inode_unlock(udir); + err = ovl_copy_up_data(lowerpath, &upperpath, + stat->size); + inode_lock_nested(udir, I_MUTEX_PARENT); + } else { + err = ovl_copy_up_data(lowerpath, &upperpath, + stat->size); + } + if (err) goto out_cleanup; } @@ -353,8 +362,6 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *upperdir; const char *link = NULL; struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - /* Should we copyup with O_TMPFILE or with workdir? */ - bool tmpfile = S_ISREG(stat->mode) && ofs->tmpfile; if (WARN_ON(!workdir)) return -EROFS; @@ -374,6 +381,25 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return PTR_ERR(link); } + /* Should we copyup with O_TMPFILE or with workdir? */ + if (S_ISREG(stat->mode) && ofs->tmpfile) { + err = ovl_copy_up_start(dentry); + /* err < 0: interrupted, err > 0: raced with another copy-up */ + if (unlikely(err)) { + pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err); + if (err > 0) + err = 0; + goto out_done; + } + + inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT); + err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, + stat, link, &pstat, true); + inode_unlock(upperdir->d_inode); + ovl_copy_up_end(dentry); + goto out_done; + } + err = -EIO; if (lock_rename(workdir, upperdir) != NULL) { pr_err("overlayfs: failed to lock workdir+upperdir\n"); @@ -386,9 +412,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat, tmpfile); + stat, link, &pstat, false); out_unlock: unlock_rename(workdir, upperdir); +out_done: do_delayed_call(&done); return err; -- cgit v1.2.3 From e593b2bf513dd4d3fbfa0f435392eea2c7f776f0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 23 Jan 2017 14:32:21 +0200 Subject: ovl: properly implement sync_filesystem() overlayfs syncs all inode pages on sync_filesystem(), but it also needs to call s_op->sync_fs() of upper fs for metadata sync. This fixes correctness of syncfs(2) as demonstrated by following xfs specific test: xfs_sync_stats() { echo $1 echo -n "xfs_log_force = " grep log /proc/fs/xfs/stat | awk '{ print $5 }' } xfs_sync_stats "before touch" touch x xfs_sync_stats "after touch" xfs_io -c syncfs . xfs_sync_stats "after syncfs" xfs_io -c fsync x xfs_sync_stats "after fsync" xfs_io -c fsync x xfs_sync_stats "after fsync #2" When this test is run in overlay mount over xfs, log force count does not increase with syncfs command. Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6792bb70b4ac..346f668e7df0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -160,6 +160,25 @@ static void ovl_put_super(struct super_block *sb) kfree(ufs); } +static int ovl_sync_fs(struct super_block *sb, int wait) +{ + struct ovl_fs *ufs = sb->s_fs_info; + struct super_block *upper_sb; + int ret; + + if (!ufs->upper_mnt) + return 0; + upper_sb = ufs->upper_mnt->mnt_sb; + if (!upper_sb->s_op->sync_fs) + return 0; + + /* real inodes have already been synced by sync_filesystem(ovl_sb) */ + down_read(&upper_sb->s_umount); + ret = upper_sb->s_op->sync_fs(upper_sb, wait); + up_read(&upper_sb->s_umount); + return ret; +} + /** * ovl_statfs * @sb: The overlayfs super block @@ -222,6 +241,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations ovl_super_operations = { .put_super = ovl_put_super, + .sync_fs = ovl_sync_fs, .statfs = ovl_statfs, .show_options = ovl_show_options, .remount_fs = ovl_remount, -- cgit v1.2.3 From 51f8f3c4e22535933ef9aecc00e9a6069e051b57 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 10 Jan 2017 21:30:21 +0300 Subject: ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials If overlay was mounted by root then quota set for upper layer does not work because overlay now always use mounter's credentials for operations. Also overlay might deplete reserved space and inodes in ext4. This patch drops capability SYS_RESOURCE from saved credentials. This affects creation new files, whiteouts, and copy-up operations. Signed-off-by: Konstantin Khlebnikov Fixes: 1175b6b8d963 ("ovl: do operations on underlying file system in mounter's context") Cc: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 346f668e7df0..7b2188181367 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -721,6 +721,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int stacklen = 0; unsigned int i; bool remote = false; + struct cred *cred; int err; err = -ENOMEM; @@ -901,10 +902,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; - ufs->creator_cred = prepare_creds(); - if (!ufs->creator_cred) + ufs->creator_cred = cred = prepare_creds(); + if (!cred) goto out_put_lower_mnt; + /* Never override disk quota limits or use reserved space */ + cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + err = -ENOMEM; oe = ovl_alloc_entry(numlower); if (!oe) -- cgit v1.2.3