summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-02-04 11:45:21 +0000
committerLinus Torvalds <torvalds@linux-foundation.org>2020-02-04 11:45:21 +0000
commit7f879e1a94ac99586abf0659c03f35c1e48279c4 (patch)
treecf11e7944a7836bb6121f0fa4f3ba47d20d7eafb /fs
parenta45ad71e8995eed2b95c6ef0f4c442da0c4f6677 (diff)
parenta4ac9d45c0cd14a2adc872186431c79804b77dbf (diff)
Merge tag 'ovl-update-5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs update from Miklos Szeredi: - Try to preserve holes in sparse files when copying up, thus saving disk space and improving performance. - Fix a performance regression introduced in v4.19 by preserving asynchronicity of IO when fowarding to underlying layers. Add VFS helpers to submit async iocbs. - Fix a regression in lseek(2) introduced in v4.19 that breaks >2G seeks on 32bit kernels. - Fix a corner case where st_ino/st_dev was not preserved across copy up. - Miscellaneous fixes and cleanups. * tag 'ovl-update-5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: ovl: fix lseek overflow on 32bit ovl: add splice file read write helper ovl: implement async IO routines vfs: add vfs_iocb_iter_[read|write] helper functions ovl: layer is const ovl: fix corner case of non-constant st_dev;st_ino ovl: fix corner case of conflicting lower layer uuid ovl: generalize the lower_fs[] array ovl: simplify ovl_same_sb() helper ovl: generalize the lower_layers[] array ovl: improving copy-up efficiency for big sparse file ovl: use ovl_inode_lock in ovl_llseek() ovl: use pr_fmt auto generate prefix ovl: fix wrong WARN_ON() in ovl_cache_update_ino()
Diffstat (limited to 'fs')
-rw-r--r--fs/overlayfs/copy_up.c43
-rw-r--r--fs/overlayfs/dir.c10
-rw-r--r--fs/overlayfs/export.c28
-rw-r--r--fs/overlayfs/file.c162
-rw-r--r--fs/overlayfs/inode.c66
-rw-r--r--fs/overlayfs/namei.c38
-rw-r--r--fs/overlayfs/overlayfs.h24
-rw-r--r--fs/overlayfs/ovl_entry.h23
-rw-r--r--fs/overlayfs/readdir.c22
-rw-r--r--fs/overlayfs/super.c233
-rw-r--r--fs/overlayfs/util.c28
-rw-r--r--fs/read_write.c56
12 files changed, 505 insertions, 228 deletions
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 6220642fe113..9fc47c2e078d 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -24,7 +24,7 @@
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
- pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
+ pr_warn("\"check_copy_up\" module option is obsolete\n");
return 0;
}
@@ -123,6 +123,9 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
loff_t old_pos = 0;
loff_t new_pos = 0;
loff_t cloned;
+ loff_t data_pos = -1;
+ loff_t hole_len;
+ bool skip_hole = false;
int error = 0;
if (len == 0)
@@ -144,7 +147,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
goto out;
/* Couldn't clone, so now we try to copy the data */
- /* FIXME: copy up sparse files efficiently */
+ /* Check if lower fs supports seek operation */
+ if (old_file->f_mode & FMODE_LSEEK &&
+ old_file->f_op->llseek)
+ skip_hole = true;
+
while (len) {
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
long bytes;
@@ -157,6 +164,36 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
break;
}
+ /*
+ * Fill zero for hole will cost unnecessary disk space
+ * and meanwhile slow down the copy-up speed, so we do
+ * an optimization for hole during copy-up, it relies
+ * on SEEK_DATA implementation in lower fs so if lower
+ * fs does not support it, copy-up will behave as before.
+ *
+ * Detail logic of hole detection as below:
+ * When we detect next data position is larger than current
+ * position we will skip that hole, otherwise we copy
+ * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
+ * it may not recognize all kind of holes and sometimes
+ * only skips partial of hole area. However, it will be
+ * enough for most of the use cases.
+ */
+
+ if (skip_hole && data_pos < old_pos) {
+ data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
+ if (data_pos > old_pos) {
+ hole_len = data_pos - old_pos;
+ len -= hole_len;
+ old_pos = new_pos = data_pos;
+ continue;
+ } else if (data_pos == -ENXIO) {
+ break;
+ } else if (data_pos < 0) {
+ skip_hole = false;
+ }
+ }
+
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
@@ -480,7 +517,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
}
inode_lock(temp->d_inode);
- if (c->metacopy)
+ if (S_ISREG(c->stat.mode))
err = ovl_set_size(temp, &c->stat);
if (!err)
err = ovl_set_attr(temp, &c->stat);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 29abdb1d3b5c..8e57d5372b8f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -35,7 +35,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
dput(wdentry);
if (err) {
- pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
+ pr_err("cleanup of '%pd2' failed (%i)\n",
wdentry, err);
}
@@ -53,7 +53,7 @@ static struct dentry *ovl_lookup_temp(struct dentry *workdir)
temp = lookup_one_len(name, workdir, strlen(name));
if (!IS_ERR(temp) && temp->d_inode) {
- pr_err("overlayfs: workdir/%s already exists\n", name);
+ pr_err("workdir/%s already exists\n", name);
dput(temp);
temp = ERR_PTR(-EIO);
}
@@ -134,7 +134,7 @@ static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
- pr_warn("overlayfs: failed lookup after mkdir (%pd2, err=%i).\n",
+ pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
dentry, err);
return PTR_ERR(d);
}
@@ -267,7 +267,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
d_instantiate(dentry, inode);
if (inode != oip.newinode) {
- pr_warn_ratelimited("overlayfs: newly created inode found in cache (%pd2)\n",
+ pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
dentry);
}
@@ -1009,7 +1009,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
spin_unlock(&dentry->d_lock);
} else {
kfree(redirect);
- pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+ pr_warn_ratelimited("failed to set redirect (%i)\n",
err);
/* Fall back to userspace copy-up */
err = -EXDEV;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 70e55588aedc..6f54d70cef27 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -30,7 +30,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry)
}
if (err) {
- pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
dentry, err);
}
@@ -244,7 +244,7 @@ out:
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
+ pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
dentry, err, buflen, fh ? (int)fh->fb.len : 0,
fh ? fh->fb.type : 0);
goto out;
@@ -358,7 +358,7 @@ static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
*/
static struct dentry *ovl_lookup_real_one(struct dentry *connected,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct inode *dir = d_inode(connected);
struct dentry *this, *parent = NULL;
@@ -406,7 +406,7 @@ out:
return this;
fail:
- pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
this = ERR_PTR(err);
goto out;
@@ -414,17 +414,16 @@ fail:
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer);
+ const struct ovl_layer *layer);
/*
* Lookup an indexed or hashed overlay dentry by real inode.
*/
static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct ovl_fs *ofs = sb->s_fs_info;
- struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
struct dentry *index = NULL;
struct dentry *this = NULL;
struct inode *inode;
@@ -466,7 +465,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
* recursive call walks back from indexed upper to the topmost
* connected/hashed upper parent (or up to root).
*/
- this = ovl_lookup_real(sb, upper, &upper_layer);
+ this = ovl_lookup_real(sb, upper, &ofs->layers[0]);
dput(upper);
}
@@ -487,7 +486,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
*/
static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct dentry *next, *parent = NULL;
struct dentry *ancestor = ERR_PTR(-EIO);
@@ -540,7 +539,7 @@ static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
*/
static struct dentry *ovl_lookup_real(struct super_block *sb,
struct dentry *real,
- struct ovl_layer *layer)
+ const struct ovl_layer *layer)
{
struct dentry *connected;
int err = 0;
@@ -631,7 +630,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
return connected;
fail:
- pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
real, layer->idx, connected, err);
dput(connected);
return ERR_PTR(err);
@@ -646,8 +645,7 @@ static struct dentry *ovl_get_dentry(struct super_block *sb,
struct dentry *index)
{
struct ovl_fs *ofs = sb->s_fs_info;
- struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
- struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
+ const struct ovl_layer *layer = upper ? &ofs->layers[0] : lowerpath->layer;
struct dentry *real = upper ?: (index ?: lowerpath->dentry);
/*
@@ -822,7 +820,7 @@ out:
return dentry;
out_err:
- pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+ pr_warn_ratelimited("failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
fh_len, fh_type, flags, err);
dentry = ERR_PTR(err);
goto out;
@@ -831,7 +829,7 @@ out_err:
static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
- pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+ pr_warn_ratelimited("connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
return ERR_PTR(-EACCES);
}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index e235a635d9ec..a5317216de73 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -9,8 +9,19 @@
#include <linux/xattr.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
+#include <linux/splice.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
#include "overlayfs.h"
+struct ovl_aio_req {
+ struct kiocb iocb;
+ struct kiocb *orig_iocb;
+ struct fd fd;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
if (realinode != ovl_inode_upper(inode))
@@ -146,7 +157,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
struct fd real;
const struct cred *old_cred;
- ssize_t ret;
+ loff_t ret;
/*
* The two special cases below do not need to involve real fs,
@@ -171,7 +182,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
* limitations that are more strict than ->s_maxbytes for specific
* files, so we use the real file to perform seeks.
*/
- inode_lock(inode);
+ ovl_inode_lock(inode);
real.file->f_pos = file->f_pos;
old_cred = ovl_override_creds(inode->i_sb);
@@ -179,7 +190,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
revert_creds(old_cred);
file->f_pos = real.file->f_pos;
- inode_unlock(inode);
+ ovl_inode_unlock(inode);
fdput(real);
@@ -225,6 +236,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
return flags;
}
+static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+{
+ struct kiocb *iocb = &aio_req->iocb;
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ if (iocb->ki_flags & IOCB_WRITE) {
+ struct inode *inode = file_inode(orig_iocb->ki_filp);
+
+ file_end_write(iocb->ki_filp);
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ }
+
+ orig_iocb->ki_pos = iocb->ki_pos;
+ fdput(aio_req->fd);
+ kmem_cache_free(ovl_aio_request_cachep, aio_req);
+}
+
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+ struct ovl_aio_req *aio_req = container_of(iocb,
+ struct ovl_aio_req, iocb);
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ ovl_aio_cleanup_handler(aio_req);
+ orig_iocb->ki_complete(orig_iocb, res, res2);
+}
+
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
@@ -240,10 +278,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ if (is_sync_kiocb(iocb)) {
+ ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ } else {
+ struct ovl_aio_req *aio_req;
+
+ ret = -ENOMEM;
+ aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+ if (!aio_req)
+ goto out;
+
+ aio_req->fd = real;
+ real.flags = 0;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+out:
revert_creds(old_cred);
-
ovl_file_accessed(file);
fdput(real);
@@ -274,15 +330,33 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_unlock;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
- file_start_write(real.file);
- ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
- file_end_write(real.file);
+ if (is_sync_kiocb(iocb)) {
+ file_start_write(real.file);
+ ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ } else {
+ struct ovl_aio_req *aio_req;
+
+ ret = -ENOMEM;
+ aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+ if (!aio_req)
+ goto out;
+
+ file_start_write(real.file);
+ aio_req->fd = real;
+ real.flags = 0;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+out:
revert_creds(old_cred);
-
- /* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
-
fdput(real);
out_unlock:
@@ -291,6 +365,48 @@ out_unlock:
return ret;
}
+static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ ssize_t ret;
+ struct fd real;
+ const struct cred *old_cred;
+
+ ret = ovl_real_fdget(in, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(in)->i_sb);
+ ret = generic_file_splice_read(real.file, ppos, pipe, len, flags);
+ revert_creds(old_cred);
+
+ ovl_file_accessed(in);
+ fdput(real);
+ return ret;
+}
+
+static ssize_t
+ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ ret = ovl_real_fdget(out, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(out)->i_sb);
+ ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
+ revert_creds(old_cred);
+
+ ovl_file_accessed(out);
+ fdput(real);
+ return ret;
+}
+
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct fd real;
@@ -647,7 +763,25 @@ const struct file_operations ovl_file_operations = {
.fadvise = ovl_fadvise,
.unlocked_ioctl = ovl_ioctl,
.compat_ioctl = ovl_compat_ioctl,
+ .splice_read = ovl_splice_read,
+ .splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
};
+
+int __init ovl_aio_request_cache_init(void)
+{
+ ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+ sizeof(struct ovl_aio_req),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ovl_aio_request_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ovl_aio_request_cache_destroy(void)
+{
+ kmem_cache_destroy(ovl_aio_request_cachep);
+}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b045cf1826fc..79e8994e3bc1 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -75,10 +75,9 @@ out:
return err;
}
-static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
- struct ovl_layer *lower_layer)
+static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
{
- bool samefs = ovl_same_sb(dentry->d_sb);
+ bool samefs = ovl_same_fs(dentry->d_sb);
unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
if (samefs) {
@@ -100,12 +99,10 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
* persistent for a given layer configuration.
*/
if (stat->ino >> shift) {
- pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+ pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
dentry, stat->ino, xinobits);
} else {
- if (lower_layer)
- stat->ino |= ((u64)lower_layer->fsid) << shift;
-
+ stat->ino |= ((u64)fsid) << shift;
stat->dev = dentry->d_sb->s_dev;
return 0;
}
@@ -124,15 +121,14 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
*/
stat->dev = dentry->d_sb->s_dev;
stat->ino = dentry->d_inode->i_ino;
- } else if (lower_layer && lower_layer->fsid) {
+ } else {
/*
* For non-samefs setup, if we cannot map all layers st_ino
* to a unified address space, we need to make sure that st_dev
- * is unique per lower fs. Upper layer uses real st_dev and
- * lower layers use the unique anonymous bdev assigned to the
- * lower fs.
+ * is unique per underlying fs, so we use the unique anonymous
+ * bdev assigned to the underlying fs.
*/
- stat->dev = lower_layer->fs->pseudo_dev;
+ stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev;
}
return 0;
@@ -146,8 +142,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
struct path realpath;
const struct cred *old_cred;
bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
- bool samefs = ovl_same_sb(dentry->d_sb);
- struct ovl_layer *lower_layer = NULL;
+ int fsid = 0;
int err;
bool metacopy_blocks = false;
@@ -168,9 +163,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
* If lower filesystem supports NFS file handles, this also guaranties
* persistent st_ino across mount cycle.
*/
- if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+ if (!is_dir || ovl_same_dev(dentry->d_sb)) {
if (!OVL_TYPE_UPPER(type)) {
- lower_layer = ovl_layer_lower(dentry);
+ fsid = ovl_layer_lower(dentry)->fsid;
} else if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
u32 lowermask = STATX_INO | STATX_BLOCKS |
@@ -200,14 +195,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
(!ovl_verify_lower(dentry->d_sb) &&
(is_dir || lowerstat.nlink == 1))) {
- lower_layer = ovl_layer_lower(dentry);
- /*
- * Cannot use origin st_dev;st_ino because
- * origin inode content may differ from overlay
- * inode content.
- */
- if (samefs || lower_layer->fsid)
- stat->ino = lowerstat.ino;
+ fsid = ovl_layer_lower(dentry)->fsid;
+ stat->ino = lowerstat.ino;
}
/*
@@ -241,7 +230,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
}
}
- err = ovl_map_dev_ino(dentry, stat, lower_layer);
+ err = ovl_map_dev_ino(dentry, stat, fsid);
if (err)
goto out;
@@ -527,6 +516,27 @@ static const struct address_space_operations ovl_aops = {
* [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
* [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
* [...] &type->i_mutex_dir_key (stack_depth=0)
+ *
+ * Locking order w.r.t ovl_want_write() is important for nested overlayfs.
+ *
+ * This chain is valid:
+ * - inode->i_rwsem (inode_lock[2])
+ * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0])
+ * - OVL_I(inode)->lock (ovl_inode_lock[2])
+ * - OVL_I(lowerinode)->lock (ovl_inode_lock[1])
+ *
+ * And this chain is valid:
+ * - inode->i_rwsem (inode_lock[2])
+ * - OVL_I(inode)->lock (ovl_inode_lock[2])
+ * - lowerinode->i_rwsem (inode_lock[1])
+ * - OVL_I(lowerinode)->lock (ovl_inode_lock[1])
+ *
+ * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is
+ * held, because it is in reverse order of the non-nested case using the same
+ * upper fs:
+ * - inode->i_rwsem (inode_lock[1])
+ * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0])
+ * - OVL_I(inode)->lock (ovl_inode_lock[1])
*/
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
@@ -565,7 +575,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
* ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
* upper inode i_ino on ovl_inode_init() or ovl_inode_update().
*/
- if (ovl_same_sb(inode->i_sb) || xinobits) {
+ if (ovl_same_dev(inode->i_sb)) {
inode->i_ino = ino;
if (xinobits && fsid && !(ino >> (64 - xinobits)))
inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
@@ -698,7 +708,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry,
return nlink;
fail:
- pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+ pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n",
upperdentry, err);
return fallback;
}
@@ -969,7 +979,7 @@ out:
return inode;
out_err:
- pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err);
+ pr_warn_ratelimited("failed to get inode (%i)\n", err);
inode = ERR_PTR(err);
goto out;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 76ff66339173..ed9e129fae04 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -141,10 +141,10 @@ out:
return NULL;
fail:
- pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+ pr_warn_ratelimited("failed to get origin (%i)\n", res);
goto out;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
goto out;
}
@@ -322,16 +322,16 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *origin = NULL;
int i;
- for (i = 0; i < ofs->numlower; i++) {
+ for (i = 1; i < ofs->numlayer; i++) {
/*
* If lower fs uuid is not unique among lower fs we cannot match
* fh->uuid to layer.
*/
- if (ofs->lower_layers[i].fsid &&
- ofs->lower_layers[i].fs->bad_uuid)
+ if (ofs->layers[i].fsid &&
+ ofs->layers[i].fs->bad_uuid)
continue;
- origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
+ origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
connected);
if (origin)
break;
@@ -354,13 +354,13 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
}
**stackp = (struct ovl_path){
.dentry = origin,
- .layer = &ofs->lower_layers[i]
+ .layer = &ofs->layers[i]
};
return 0;
invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+ pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
dput(origin);
@@ -449,7 +449,7 @@ out:
fail:
inode = d_inode(real);
- pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+ pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
is_upper ? "upper" : "origin", real,
inode ? inode->i_ino : 0, err);
goto out;
@@ -475,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
return upper ?: ERR_PTR(-ESTALE);
if (!d_is_dir(upper)) {
- pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
+ pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
index, upper);
dput(upper);
return ERR_PTR(-EIO);
@@ -589,12 +589,12 @@ out:
return err;
fail:
- pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+ pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
index, d_inode(index)->i_mode & S_IFMT, err);
goto out;
orphan:
- pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+ pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
index, d_inode(index)->i_mode & S_IFMT,
d_inode(index)->i_nlink);
err = -ENOENT;
@@ -696,7 +696,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
index = NULL;
goto out;
}
- pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+ pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
err);
@@ -723,13 +723,13 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
* unlinked, which means that finding a lower origin on lookup
* whose index is a whiteout should be treated as an error.
*/
- pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+ pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
index, d_inode(index)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
goto fail;
} else if (is_dir && verify) {
if (!upper) {
- pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+ pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
origin, index);
goto fail;
}
@@ -738,7 +738,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
err = ovl_verify_upper(index, upper, false);
if (err) {
if (err == -ESTALE) {
- pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+ pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
upper, origin, index);
}
goto fail;
@@ -885,7 +885,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (!d.stop && poe->numlower) {
err = -ENOMEM;
- stack = kcalloc(ofs->numlower, sizeof(struct ovl_path),
+ stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
GFP_KERNEL);
if (!stack)
goto out_put_upper;
@@ -967,7 +967,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
*/
err = -EPERM;
if (d.redirect && !ofs->config.redirect_follow) {
- pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
+ pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
dentry);
goto out_put;
}
@@ -994,7 +994,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
err = -EPERM;
if (!ofs->config.metacopy) {
- pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
+ pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n",
dentry);
goto out_put;
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f283b1d69a9e..3623d28aa4fa 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,9 @@
#include <linux/fs.h>
#include "ovl_entry.h"
+#undef pr_fmt
+#define pr_fmt(fmt) "overlayfs: " fmt
+
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
__OVL_PATH_MERGE = (1 << 1),
@@ -221,7 +224,6 @@ int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
-struct super_block *ovl_same_sb(struct super_block *sb);
int ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
bool ovl_index_all(struct super_block *sb);
@@ -237,7 +239,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
-struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
+const struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_i_dentry_upper(struct inode *inode);
struct inode *ovl_inode_upper(struct inode *inode);
@@ -299,11 +301,21 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
}
-static inline unsigned int ovl_xino_bits(struct super_block *sb)
+/* All layers on same fs? */
+static inline bool ovl_same_fs(struct super_block *sb)
+{
+ return OVL_FS(sb)->xino_mode == 0;
+}
+
+/* All overlay inodes have same st_dev? */
+static inline bool ovl_same_dev(struct super_block *sb)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ return OVL_FS(sb)->xino_mode >= 0;
+}
- return ofs->xino_bits;
+static inline unsigned int ovl_xino_bits(struct super_block *sb)
+{
+ return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0;
}
static inline int ovl_inode_lock(struct inode *inode)
@@ -438,6 +450,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
/* file.c */
extern const struct file_operations ovl_file_operations;
+int __init ovl_aio_request_cache_init(void);
+void ovl_aio_request_cache_destroy(void);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 28348c44ea5b..89015ea822e7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -24,6 +24,8 @@ struct ovl_sb {
dev_t pseudo_dev;
/* Unusable (conflicting) uuid */
bool bad_uuid;
+ /* Used as a lower layer (but maybe also as upper) */
+ bool is_lower;
};
struct ovl_layer {
@@ -38,18 +40,18 @@ struct ovl_layer {
};
struct ovl_path {
- struct ovl_layer *layer;
+ const struct ovl_layer *layer;
struct dentry *dentry;
};
/* private information held for overlayfs's superblock */
struct ovl_fs {
struct vfsmount *upper_mnt;
- unsigned int numlower;
- /* Number of unique lower sb that differ from upper sb */
- unsigned int numlowerfs;
- struct ovl_layer *lower_layers;
- struct ovl_sb *lower_fs;
+ unsigned int numlayer;
+ /* Number of unique fs among layers including upper fs */
+ unsigned int numfs;
+ const struct ovl_layer *layers;
+ struct ovl_sb *fs;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
/* workdir is the 'work' directory under workbasedir */
@@ -71,10 +73,15 @@ struct ovl_fs {
struct inode *workbasedir_trap;
struct inode *workdir_trap;
struct inode *indexdir_trap;
- /* Inode numbers in all layers do not use the high xino_bits */
- unsigned int xino_bits;
+ /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
+ int xino_mode;
};
+static inline struct ovl_fs *OVL_FS(struct super_block *sb)
+{
+ return (struct ovl_fs *)sb->s_fs_info;
+}
+
/* private information held for every overlayfs dentry */
struct ovl_entry {
union {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 47a91c9733a5..40ac9ce2465a 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -441,7 +441,7 @@ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
const char *name, int namelen)
{
if (ino >> (64 - xinobits)) {
- pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",