summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 16:24:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 16:24:01 -0700
commit1d9d7cbf28a1c2f84f2a0224466f8eb5f0a62ace (patch)
tree35aa9ec8433f757073f21e1229e97d736b0c5593 /fs
parent2c45e7fbc962be1b03f2c2af817a76f5ba810af2 (diff)
parent00abf69dd24f4444d185982379c5cc3bb7b6d1fc (diff)
Merge tag 'ceph-for-5.2-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "On the filesystem side we have: - a fix to enforce quotas set above the mount point (Luis Henriques) - support for exporting snapshots through NFS (Zheng Yan) - proper statx implementation (Jeff Layton). statx flags are mapped to MDS caps, with AT_STATX_{DONT,FORCE}_SYNC taken into account. - some follow-up dentry name handling fixes, in particular elimination of our hand-rolled helper and the switch to __getname() as suggested by Al (Jeff Layton) - a set of MDS client cleanups in preparation for async MDS requests in the future (Jeff Layton) - a fix to sync the filesystem before remounting (Jeff Layton) On the rbd side, work is on-going on object-map and fast-diff image features" * tag 'ceph-for-5.2-rc1' of git://github.com/ceph/ceph-client: (29 commits) ceph: flush dirty inodes before proceeding with remount ceph: fix unaligned access in ceph_send_cap_releases libceph: make ceph_pr_addr take an struct ceph_entity_addr pointer libceph: fix unaligned accesses in ceph_entity_addr handling rbd: don't assert on writes to snapshots rbd: client_mutex is never nested ceph: print inode number in __caps_issued_mask debugging messages ceph: just call get_session in __ceph_lookup_mds_session ceph: simplify arguments and return semantics of try_get_cap_refs ceph: fix comment over ceph_drop_caps_for_unlink ceph: move wait for mds request into helper function ceph: have ceph_mdsc_do_request call ceph_mdsc_submit_request ceph: after an MDS request, do callback and completions ceph: use pathlen values returned by set_request_path_attr ceph: use __getname/__putname in ceph_mdsc_build_path ceph: use ceph_mdsc_build_path instead of clone_dentry_name ceph: fix potential use-after-free in ceph_mdsc_build_path ceph: dump granular cap info in "caps" debugfs file ceph: make iterate_session_caps a public symbol ceph: fix NULL pointer deref when debugging is enabled ...
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/caps.c93
-rw-r--r--fs/ceph/debugfs.c40
-rw-r--r--fs/ceph/export.c356
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c85
-rw-r--r--fs/ceph/locks.c13
-rw-r--r--fs/ceph/mds_client.c205
-rw-r--r--fs/ceph/mds_client.h33
-rw-r--r--fs/ceph/mdsmap.c2
-rw-r--r--fs/ceph/quota.c177
-rw-r--r--fs/ceph/super.c7
-rw-r--r--fs/ceph/super.h2
12 files changed, 751 insertions, 264 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 36a8dc699448..72f8e1311392 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -892,8 +892,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
int have = ci->i_snap_caps;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask %p snap issued %s"
- " (mask %s)\n", &ci->vfs_inode,
+ dout("__ceph_caps_issued_mask ino 0x%lx snap issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(have),
ceph_cap_string(mask));
return 1;
@@ -904,8 +904,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
if (!__cap_is_valid(cap))
continue;
if ((cap->issued & mask) == mask) {
- dout("__ceph_caps_issued_mask %p cap %p issued %s"
- " (mask %s)\n", &ci->vfs_inode, cap,
+ dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino, cap,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch)
@@ -916,8 +916,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
/* does a combination of caps satisfy mask? */
have |= cap->issued;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask %p combo issued %s"
- " (mask %s)\n", &ci->vfs_inode,
+ dout("__ceph_caps_issued_mask ino 0x%lx combo issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch) {
@@ -2257,8 +2257,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (datasync)
goto out;
- inode_lock(inode);
-
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -2273,7 +2271,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
}
- inode_unlock(inode);
out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret;
@@ -2528,9 +2525,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
* to (when applicable), and check against max_size here as well.
* Note that caller is responsible for ensuring max_size increases are
* requested from the MDS.
+ *
+ * Returns 0 if caps were not able to be acquired (yet), a 1 if they were,
+ * or a negative error code.
+ *
+ * FIXME: how does a 0 return differ from -EAGAIN?
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
- loff_t endoff, bool nonblock, int *got, int *err)
+ loff_t endoff, bool nonblock, int *got)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
@@ -2550,8 +2552,7 @@ again:
if ((file_wanted & need) != need) {
dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
ceph_cap_string(need), ceph_cap_string(file_wanted));
- *err = -EBADF;
- ret = 1;
+ ret = -EBADF;
goto out_unlock;
}
@@ -2572,10 +2573,8 @@ again:
if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
inode, endoff, ci->i_max_size);
- if (endoff > ci->i_requested_max_size) {
- *err = -EAGAIN;
- ret = 1;
- }
+ if (endoff > ci->i_requested_max_size)
+ ret = -EAGAIN;
goto out_unlock;
}
/*
@@ -2610,8 +2609,7 @@ again:
* task isn't in TASK_RUNNING state
*/
if (nonblock) {
- *err = -EAGAIN;
- ret = 1;
+ ret = -EAGAIN;
goto out_unlock;
}
@@ -2640,8 +2638,7 @@ again:
if (session_readonly) {
dout("get_cap_refs %p needed %s but mds%d readonly\n",
inode, ceph_cap_string(need), ci->i_auth_cap->mds);
- *err = -EROFS;
- ret = 1;
+ ret = -EROFS;
goto out_unlock;
}
@@ -2650,16 +2647,14 @@ again:
if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
- *err = -EIO;
- ret = 1;
+ ret = -EIO;
goto out_unlock;
}
mds_wanted = __ceph_caps_mds_wanted(ci, false);
if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode);
- *err = -ESTALE;
- ret = 1;
+ ret = -ESTALE;
goto out_unlock;
}
if (!(file_wanted & ~mds_wanted))
@@ -2710,7 +2705,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
bool nonblock, int *got)
{
- int ret, err = 0;
+ int ret;
BUG_ON(need & ~CEPH_CAP_FILE_RD);
BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
@@ -2718,15 +2713,8 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
if (ret < 0)
return ret;
- ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err);
- if (ret) {
- if (err == -EAGAIN) {
- ret = 0;
- } else if (err < 0) {
- ret = err;
- }
- }
- return ret;
+ ret = try_get_cap_refs(ci, need, want, 0, nonblock, got);
+ return ret == -EAGAIN ? 0 : ret;
}
/*
@@ -2737,7 +2725,7 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
loff_t endoff, int *got, struct page **pinned_page)
{
- int _got, ret, err = 0;
+ int _got, ret;
ret = ceph_pool_perm_check(ci, need);
if (ret < 0)
@@ -2747,21 +2735,19 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
if (endoff > 0)
check_max_size(&ci->vfs_inode, endoff);
- err = 0;
_got = 0;
ret = try_get_cap_refs(ci, need, want, endoff,
- false, &_got, &err);
- if (ret) {
- if (err == -EAGAIN)
- continue;
- if (err < 0)
- ret = err;
- } else {
+ false, &_got);
+ if (ret == -EAGAIN) {
+ continue;
+ } else if (!ret) {
+ int err;
+
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&ci->i_cap_wq, &wait);
- while (!try_get_cap_refs(ci, need, want, endoff,
- true, &_got, &err)) {
+ while (!(err = try_get_cap_refs(ci, need, want, endoff,
+ true, &_got))) {
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -2770,19 +2756,14 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
}
remove_wait_queue(&ci->i_cap_wq, &wait);
-
if (err == -EAGAIN)
continue;
- if (err < 0)
- ret = err;
}
- if (ret < 0) {
- if (err == -ESTALE) {
- /* session was killed, try renew caps */
- ret = ceph_renew_caps(&ci->vfs_inode);
- if (ret == 0)
- continue;
- }
+ if (ret == -ESTALE) {
+ /* session was killed, try renew caps */
+ ret = ceph_renew_caps(&ci->vfs_inode);
+ if (ret == 0)
+ continue;
return ret;
}
@@ -4099,7 +4080,7 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
}
/*
- * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
+ * For a soon-to-be unlinked file, drop the LINK caps. If it
* looks like the link count will hit 0, drop any other caps (other
* than PIN) we don't specifically want (due to the file still being
* open).
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 98365e74cb4a..b3fc5fe26a1a 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -37,7 +37,7 @@ static int mdsmap_show(struct seq_file *s, void *p)
struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
int state = mdsmap->m_info[i].state;
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
- ceph_pr_addr(&addr->in_addr),
+ ceph_pr_addr(addr),
ceph_mds_state_name(state));
}
return 0;
@@ -88,7 +88,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_dentry,
path ? path : "");
spin_unlock(&req->r_dentry->d_lock);
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path1) {
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
req->r_path1);
@@ -108,7 +108,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_old_dentry,
path ? path : "");
spin_unlock(&req->r_old_dentry->d_lock);
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
if (req->r_ino2.ino)
seq_printf(s, " #%llx/%s", req->r_ino2.ino,
@@ -124,18 +124,48 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
+static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
+{
+ struct seq_file *s = p;
+
+ seq_printf(s, "0x%-17lx%-17s%-17s\n", inode->i_ino,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->implemented));
+ return 0;
+}
+
static int caps_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
- int total, avail, used, reserved, min;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ int total, avail, used, reserved, min, i;
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
seq_printf(s, "total\t\t%d\n"
"avail\t\t%d\n"
"used\t\t%d\n"
"reserved\t%d\n"
- "min\t%d\n",
+ "min\t\t%d\n\n",
total, avail, used, reserved, min);
+ seq_printf(s, "ino issued implemented\n");
+ seq_printf(s, "-----------------------------------------------\n");
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *session;
+
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
+ mutex_unlock(&mdsc->mutex);
+ mutex_lock(&session->s_mutex);
+ ceph_iterate_session_caps(session, caps_show_cb, s);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ mutex_lock(&mdsc->mutex);
+ }
+ mutex_unlock(&mdsc->mutex);
+
return 0;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 3c59ad180ef0..d3ef7ee429ec 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -22,18 +22,77 @@ struct ceph_nfs_confh {
u64 ino, parent_ino;
} __attribute__ ((packed));
+/*
+ * fh for snapped inode
+ */
+struct ceph_nfs_snapfh {
+ u64 ino;
+ u64 snapid;
+ u64 parent_ino;
+ u32 hash;
+} __attribute__ ((packed));
+
+static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
+ struct inode *parent_inode)
+{
+ const static int snap_handle_length =
+ sizeof(struct ceph_nfs_snapfh) >> 2;
+ struct ceph_nfs_snapfh *sfh = (void *)rawfh;
+ u64 snapid = ceph_snap(inode);
+ int ret;
+ bool no_parent = true;
+
+ if (*max_len < snap_handle_length) {
+ *max_len = snap_handle_length;
+ ret = FILEID_INVALID;
+ goto out;
+ }
+
+ ret = -EINVAL;
+ if (snapid != CEPH_SNAPDIR) {
+ struct inode *dir;
+ struct dentry *dentry = d_find_alias(inode);
+ if (!dentry)
+ goto out;
+
+ rcu_read_lock();
+ dir = d_inode_rcu(dentry->d_parent);
+ if (ceph_snap(dir) != CEPH_SNAPDIR) {
+ sfh->parent_ino = ceph_ino(dir);
+ sfh->hash = ceph_dentry_hash(dir, dentry);
+ no_parent = false;
+ }
+ rcu_read_unlock();
+ dput(dentry);
+ }
+
+ if (no_parent) {
+ if (!S_ISDIR(inode->i_mode))
+ goto out;
+ sfh->parent_ino = sfh->ino;
+ sfh->hash = 0;
+ }
+ sfh->ino = ceph_ino(inode);
+ sfh->snapid = snapid;
+
+ *max_len = snap_handle_length;
+ ret = FILEID_BTRFS_WITH_PARENT;
+out:
+ dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
+ return ret;
+}
+
static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
struct inode *parent_inode)
{
+ const static int handle_length =
+ sizeof(struct ceph_nfs_fh) >> 2;
+ const static int connected_handle_length =
+ sizeof(struct ceph_nfs_confh) >> 2;
int type;
- struct ceph_nfs_fh *fh = (void *)rawfh;
- struct ceph_nfs_confh *cfh = (void *)rawfh;
- int connected_handle_length = sizeof(*cfh)/4;
- int handle_length = sizeof(*fh)/4;
- /* don't re-export snaps */
if (ceph_snap(inode) != CEPH_NOSNAP)
- return -EINVAL;
+ return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
if (parent_inode && (*max_len < connected_handle_length)) {
*max_len = connected_handle_length;
@@ -44,6 +103,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
}
if (parent_inode) {
+ struct ceph_nfs_confh *cfh = (void *)rawfh;
dout("encode_fh %llx with parent %llx\n",
ceph_ino(inode), ceph_ino(parent_inode));
cfh->ino = ceph_ino(inode);
@@ -51,6 +111,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
*max_len = connected_handle_length;
type = FILEID_INO32_GEN_PARENT;
} else {
+ struct ceph_nfs_fh *fh = (void *)rawfh;
dout("encode_fh %llx\n", ceph_ino(inode));
fh->ino = ceph_ino(inode);
*max_len = handle_length;
@@ -59,7 +120,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
return type;
}
-static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
@@ -81,7 +142,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
mask = CEPH_STAT_CAP_INODE;
if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
mask |= CEPH_CAP_XATTR_SHARED;
- req->r_args.getattr.mask = cpu_to_le32(mask);
+ req->r_args.lookupino.mask = cpu_to_le32(mask);
req->r_ino1 = vino;
req->r_num_caps = 1;
@@ -91,16 +152,114 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
ihold(inode);
ceph_mdsc_put_request(req);
if (!inode)
- return ERR_PTR(-ESTALE);
- if (inode->i_nlink == 0) {
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
+ return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
}
+ return inode;
+}
+
+struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
+{
+ struct inode *inode = __lookup_inode(sb, ino);
+ if (IS_ERR(inode))
+ return inode;
+ if (inode->i_nlink == 0) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ return inode;
+}
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+{
+ struct inode *inode = __lookup_inode(sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ if (inode->i_nlink == 0) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
return d_obtain_alias(inode);
}
+static struct dentry *__snapfh_to_dentry(struct super_block *sb,
+ struct ceph_nfs_snapfh *sfh,
+ bool want_parent)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct ceph_mds_request *req;
+ struct inode *inode;
+ struct ceph_vino vino;
+ int mask;
+ int err;
+ bool unlinked = false;
+
+ if (want_parent) {
+ vino.ino = sfh->parent_ino;
+ if (sfh->snapid == CEPH_SNAPDIR)
+ vino.snap = CEPH_NOSNAP;
+ else if (sfh->ino == sfh->parent_ino)
+ vino.snap = CEPH_SNAPDIR;
+ else
+ vino.snap = sfh->snapid;
+ } else {
+ vino.ino = sfh->ino;
+ vino.snap = sfh->snapid;
+ }
+ inode = ceph_find_inode(sb, vino);
+ if (inode)
+ return d_obtain_alias(inode);
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ mask = CEPH_STAT_CAP_INODE;
+ if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
+ mask |= CEPH_CAP_XATTR_SHARED;
+ req->r_args.lookupino.mask = cpu_to_le32(mask);
+ if (vino.snap < CEPH_NOSNAP) {
+ req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
+ if (!want_parent && sfh->ino != sfh->parent_ino) {
+ req->r_args.lookupino.parent =
+ cpu_to_le64(sfh->parent_ino);
+ req->r_args.lookupino.hash =
+ cpu_to_le32(sfh->hash);
+ }
+ }
+
+ req->r_ino1 = vino;
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode) {
+ if (vino.snap == CEPH_SNAPDIR) {
+ if (inode->i_nlink == 0)
+ unlinked = true;
+ inode = ceph_get_snapdir(inode);
+ } else if (ceph_snap(inode) == vino.snap) {
+ ihold(inode);
+ } else {
+ /* mds does not support lookup snapped inode */
+ err = -EOPNOTSUPP;
+ inode = NULL;
+ }
+ }
+ ceph_mdsc_put_request(req);
+
+ if (want_parent) {
+ dout("snapfh_to_parent %llx.%llx\n err=%d\n",
+ vino.ino, vino.snap, err);
+ } else {
+ dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
+ vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
+ }
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+ /* see comments in ceph_get_parent() */
+ return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
+}
+
/*
* convert regular fh to dentry
*/
@@ -110,6 +269,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
{
struct ceph_nfs_fh *fh = (void *)fid->raw;
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
+ return __snapfh_to_dentry(sb, sfh, false);
+ }
+
if (fh_type != FILEID_INO32_GEN &&
fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
@@ -163,13 +327,49 @@ static struct dentry *__get_parent(struct super_block *sb,
static struct dentry *ceph_get_parent(struct dentry *child)
{
- /* don't re-export snaps */
- if (ceph_snap(d_inode(child)) != CEPH_NOSNAP)
- return ERR_PTR(-EINVAL);
-
- dout("get_parent %p ino %llx.%llx\n",
- child, ceph_vinop(d_inode(child)));
- return __get_parent(child->d_sb, child, 0);
+ struct inode *inode = d_inode(child);
+ struct dentry *dn;
+
+ if (ceph_snap(inode) != CEPH_NOSNAP) {
+ struct inode* dir;
+ bool unlinked = false;
+ /* do not support non-directory */
+ if (!d_is_dir(child)) {
+ dn = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
+ if (IS_ERR(dir)) {
+ dn = ERR_CAST(dir);
+ goto out;
+ }
+ /* There can be multiple paths to access snapped inode.
+ * For simplicity, treat snapdir of head inode as parent */
+ if (ceph_snap(inode) != CEPH_SNAPDIR) {
+ struct inode *snapdir = ceph_get_snapdir(dir);
+ if (dir->i_nlink == 0)
+ unlinked = true;
+ iput(dir);
+ if (IS_ERR(snapdir)) {
+ dn = ERR_CAST(snapdir);
+ goto out;
+ }
+ dir = snapdir;
+ }
+ /* If directory has already been deleted, futher get_parent
+ * will fail. Do not mark snapdir dentry as disconnected,
+ * this prevent exportfs from doing futher get_parent. */
+ if (unlinked)
+ dn = d_obtain_root(dir);
+ else
+ dn = d_obtain_alias(dir);
+ } else {
+ dn = __get_parent(child->d_sb, child, 0);
+ }
+out:
+ dout("get_parent %p ino %llx.%llx err=%ld\n",
+ child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0));
+ return dn;
}
/*
@@ -182,6 +382,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
struct ceph_nfs_confh *cfh = (void *)fid->raw;
struct dentry *dentry;
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
+ return __snapfh_to_dentry(sb, sfh, true);
+ }
+
if (fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
if (fh_len < sizeof(*cfh) / 4)
@@ -194,14 +399,115 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
return dentry;
}
+static int __get_snap_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ struct inode *inode = d_inode(child);
+ struct inode *dir = d_inode(parent);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_mds_request *req = NULL;
+ char *last_name = NULL;
+ unsigned next_offset = 2;
+ int err = -EINVAL;
+
+ if (ceph_ino(inode) != ceph_ino(dir))
+ goto out;
+ if (ceph_snap(inode) == CEPH_SNAPDIR) {
+ if (ceph_snap(dir) == CEPH_NOSNAP) {
+ strcpy(name, fsc->mount_options->snapdir_name);
+ err = 0;
+ }
+ goto out;
+ }
+ if (ceph_snap(dir) != CEPH_SNAPDIR)
+ goto out;
+
+ while (1) {
+ struct ceph_mds_reply_info_parsed *rinfo;
+ struct ceph_mds_reply_dir_entry *rde;
+ int i;
+
+ req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
+ USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ req = NULL;
+ goto out;
+ }
+ err = ceph_alloc_readdir_reply_buffer(req, inode);
+ if (err)
+ goto out;
+
+ req->r_direct_mode = USE_AUTH_MDS;
+ req->r_readdir_offset = next_offset;
+ req->r_args.readdir.flags =
+ cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
+ if (last_name) {
+ req->r_path2 = last_name;
+ last_name = NULL;
+ }
+
+ req->r_inode = dir;
+ ihold(dir);
+ req->r_dentry = dget(parent);
+
+ inode_lock(dir);
+ err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
+ inode_unlock(dir);
+
+ if (err < 0)
+ goto out;
+
+ rinfo = &req->r_reply_info;
+ for (i = 0; i < rinfo->dir_nr; i++) {
+ rde = rinfo->dir_entries + i;
+ BUG_ON(!rde->inode.in);
+ if (ceph_snap(inode) ==
+ le64_to_cpu(rde->inode.in->snapid)) {
+ memcpy(name, rde->name, rde->name_len);
+ name[rde->name_len] = '\0';
+ err = 0;
+ goto out;
+ }
+ }
+
+ if (rinfo->dir_end)
+ break;
+
+ BUG_ON(rinfo->dir_nr <= 0);
+ rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
+ next_offset += rinfo->dir_nr;
+ last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
+ if (!last_name) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ceph_mdsc_put_request(req);
+ req = NULL;
+ }
+ err = -ENOENT;
+out:
+ if (req)
+ ceph_mdsc_put_request(req);
+ kfree(last_name);
+ dout("get_snap_name %p ino %llx.%llx err=%d\n",
+ child, ceph_vinop(inode), err);
+ return err;
+}
+
static int ceph_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
struct ceph_mds_client *mdsc;
struct ceph_mds_request *req;
+ struct inode *inode = d_inode(child);
int err;
- mdsc = ceph_inode_to_client(d_inode(child))->mdsc;
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return __get_snap_name(parent, name, child);
+
+ mdsc = ceph_inode_to_client(inode)->mdsc;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
USE_ANY_MDS);
if (IS_ERR(req))
@@ -209,8 +515,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
inode_lock(d_inode(parent));
- req->r_inode = d_inode(child);
- ihold(d_inode(child));
+ req->r_inode = inode;
+ ihold(inode);
req->r_ino2 = ceph_vino(d_inode(parent));
req->r_parent = d_inode(parent);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
@@ -224,10 +530,10 @@ static int ceph_get_name(struct dentry *parent, char *name,
memcpy(name, rinfo->dname, rinfo->dname_len);
name[rinfo->dname_len] = 0;
dout("get_name %p ino %llx.%llx name %s\n",
- child, ceph_vinop(d_inode(child)), name);
+ child, ceph_vinop(inode), name);
} else {
dout("get_name %p ino %llx.%llx err %d\n",
- child, ceph_vinop(d_inode(child)), err);
+ child, ceph_vinop(inode), err);
}
ceph_mdsc_put_request(req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 84725b53ac21..305daf043eb0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -929,7 +929,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n",
(write ? "write" : "read"), file, pos, (unsigned)count,
- snapc, snapc->seq);
+ snapc, snapc ? snapc->seq : 0);
ret = filemap_write_and_wait_range(inode->i_mapping,
pos, pos + count - 1);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 35dae6d5493a..f85355bf49c4 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2266,43 +2266,72 @@ int ceph_permission(struct inode *inode, int mask)
return err;
}
+/* Craft a mask of needed caps given a set of requested statx attrs. */
+static int statx_to_caps(u32 want)
+{
+ int mask = 0;
+
+ if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME))
+ mask |= CEPH_CAP_AUTH_SHARED;
+
+ if (want & (STATX_NLINK|STATX_CTIME))
+ mask |= CEPH_CAP_LINK_SHARED;
+
+ if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
+ STATX_BLOCKS))
+ mask |= CEPH_CAP_FILE_SHARED;
+
+ if (want & (STATX_CTIME))
+ mask |= CEPH_CAP_XATTR_SHARED;
+
+ return mask;
+}
+
/*
- * Get all attributes. Hopefully somedata we'll have a statlite()
- * and can limit the fields we require to be accurate.
+ * Get all the attributes. If we have sufficient caps for the requested attrs,
+ * then we can avoid talking to the MDS at all.
*/
int ceph_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
- int err;
+ int err = 0;
- err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false);
- if (!err) {
- generic_fillattr(inode, stat);
- stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
- if (ceph_snap(inode) == CEPH_NOSNAP)
- stat->dev = inode->i_sb->s_dev;
+ /* Skip the getattr altogether if we're asked not to sync */
+ if (!(flags & AT_STATX_DONT_SYNC)) {
+ err = ceph_do_getattr(inode, statx_to_caps(request_mask),
+ flags & AT_STATX_FORCE_SYNC);
+ if (err)
+ return err;
+ }
+
+ generic_fillattr(inode, stat);
+ stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
+ if (ceph_snap(inode) == CEPH_NOSNAP)
+ stat->dev = inode->i_sb->s_dev;
+ else
+ stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
+
+ if (S_ISDIR(inode->i_mode)) {
+ if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
+ RBYTES))
+ stat->size = ci->i_rbytes;
else
- stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
-
- if (S_ISDIR(inode->i_mode)) {
- if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
- RBYTES))
- stat->size = ci->i_rbytes;
- else
- stat->size = ci->i_files + ci->i_subdirs;
- stat->blocks = 0;
- stat->blksize = 65536;
- /*
- * Some applications rely on the number of st_nlink
- * value on directories to be either 0 (if unlinked)
- * or 2 + number of subdirectories.
- */
- if (stat->nlink == 1)
- /* '.' + '..' + subdirs */
- stat->nlink = 1 + 1 + ci->i_subdirs;
- }
+ stat->size = ci->i_files + ci->i_subdirs;
+ stat->blocks = 0;
+ stat->blksize = 65536;
+ /*
+ * Some applications rely on the number of st_nlink
+ * value on directories to be either 0 (if unlinked)
+ * or 2 + number of subdirectories.
+ */
+ if (stat->nlink == 1)
+ /* '.' + '..' + subdirs */
+ stat->nlink = 1 + 1 + ci->i_subdirs;
}
+
+ /* Mask off any higher bits (e.g. btime) until we have support */
+ stat->result_mask = request_mask & STATX_BASIC_STATS;
return err;
}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 9dae2ec7e1fa..ac9b53b89365 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -237,15 +237,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO;
- } else if (op == CEPH_MDS_OP_SETFILELOCK) {
- /*
- * increasing i_filelock_ref closes race window between
- * handling request reply and adding file_lock struct to
- * inode. Otherwise, i_auth_cap may get trimmed in the
- * window. Caller function will decrease the counter.
- */
- fl->fl_ops = &ceph_fl_lock_ops;
- atomic_inc(&ci->i_filelock_ref);
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
@@ -299,10 +290,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO;
- } else {
- /* see comment in ceph_lock */
- fl->fl_ops = &ceph_fl_lock_ops;
- atomic_inc(&ci->i_filelock_ref);
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9049c2a3e972..959b1bf7c327 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -550,15 +550,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
int mds)
{
- struct ceph_mds_session *session;
-
if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
return NULL;
- session = mdsc->sessions[mds];
- dout("lookup_mds_session %p %d\n", session,
- refcount_read(&session->s_ref));
- get_session(session);
- return session;
+ return get_session(mdsc->sessions[mds]);
}
static bool __have_session(struct ceph_mds_client *mdsc, int mds)
@@ -1284,9 +1278,9 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
*
* Caller must hold session s_mutex.
*/
-static int iterate_session_caps(struct ceph_mds_session *session,
- int (*cb)(struct inode *, struct ceph_cap *,
- void *), void *arg)
+int ceph_iterate_session_caps(struct ceph_mds_session *session,
+ int (*cb)(struct inode *, struct ceph_cap *,
+ void *), void *arg)
{
struct list_head *p;
struct ceph_cap *cap;
@@ -1451,7 +1445,7 @@ static void remove_session_caps(struct ceph_mds_session *session)
LIST_HEAD(dispose);
dout("remove_session_caps on %p\n", session);
- iterate_session_caps(session, remove_session_caps_cb, fsc);
+ ceph_iterate_session_caps(session, remove_session_caps_cb, fsc);
wake_up_all(&fsc->mdsc->cap_flushing_wq);
@@ -1534,8 +1528,8 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
{
dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
- iterate_session_caps(session, wake_up_session_cb,
- (void *)(unsigned long)ev);
+ ceph_iterate_session_caps(session, wake_up_session_cb,
+ (void *)(unsigned long)ev);
}