summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-12-05 13:06:51 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-12-05 13:06:51 -0800
commita231582359ec27e121bf4bb0ab3df8355f919d1d (patch)
tree2379d13b9966ae10334fe4f641cf3a38da9c4a97
parent7ce4fab8191396a1b8e4bc42d3b90029876b2bcd (diff)
parent82995cc6c5ae4bf4d72edef381a085e52d5b5905 (diff)
Merge tag 'ceph-for-5.5-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "The two highlights are a set of improvements to how rbd read-only mappings are handled and a conversion to the new mount API (slightly complicated by the fact that we had a common option parsing framework that called out into rbd and the filesystem instead of them calling into it). Also included a few scattered fixes and a MAINTAINERS update for rbd, adding Dongsheng as a reviewer" * tag 'ceph-for-5.5-rc1' of git://github.com/ceph/ceph-client: libceph, rbd, ceph: convert to use the new mount API rbd: ask for a weaker incompat mask for read-only mappings rbd: don't query snapshot features rbd: remove snapshot existence validation code rbd: don't establish watch for read-only mappings rbd: don't acquire exclusive lock for read-only mappings rbd: disallow read-write partitions on images mapped read-only rbd: treat images mapped read-only seriously rbd: introduce RBD_DEV_FLAG_READONLY rbd: introduce rbd_is_snap() ceph: don't leave ino field in ceph_mds_request_head uninitialized ceph: tone down loglevel on ceph_mdsc_build_path warning rbd: update MAINTAINERS info ceph: fix geting random mds from mdsmap rbd: fix spelling mistake "requeueing" -> "requeuing" ceph: make several helper accessors take const pointers libceph: drop unnecessary check from dispatch() in mon_client.c
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/block/rbd.c467
-rw-r--r--fs/ceph/cache.c9
-rw-r--r--fs/ceph/cache.h5
-rw-r--r--fs/ceph/mds_client.c19
-rw-r--r--fs/ceph/mdsmap.c11
-rw-r--r--fs/ceph/super.c646
-rw-r--r--fs/ceph/super.h13
-rw-r--r--include/linux/ceph/libceph.h10
-rw-r--r--net/ceph/ceph_common.c419
-rw-r--r--net/ceph/messenger.c2
-rw-r--r--net/ceph/mon_client.c3
12 files changed, 803 insertions, 803 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 3c6038b4f10b..067cae5bde23 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13773,7 +13773,7 @@ F: drivers/media/radio/radio-tea5777.c
RADOS BLOCK DEVICE (RBD)
M: Ilya Dryomov <idryomov@gmail.com>
M: Sage Weil <sage@redhat.com>
-M: Alex Elder <elder@kernel.org>
+R: Dongsheng Yang <dongsheng.yang@easystack.cn>
L: ceph-devel@vger.kernel.org
W: http://ceph.com/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 13527a0b4e44..2b184563cd32 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -34,7 +34,7 @@
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/striper.h>
#include <linux/ceph/decode.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
#include <linux/bsearch.h>
#include <linux/kernel.h>
@@ -377,7 +377,6 @@ struct rbd_client_id {
struct rbd_mapping {
u64 size;
- u64 features;
};
/*
@@ -462,8 +461,9 @@ struct rbd_device {
* by rbd_dev->lock
*/
enum rbd_dev_flags {
- RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */
+ RBD_DEV_FLAG_EXISTS, /* rbd_dev_device_setup() ran */
RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */
+ RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */
};
static DEFINE_MUTEX(client_mutex); /* Serialize client creation */
@@ -514,6 +514,16 @@ static int minor_to_rbd_dev_id(int minor)
return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
}
+static bool rbd_is_ro(struct rbd_device *rbd_dev)
+{
+ return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
+}
+
+static bool rbd_is_snap(struct rbd_device *rbd_dev)
+{
+ return rbd_dev->spec->snap_id != CEPH_NOSNAP;
+}
+
static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
{
lockdep_assert_held(&rbd_dev->lock_rwsem);
@@ -633,8 +643,6 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
u64 snap_id);
static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
u8 *order, u64 *snap_size);
-static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
- u64 *snap_features);
static int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev);
static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result);
@@ -695,9 +703,16 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
if (get_user(ro, (int __user *)arg))
return -EFAULT;
- /* Snapshots can't be marked read-write */
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro)
- return -EROFS;
+ /*
+ * Both images mapped read-only and snapshots can't be marked
+ * read-write.
+ */
+ if (!ro) {
+ if (rbd_is_ro(rbd_dev))
+ return -EROFS;
+
+ rbd_assert(!rbd_is_snap(rbd_dev));
+ }
/* Let blkdev_roset() handle it */
return -ENOTTY;
@@ -823,34 +838,34 @@ enum {
Opt_queue_depth,
Opt_alloc_size,
Opt_lock_timeout,
- Opt_last_int,
/* int args above */
Opt_pool_ns,
- Opt_last_string,
/* string args above */
Opt_read_only,
Opt_read_write,
Opt_lock_on_read,
Opt_exclusive,
Opt_notrim,
- Opt_err
};
-static match_table_t rbd_opts_tokens = {
- {Opt_queue_depth, "queue_depth=%d"},
- {Opt_alloc_size, "alloc_size=%d"},
- {Opt_lock_timeout, "lock_timeout=%d"},
- /* int args above */
- {Opt_pool_ns, "_pool_ns=%s"},
- /* string args above */
- {Opt_read_only, "read_only"},
- {Opt_read_only, "ro"}, /* Alternate spelling */
- {Opt_read_write, "read_write"},
- {Opt_read_write, "rw"}, /* Alternate spelling */
- {Opt_lock_on_read, "lock_on_read"},
- {Opt_exclusive, "exclusive"},
- {Opt_notrim, "notrim"},
- {Opt_err, NULL}
+static const struct fs_parameter_spec rbd_param_specs[] = {
+ fsparam_u32 ("alloc_size", Opt_alloc_size),
+ fsparam_flag ("exclusive", Opt_exclusive),
+ fsparam_flag ("lock_on_read", Opt_lock_on_read),
+ fsparam_u32 ("lock_timeout", Opt_lock_timeout),
+ fsparam_flag ("notrim", Opt_notrim),
+ fsparam_string ("_pool_ns", Opt_pool_ns),
+ fsparam_u32 ("queue_depth", Opt_queue_depth),
+ fsparam_flag ("read_only", Opt_read_only),
+ fsparam_flag ("read_write", Opt_read_write),
+ fsparam_flag ("ro", Opt_read_only),
+ fsparam_flag ("rw", Opt_read_write),
+ {}
+};
+
+static const struct fs_parameter_description rbd_parameters = {
+ .name = "rbd",
+ .specs = rbd_param_specs,
};
struct rbd_options {
@@ -871,87 +886,12 @@ struct rbd_options {
#define RBD_EXCLUSIVE_DEFAULT false
#define RBD_TRIM_DEFAULT true
-struct parse_rbd_opts_ctx {
+struct rbd_parse_opts_ctx {
struct rbd_spec *spec;
+ struct ceph_options *copts;
struct rbd_options *opts;
};
-static int parse_rbd_opts_token(char *c, void *private)
-{
- struct parse_rbd_opts_ctx *pctx = private;
- substring_t argstr[MAX_OPT_ARGS];
- int token, intval, ret;
-
- token = match_token(c, rbd_opts_tokens, argstr);
- if (token < Opt_last_int) {
- ret = match_int(&argstr[0], &intval);
- if (ret < 0) {
- pr_err("bad option arg (not int) at '%s'\n", c);
- return ret;
- }
- dout("got int token %d val %d\n", token, intval);
- } else if (token > Opt_last_int && token < Opt_last_string) {
- dout("got string token %d val %s\n", token, argstr[0].from);
- } else {
- dout("got token %d\n", token);
- }
-
- switch (token) {
- case Opt_queue_depth:
- if (intval < 1) {
- pr_err("queue_depth out of range\n");
- return -EINVAL;
- }
- pctx->opts->queue_depth = intval;
- break;
- case Opt_alloc_size:
- if (intval < SECTOR_SIZE) {
- pr_err("alloc_size out of range\n");
- return -EINVAL;
- }
- if (!is_power_of_2(intval)) {
- pr_err("alloc_size must be a power of 2\n");
- return -EINVAL;
- }
- pctx->opts->alloc_size = intval;
- break;
- case Opt_lock_timeout:
- /* 0 is "wait forever" (i.e. infinite timeout) */
- if (intval < 0 || intval > INT_MAX / 1000) {
- pr_err("lock_timeout out of range\n");
- return -EINVAL;
- }
- pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000);
- break;
- case Opt_pool_ns:
- kfree(pctx->spec->pool_ns);
- pctx->spec->pool_ns = match_strdup(argstr);
- if (!pctx->spec->pool_ns)
- return -ENOMEM;
- break;
- case Opt_read_only:
- pctx->opts->read_only = true;
- break;
- case Opt_read_write:
- pctx->opts->read_only = false;
- break;
- case Opt_lock_on_read:
- pctx->opts->lock_on_read = true;
- break;
- case Opt_exclusive:
- pctx->opts->exclusive = true;
- break;
- case Opt_notrim:
- pctx->opts->trim = false;
- break;
- default:
- /* libceph prints "bad option" msg */
- return -EINVAL;
- }
-
- return 0;
-}
-
static char* obj_op_name(enum obj_operation_type op_type)
{
switch (op_type) {
@@ -1302,51 +1242,23 @@ static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}
-static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
- u64 *snap_features)
-{
- rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
- if (snap_id == CEPH_NOSNAP) {
- *snap_features = rbd_dev->header.features;
- } else if (rbd_dev->image_format == 1) {
- *snap_features = 0; /* No features for format 1 */
- } else {
- u64 features = 0;
- int ret;
-
- ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
- if (ret)
- return ret;
-
- *snap_features = features;
- }
- return 0;
-}
-
static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
{
u64 snap_id = rbd_dev->spec->snap_id;
u64 size = 0;
- u64 features = 0;
int ret;
ret = rbd_snap_size(rbd_dev, snap_id, &size);
if (ret)
return ret;
- ret = rbd_snap_features(rbd_dev, snap_id, &features);
- if (ret)
- return ret;
rbd_dev->mapping.size = size;
- rbd_dev->mapping.features = features;
-
return 0;
}
static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
{
rbd_dev->mapping.size = 0;
- rbd_dev->mapping.features = 0;
}
static void zero_bvec(struct bio_vec *bv)
@@ -1832,6 +1744,17 @@ static u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
static bool use_object_map(struct rbd_device *rbd_dev)
{
+ /*
+ * An image mapped read-only can't use the object map -- it isn't
+ * loaded because the header lock isn't acquired. Someone else can
+ * write to the image and update the object map behind our back.
+ *
+ * A snapshot can't be written to, so using the object map is always
+ * safe.
+ */
+ if (!rbd_is_snap(rbd_dev) && rbd_is_ro(rbd_dev))
+ return false;
+
return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) &&
!(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID));
}
@@ -3555,7 +3478,7 @@ static bool need_exclusive_lock(struct rbd_img_request *img_req)
if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
return false;
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ if (rbd_is_ro(rbd_dev))
return false;
rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
@@ -4230,7 +4153,7 @@ again:
* lock owner acked, but resend if we don't see them
* release the lock
*/
- dout("%s rbd_dev %p requeueing lock_dwork\n", __func__,
+ dout("%s rbd_dev %p requeuing lock_dwork\n", __func__,
rbd_dev);
mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC));
@@ -4826,24 +4749,14 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
- if (op_type != OBJ_OP_READ && rbd_dev->spec->snap_id != CEPH_NOSNAP) {
- rbd_warn(rbd_dev, "%s on read-only snapshot",
- obj_op_name(op_type));
- result = -EIO;
- goto err;
- }
-
- /*
- * Quit early if the mapped snapshot no longer exists. It's
- * still possible the snapshot will have disappeared by the
- * time our request arrives at the osd, but there's no sense in
- * sending it if we already know.
- */
- if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
- dout("request for non-existent snapshot");
- rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
- result = -ENXIO;
- goto err_rq;
+ if (op_type != OBJ_OP_READ) {
+ if (rbd_is_ro(rbd_dev)) {
+ rbd_warn(rbd_dev, "%s on read-only mapping",
+ obj_op_name(op_type));
+ result = -EIO;
+ goto err;
+ }
+ rbd_assert(!rbd_is_snap(rbd_dev));
}
if (offset && length > U64_MAX - offset + 1) {
@@ -5025,25 +4938,6 @@ out:
return ret;
}
-/*
- * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
- * has disappeared from the (just updated) snapshot context.
- */
-static void rbd_exists_validate(struct rbd_device *rbd_dev)
-{
- u64 snap_id;
-
- if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags))
- return;
-
- snap_id = rbd_dev->spec->snap_id;
- if (snap_id == CEPH_NOSNAP)
- return;
-
- if (rbd_dev_snap_index(rbd_dev, snap_id) == BAD_SNAP_INDEX)
- clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
-}
-
static void rbd_dev_update_size(struct rbd_device *rbd_dev)
{
sector_t size;
@@ -5084,12 +4978,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
goto out;
}
- if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
- rbd_dev->mapping.size = rbd_dev->header.image_size;
- } else {
- /* validate mapped snapshot's EXISTS flag */
- rbd_exists_validate(rbd_dev);
- }
+ rbd_assert(!rbd_is_snap(rbd_dev));
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
out:
up_write(&rbd_dev->header_rwsem);
@@ -5211,17 +5101,12 @@ static ssize_t rbd_size_show(struct device *dev,
(unsigned long long)rbd_dev->mapping.size);
}
-/*
- * Note this shows the features for whatever's mapped, which is not
- * necessarily the base image.
- */
static ssize_t rbd_features_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- return sprintf(buf, "0x%016llx\n",
- (unsigned long long)rbd_dev->mapping.features);
+ return sprintf(buf, "0x%016llx\n", rbd_dev->header.features);
}
static ssize_t rbd_major_show(struct device *dev,
@@ -5709,9 +5594,12 @@ out:
}
static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
- u64 *snap_features)
+ bool read_only, u64 *snap_features)
{
- __le64 snapid = cpu_to_le64(snap_id);
+ struct {
+ __le64 snap_id;
+ u8 read_only;
+ } features_in;
struct {
__le64 features;
__le64 incompat;
@@ -5719,9 +5607,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
u64 unsup;
int ret;
+ features_in.snap_id = cpu_to_le64(snap_id);
+ features_in.read_only = read_only;
+
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
&rbd_dev->header_oloc, "get_features",
- &snapid, sizeof(snapid),
+ &features_in, sizeof(features_in),
&features_buf, sizeof(features_buf));
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0)
@@ -5749,7 +5640,8 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
{
return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
- &rbd_dev->header.features);
+ rbd_is_ro(rbd_dev),
+ &rbd_dev->header.features);
}
/*
@@ -6456,6 +6348,122 @@ static inline char *dup_token(const char **buf, size_t *lenp)
return dup;
}
+static int rbd_parse_param(struct fs_parameter *param,
+ struct rbd_parse_opts_ctx *pctx)
+{
+ struct rbd_options *opt = pctx->opts;
+ struct fs_parse_result result;
+ int token, ret;
+
+ ret = ceph_parse_param(param, pctx->copts, NULL);
+ if (ret != -ENOPARAM)
+ return ret;
+
+ token = fs_parse(NULL, &rbd_parameters, param, &result);
+ dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
+ if (token < 0) {
+ if (token == -ENOPARAM) {
+ return invalf(NULL, "rbd: Unknown parameter '%s'",
+ param->key);
+ }
+ return token;
+ }
+
+ switch (token) {
+ case Opt_queue_depth:
+ if (result.uint_32 < 1)
+ goto out_of_range;
+ opt->queue_depth = result.uint_32;
+ break;
+ case Opt_alloc_size:
+ if (result.uint_32 < SECTOR_SIZE)
+ goto out_of_range;
+ if (!is_power_of_2(result.uint_32)) {
+ return invalf(NULL, "rbd: alloc_size must be a power of 2");
+ }
+ opt->alloc_size = result.uint_32;
+ break;
+ case Opt_lock_timeout:
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (result.uint_32 > INT_MAX / 1000)
+ goto out_of_range;
+ opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000);
+ break;
+ case Opt_pool_ns:
+ kfree(pctx->spec->pool_ns);
+ pctx->spec->pool_ns = param->string;
+ param->string = NULL;
+ break;
+ case Opt_read_only:
+ opt->read_only = true;
+ break;
+ case Opt_read_write:
+ opt->read_only = false;
+ break;
+ case Opt_lock_on_read:
+ opt->lock_on_read = true;
+ break;
+ case Opt_exclusive:
+ opt->exclusive = true;
+ break;
+ case Opt_notrim:
+ opt->trim = false;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+
+out_of_range:
+ return invalf(NULL, "rbd: %s out of range", param->key);
+}
+
+/*
+ * This duplicates most of generic_parse_monolithic(), untying it from
+ * fs_context and skipping standard superblock and security options.
+ */
+static int rbd_parse_options(char *options, struct rbd_parse_opts_ctx *pctx)
+{
+ char *key;
+ int ret = 0;
+
+ dout("%s '%s'\n", __func__, options);
+ while ((key = strsep(&options, ",")) != NULL) {
+ if (*key) {
+ struct fs_parameter param = {
+ .key = key,
+ .type = fs_value_is_string,
+ };
+ char *value = strchr(key, '=');
+ size_t v_len = 0;
+
+ if (value) {
+ if (value == key)
+ continue;
+ *value++ = 0;
+ v_len = strlen(value);
+ }
+
+
+ if (v_len > 0) {
+ param.string = kmemdup_nul(value, v_len,
+ GFP_KERNEL);
+ if (!param.string)
+ return -ENOMEM;
+ }
+ param.size = v_len;
+
+ ret = rbd_parse_param(&param, pctx);
+ kfree(param.string);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* Parse the options provided for an "rbd add" (i.e., rbd image
* mapping) request. These arrive via a write to /sys/bus/rbd/add,
@@ -6507,8 +6515,7 @@ static int rbd_add_parse_args(const char *buf,
const char *mon_addrs;
char *snap_name;
size_t mon_addrs_size;
- struct parse_rbd_opts_ctx pctx = { 0 };
- struct ceph_options *copts;
+ struct rbd_parse_opts_ctx pctx = { 0 };
int ret;
/* The first four tokens are required */
@@ -6519,7 +6526,7 @@ static int rbd_add_parse_args(const char *buf,
return -EINVAL;
}
mon_addrs = buf;
- mon_addrs_size = len + 1;
+ mon_addrs_size = len;
buf += len;
ret = -EINVAL;
@@ -6569,6 +6576,10 @@ static int rbd_add_parse_args(const char *buf,
*(snap_name + len) = '\0';
pctx.spec->snap_name = snap_name;
+ pctx.copts = ceph_alloc_options();
+ if (!pctx.copts)
+ goto out_mem;
+
/* Initialize all rbd options to the defaults */
pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
@@ -6583,27 +6594,27 @@ static int rbd_add_parse_args(const char *buf,
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT;
- copts = ceph_parse_options(options, mon_addrs,
- mon_addrs + mon_addrs_size - 1,
- parse_rbd_opts_token, &pctx);
- if (IS_ERR(copts)) {
- ret = PTR_ERR(copts);
+ ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
+ if (ret)
+ goto out_err;
+
+ ret = rbd_parse_options(options, &pctx);
+ if (ret)
goto out_err;
- }
- kfree(options);
- *ceph_opts = copts;
+ *ceph_opts = pctx.copts;
*opts = pctx.opts;
*rbd_spec = pctx.spec;
-
+ kfree(options);
return 0;
+
out_mem:
ret = -ENOMEM;
out_err:
kfree(pctx.opts);
+ ceph_destroy_options(pctx.copts);
rbd_spec_put(pctx.spec);
kfree(options);
-
return ret;
}
@@ -6632,7 +6643,7 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
return -EINVAL;
}
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ if (rbd_is_ro(rbd_dev))
return 0;
rbd_assert(!rbd_is_lock_owner(rbd_dev));
@@ -6838,6 +6849,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
__rbd_get_client(rbd_dev->rbd_client);
rbd_spec_get(rbd_dev->parent_spec);
+ __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
+
ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
goto out_err;
@@ -6889,7 +6902,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
goto err_out_blkdev;
set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
- set_disk_ro(rbd_dev->disk, rbd_dev->opts->read_only);
+ set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev));
ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
if (ret)
@@ -6927,6 +6940,24 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
return ret;
}
+static void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap)
+{
+ if (!is_snap) {
+ pr_info("image %s/%s%s%s does not exist\n",
+ rbd_dev->spec->pool_name,
+ rbd_dev->spec->pool_ns ?: "",
+ rbd_dev->spec->pool_ns ? "/" : "",
+ rbd_dev->spec->image_name);
+ } else {
+ pr_info("snap %s/%s%s%s@%s does not exist\n",
+ rbd_dev->spec->pool_name,
+ rbd_dev->spec->pool_ns ?: "",
+ rbd_dev->spec->pool_ns ? "/" : "",
+ rbd_dev->spec->image_name,
+ rbd_dev->spec->snap_name);
+ }
+}
+
static void rbd_dev_image_release(struct rbd_device *rbd_dev)
{
rbd_dev_unprobe(rbd_dev);
@@ -6945,6 +6976,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
*/
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
+ bool need_watch = !rbd_is_ro(rbd_dev);
int ret;
/*
@@ -6961,22 +6993,21 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (ret)
goto err_out_format;
- if (!depth) {
+ if (need_watch) {
ret = rbd_register_watch(rbd_dev);
if (ret) {
if (ret == -ENOENT)
- pr_info("image %s/%s%s%s does not exist\n",
- rbd_dev->spec->pool_name,
- rbd_dev->spec->pool_ns ?: "",
- rbd_dev->spec->pool_ns ? "/" : "",
- rbd_dev->spec->image_name);
+ rbd_print_dne(rbd_dev, false);
goto err_out_format;
}
}
ret = rbd_dev_header_info(rbd_dev);
- if (ret)
+ if (ret) {
+ if (ret == -ENOENT && !need_watch)
+ rbd_print_dne(rbd_dev, false);
goto err_out_watch;
+ }
/*
* If this image is the one being mapped, we have pool name and
@@ -6990,12 +7021,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
ret = rbd_spec_fill_names(rbd_dev);
if (ret) {
if (ret == -ENOENT)
- pr_info("snap %s/%s%s%s@%s does not exist\n",
- rbd_dev->spec->pool_name,
- rbd_dev->spec->pool_ns ?: "",
- rbd_dev->spec->pool_ns ? "/" : "",
- rbd_dev->spec->image_name,
- rbd_dev->spec->snap_name);
+ rbd_print_dne(rbd_dev, true);
goto err_out_probe;
}
@@ -7003,7 +7029,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (ret)
goto err_out_probe;
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP &&
+ if (rbd_is_snap(rbd_dev) &&
(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) {
ret = rbd_object_map_load(rbd_dev);
if (ret)
@@ -7027,7 +7053,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (!depth)
+ if (need_watch)
rbd_unregister_watch(rbd_dev);
err_out_format:
rbd_dev->image_format = 0;
@@ -7079,6 +7105,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
spec = NULL; /* rbd_dev now owns this */
rbd_opts = NULL; /* rbd_dev now owns this */
+ /* if we are mapping a snapshot it will be a read-only mapping */
+ if (rbd_dev->opts->read_only ||
+ strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME))
+ __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
+
rbd_dev->config_info = kstrdup(buf, GFP_KERNEL);
if (!rbd_dev->config_info) {
rc = -ENOMEM;
@@ -7092,10 +7123,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
goto err_out_rbd_dev;
}
- /* If we are mapping a snapshot it must be marked read-only */
- if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
- rbd_dev->opts->read_only = true;
-
if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
rbd_warn(rbd_dev, "alloc_size adjusted to %u",
rbd_dev->layout.object_size);
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index b2ec29eeb4c4..73f24f307a4a 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -8,6 +8,7 @@
#include <linux/ceph/ceph_debug.h>
+#include <linux/fs_context.h>
#include "super.h"
#include "cache.h"
@@ -49,7 +50,7 @@ void ceph_fscache_unregister(void)
fscache_unregister_netfs(&ceph_cache_netfs);
}
-int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc)
{
const struct ceph_fsid *fsid = &fsc->client->fsid;
const char *fscache_uniq = fsc->mount_options->fscache_uniq;
@@ -66,8 +67,8 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
continue;
- pr_err("fscache cookie already registered for fsid %pU\n", fsid);
- pr_err(" use fsc=%%s mount option to specify a uniquifier\n");
+ errorf(fc, "ceph: fscache cookie already registered for fsid %pU, use fsc=<uniquifier> option",
+ fsid);
err = -EBUSY;
goto out_unlock;
}
@@ -95,7 +96,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
list_add_tail(&ent->list, &ceph_fscache_list);
} else {
kfree(ent);
- pr_err("unable to register fscache cookie for fsid %pU\n",
+ errorf(fc, "ceph: unable to register fscache cookie for fsid %pU",
fsid);
/* all other fs ignore this error */
}
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index e486fac3434d..89dbdd1eb14a 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -16,7 +16,7 @@ extern struct fscache_netfs ceph_cache_netfs;
int ceph_fscache_register(void);
void ceph_fscache_unregister(void);
-int ceph_fscache_register_fs(struct ceph_fs_client* fsc);
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc);
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc);
void ceph_fscache_register_inode_cookie(struct inode *inode);
@@ -88,7 +88,8 @@ static inline void ceph_fscache_unregister(void)
{
}
-static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc,
+ struct fs_context *fc)
{
return 0;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a5163296d9d9..068b029cf073 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2182,13 +2182,17 @@ retry:
}
base = ceph_ino(d_inode(temp));
rcu_read_unlock();
- if (pos < 0 || read_seqretry(&rename_lock, seq)) {
- pr_err("build_path did not end path lookup where "
- "expected, pos is %d\n", pos);
- /* presumably this is only possible if racing with a
- rename of one of the parent directories (we can not
- lock the dentries above us to prevent this, but
- retrying should be harmless) */
+
+ if (read_seqretry(&rename_lock, seq))
+ goto retry;
+
+ if (pos < 0) {
+ /*
+ * A rename didn't occur, but somehow we didn't end up where
+ * we thought we would. Throw a warning and try again.
+ */
+ pr_warn("build_path did not end path lookup where "
+ "expected, pos is %d\n", pos);
goto retry;
}
@@ -2345,6 +2349,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
head->op = cpu_to_le32(req->r_op);
head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
+ head->ino = 0;
head->args = req->r_args;
ceph_encode_filepath(&p, end, ino1, path1);
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index ce2d00da5096..aeec1d6e3769 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -20,7 +20,7 @@
int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
{
int n = 0;
- int i;
+ int i, j;
/* special case for one mds */
if (1 == m->m_num_mds && m->m_info[0].state > 0)
@@ -35,9 +35,12 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
/* pick */
n = prandom_u32() % n;
- for (i = 0; n > 0; i++, n--)
- while (m->m_info[i].state <= 0)
- i++;
+ for (j = 0, i = 0; i < m->m_num_mds; i++) {
+ if (m->m_info[i].state > 0)
+ j++;
+ if (j > n)
+ break;
+ }
return i;
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b47f43fc2d68..9c9a7c68eea3 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -9,7 +9,8 @@
#include <linux/in6.h>
#include <linux/module.h>
#include <linux/mount.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -138,280 +139,308 @@ enum {
Opt_readdir_max_entries,
Opt_readdir_max_bytes,
Opt_congestion_kb,
- Opt_last_int,
/* int args above */
Opt_snapdirname,
Opt_mds_namespace,
- Opt_fscache_uniq,
Opt_recover_session,
- Opt_last_string,
+ Opt_source,
/* string args above */
Opt_dirstat,
- Opt_nodirstat,
Opt_rbytes,
- Opt_norbytes,
Opt_asyncreaddir,
- Opt_noasyncreaddir,
Opt_dcache,
- Opt_nodcache,
Opt_ino32,
- Opt_noino32,
Opt_fscache,
- Opt_nofscache,
Opt_poolperm,
- Opt_nopoolperm,
Opt_require_active_mds,
- Opt_norequire_active_mds,
-#ifdef CONFIG_CEPH_FS_POSIX_ACL
Opt_acl,
-#endif
- Opt_noacl,
Opt_quotadf,
- Opt_noquotadf,
Opt_copyfrom,
- Opt_nocopyfrom,
};
-static match_table_t fsopt_tokens = {
- {Opt_wsize, "wsize=%d"},
- {Opt_rsize, "rsize=%d"},
- {Opt_rasize, "rasize=%d"},
- {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
- {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
- {Opt_caps_max, "caps_max=%d"},
- {Opt_readdir_max_entries, "readdir_max_entries=%d"},
- {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
- {Opt_congestion_kb, "write_congestion_kb=%d"},
- /* int args above */
- {Opt_snapdirname, "snapdirname=%s"},
- {Opt_mds_namespace, "mds_namespace=%s"},
- {Opt_recover_session, "recover_session=%s"},
- {Opt_fscache_uniq, "fsc=%s"},
- /* string args above */
- {Opt_dirstat, "dirstat"},
- {Opt_nodirstat, "nodirstat"},
- {Opt_rbytes, "rbytes"},
- {Opt_norbytes, "norbytes"},
- {Opt_asyncreaddir, "asyncreaddir"},
- {Opt_noasyncreaddir, "noasyncreaddir"},
- {Opt_dcache, "dcache"},
- {Opt_nodcache, "nodcache"},
- {Opt_ino32, "ino32"},
- {Opt_noino32, "noino32"},
- {Opt_fscache, "fsc"},
- {Opt_nofscache, "nofsc"},
- {Opt_poolperm, "poolperm"},
- {Opt_nopoolperm, "nopoolperm"},
- {Opt_require_active_mds, "require_active_mds"},
- {Opt_norequire_active_mds, "norequire_active_mds"},
-#ifdef CONFIG_CEPH_FS_POSIX_ACL
- {Opt_acl, "acl"},
-#endif
- {Opt_noacl, "noacl"},
- {Opt_quotadf, "quotadf"},
- {Opt_noquotadf, "noquotadf"},
- {Opt_copyfrom, "copyfrom"},
- {Opt_nocopyfrom, "nocopyfrom"},
- {-1, NULL}
+enum ceph_recover_session_mode {
+ ceph_recover_session_no,
+ ceph_recover_session_clean
+};
+
+static const struct fs_parameter_enum ceph_mount_param_enums[] = {
+ { Opt_recover_session, "no", ceph_recover_session_no },
+ { Opt_recover_session, "clean", ceph_recover_session_clean },
+ {}
+};
+
+static const struct fs_parameter_spec ceph_mount_param_specs[] = {
+ fsparam_flag_no ("acl", Opt_acl),
+ fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
+ fsparam_u32 ("caps_max", Opt_caps_max),
+ fsparam_u32 ("caps_wanted_delay_max", Opt_caps