diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-05 14:05:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-05 14:05:57 -0700 |
commit | 242b23319809e05170b3cc0d44d3b4bd202bb073 (patch) | |
tree | 195e39fd02942ee0ef60ead7239859f2fe0c12a1 /drivers/block | |
parent | 3f7e82379fc91102d82ed89822bd4242c83e40d5 (diff) | |
parent | fba97dc7fc76b2c9a909fa0b3786d30a9899f5cf (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A more active cycle than most of the recent past, with a few large,
long discussed works this time.
The RNBD block driver has been posted for nearly two years now, and
flowing through RDMA due to it also introducing a new ULP.
The removal of FMR has been a recurring discussion theme for a long
time.
And the usual smattering of features and bug fixes.
Summary:
- Various small driver bugs fixes in rxe, mlx5, hfi1, and efa
- Continuing driver cleanups in bnxt_re, hns
- Big cleanup of mlx5 QP creation flows
- More consistent use of src port and flow label when LAG is used and
a mlx5 implementation
- Additional set of cleanups for IB CM
- 'RNBD' network block driver and target. This is a network block
RDMA device specific to ionos's cloud environment. It brings strong
multipath and resiliency capabilities.
- Accelerated IPoIB for HFI1
- QP/WQ/SRQ ioctl migration for uverbs, and support for multiple
async fds
- Support for exchanging the new IBTA defiend ECE data during RDMA CM
exchanges
- Removal of the very old and insecure FMR interface from all ULPs
and drivers. FRWR should be preferred for at least a decade now"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (247 commits)
RDMA/cm: Spurious WARNING triggered in cm_destroy_id()
RDMA/mlx5: Return ECE DC support
RDMA/mlx5: Don't rely on FW to set zeros in ECE response
RDMA/mlx5: Return an error if copy_to_user fails
IB/hfi1: Use free_netdev() in hfi1_netdev_free()
RDMA/hns: Uninitialized variable in modify_qp_init_to_rtr()
RDMA/core: Move and rename trace_cm_id_create()
IB/hfi1: Fix hfi1_netdev_rx_init() error handling
RDMA: Remove 'max_map_per_fmr'
RDMA: Remove 'max_fmr'
RDMA/core: Remove FMR device ops
RDMA/rdmavt: Remove FMR memory registration
RDMA/mthca: Remove FMR support for memory registration
RDMA/mlx4: Remove FMR support for memory registration
RDMA/i40iw: Remove FMR leftovers
RDMA/bnxt_re: Remove FMR leftovers
RDMA/mlx5: Remove FMR leftovers
RDMA/core: Remove FMR pool API
RDMA/rds: Remove FMR support for memory registration
RDMA/srp: Remove support for FMR memory registration
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/rnbd/Kconfig | 28 | ||||
-rw-r--r-- | drivers/block/rnbd/Makefile | 15 | ||||
-rw-r--r-- | drivers/block/rnbd/README | 92 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt-sysfs.c | 639 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt.c | 1729 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt.h | 156 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-common.c | 23 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-log.h | 41 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-proto.h | 303 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-srv-dev.c | 134 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-srv-dev.h | 92 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-srv-sysfs.c | 215 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-srv.c | 844 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-srv.h | 78 |
16 files changed, 4392 insertions, 0 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 025b1b77b11a..084b9efcefca 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -458,4 +458,6 @@ config BLK_DEV_RSXX To compile this driver as a module, choose M here: the module will be called rsxx. +source "drivers/block/rnbd/Kconfig" + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 795facd8cf19..e1f63117ee94 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_ZRAM) += zram/ +obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o null_blk-objs := null_blk_main.o diff --git a/drivers/block/rnbd/Kconfig b/drivers/block/rnbd/Kconfig new file mode 100644 index 000000000000..4b6d3d816d1f --- /dev/null +++ b/drivers/block/rnbd/Kconfig @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +config BLK_DEV_RNBD + bool + +config BLK_DEV_RNBD_CLIENT + tristate "RDMA Network Block Device driver client" + depends on INFINIBAND_RTRS_CLIENT + select BLK_DEV_RNBD + help + RNBD client is a network block device driver using rdma transport. + + RNBD client allows for mapping of a remote block devices over + RTRS protocol from a target system where RNBD server is running. + + If unsure, say N. + +config BLK_DEV_RNBD_SERVER + tristate "RDMA Network Block Device driver server" + depends on INFINIBAND_RTRS_SERVER + select BLK_DEV_RNBD + help + RNBD server is the server side of RNBD using rdma transport. + + RNBD server allows for exporting local block devices to a remote client + over RTRS protocol. + + If unsure, say N. diff --git a/drivers/block/rnbd/Makefile b/drivers/block/rnbd/Makefile new file mode 100644 index 000000000000..5bb1a7ad1ada --- /dev/null +++ b/drivers/block/rnbd/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +ccflags-y := -I$(srctree)/drivers/infiniband/ulp/rtrs + +rnbd-client-y := rnbd-clt.o \ + rnbd-clt-sysfs.o \ + rnbd-common.o + +rnbd-server-y := rnbd-common.o \ + rnbd-srv.o \ + rnbd-srv-dev.o \ + rnbd-srv-sysfs.o + +obj-$(CONFIG_BLK_DEV_RNBD_CLIENT) += rnbd-client.o +obj-$(CONFIG_BLK_DEV_RNBD_SERVER) += rnbd-server.o diff --git a/drivers/block/rnbd/README b/drivers/block/rnbd/README new file mode 100644 index 000000000000..1773c0aa0bd4 --- /dev/null +++ b/drivers/block/rnbd/README @@ -0,0 +1,92 @@ +******************************** +RDMA Network Block Device (RNBD) +******************************** + +Introduction +------------ + +RNBD (RDMA Network Block Device) is a pair of kernel modules +(client and server) that allow for remote access of a block device on +the server over RTRS protocol using the RDMA (InfiniBand, RoCE, iWARP) +transport. After being mapped, the remote block devices can be accessed +on the client side as local block devices. + +I/O is transferred between client and server by the RTRS transport +modules. The administration of RNBD and RTRS modules is done via +sysfs entries. + +Requirements +------------ + + RTRS kernel modules + +Quick Start +----------- + +Server side: + # modprobe rnbd_server + +Client side: + # modprobe rnbd_client + # echo "sessname=blya path=ip:10.50.100.66 device_path=/dev/ram0" > \ + /sys/devices/virtual/rnbd-client/ctl/map_device + + Where "sessname=" is a session name, a string to identify the session + on client and on server sides; "path=" is a destination IP address or + a pair of a source and a destination IPs, separated by comma. Multiple + "path=" options can be specified in order to use multipath (see RTRS + description for details); "device_path=" is the block device to be + mapped from the server side. After the session to the server machine is + established, the mapped device will appear on the client side under + /dev/rnbd<N>. + + +RNBD-Server Module Parameters +============================= + +dev_search_path +--------------- + +When a device is mapped from the client, the server generates the path +to the block device on the server side by concatenating dev_search_path +and the "device_path" that was specified in the map_device operation. + +The default dev_search_path is: "/". + +dev_search_path option can also contain %SESSNAME% in order to provide +different device namespaces for different sessions. See "device_path" +option for details. + +============================ +Protocol (rnbd/rnbd-proto.h) +============================ + +1. Before mapping first device from a given server, client sends an +RNBD_MSG_SESS_INFO to the server. Server responds with +RNBD_MSG_SESS_INFO_RSP. Currently the messages only contain the protocol +version for backward compatibility. + +2. Client requests to open a device by sending RNBD_MSG_OPEN message. This +contains the path to the device and access mode (read-only or writable). +Server responds to the message with RNBD_MSG_OPEN_RSP. This contains +a 32 bit device id to be used for IOs and device "geometry" related +information: side, max_hw_sectors, etc. + +3. Client attaches RNBD_MSG_IO to each IO message send to a device. This +message contains device id, provided by server in his rnbd_msg_open_rsp, +sector to be accessed, read-write flags and bi_size. + +4. Client closes a device by sending RNBD_MSG_CLOSE which contains only the +device id provided by the server. + +========================================= +Contributors List(in alphabetical order) +========================================= +Danil Kipnis <danil.kipnis@profitbricks.com> +Fabian Holler <mail@fholler.de> +Guoqing Jiang <guoqing.jiang@cloud.ionos.com> +Jack Wang <jinpu.wang@profitbricks.com> +Kleber Souza <kleber.souza@profitbricks.com> +Lutz Pogrell <lutz.pogrell@cloud.ionos.com> +Milind Dumbare <Milind.dumbare@gmail.com> +Roman Penyaev <roman.penyaev@profitbricks.com> diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c new file mode 100644 index 000000000000..4f4474eecadb --- /dev/null +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -0,0 +1,639 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/parser.h> +#include <linux/module.h> +#include <linux/in6.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/device.h> +#include <rdma/ib.h> +#include <rdma/rdma_cm.h> + +#include "rnbd-clt.h" + +static struct device *rnbd_dev; +static struct class *rnbd_dev_class; +static struct kobject *rnbd_devs_kobj; + +enum { + RNBD_OPT_ERR = 0, + RNBD_OPT_DEST_PORT = 1 << 0, + RNBD_OPT_PATH = 1 << 1, + RNBD_OPT_DEV_PATH = 1 << 2, + RNBD_OPT_ACCESS_MODE = 1 << 3, + RNBD_OPT_SESSNAME = 1 << 6, +}; + +static const unsigned int rnbd_opt_mandatory[] = { + RNBD_OPT_PATH, + RNBD_OPT_DEV_PATH, + RNBD_OPT_SESSNAME, +}; + +static const match_table_t rnbd_opt_tokens = { + {RNBD_OPT_PATH, "path=%s" }, + {RNBD_OPT_DEV_PATH, "device_path=%s"}, + {RNBD_OPT_DEST_PORT, "dest_port=%d" }, + {RNBD_OPT_ACCESS_MODE, "access_mode=%s"}, + {RNBD_OPT_SESSNAME, "sessname=%s" }, + {RNBD_OPT_ERR, NULL }, +}; + +struct rnbd_map_options { + char *sessname; + struct rtrs_addr *paths; + size_t *path_cnt; + char *pathname; + u16 *dest_port; + enum rnbd_access_mode *access_mode; +}; + +static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt, + struct rnbd_map_options *opt) +{ + char *options, *sep_opt; + char *p; + substring_t args[MAX_OPT_ARGS]; + int opt_mask = 0; + int token; + int ret = -EINVAL; + int i, dest_port; + int p_cnt = 0; + + options = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + sep_opt = strstrip(options); + while ((p = strsep(&sep_opt, " ")) != NULL) { + if (!*p) + continue; + + token = match_token(p, rnbd_opt_tokens, args); + opt_mask |= token; + + switch (token) { + case RNBD_OPT_SESSNAME: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + if (strlen(p) > NAME_MAX) { + pr_err("map_device: sessname too long\n"); + ret = -EINVAL; + kfree(p); + goto out; + } + strlcpy(opt->sessname, p, NAME_MAX); + kfree(p); + break; + + case RNBD_OPT_PATH: + if (p_cnt >= max_path_cnt) { + pr_err("map_device: too many (> %zu) paths provided\n", + max_path_cnt); + ret = -ENOMEM; + goto out; + } + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + + ret = rtrs_addr_to_sockaddr(p, strlen(p), + *opt->dest_port, + &opt->paths[p_cnt]); + if (ret) { + pr_err("Can't parse path %s: %d\n", p, ret); + kfree(p); + goto out; + } + + p_cnt++; + + kfree(p); + break; + + case RNBD_OPT_DEV_PATH: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + if (strlen(p) > NAME_MAX) { + pr_err("map_device: Device path too long\n"); + ret = -EINVAL; + kfree(p); + goto out; + } + strlcpy(opt->pathname, p, NAME_MAX); + kfree(p); + break; + + case RNBD_OPT_DEST_PORT: + if (match_int(args, &dest_port) || dest_port < 0 || + dest_port > 65535) { + pr_err("bad destination port number parameter '%d'\n", + dest_port); + ret = -EINVAL; + goto out; + } + *opt->dest_port = dest_port; + break; + + case RNBD_OPT_ACCESS_MODE: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + + if (!strcmp(p, "ro")) { + *opt->access_mode = RNBD_ACCESS_RO; + } else if (!strcmp(p, "rw")) { + *opt->access_mode = RNBD_ACCESS_RW; + } else if (!strcmp(p, "migration")) { + *opt->access_mode = RNBD_ACCESS_MIGRATION; + } else { + pr_err("map_device: Invalid access_mode: '%s'\n", + p); + ret = -EINVAL; + kfree(p); + goto out; + } + + kfree(p); + break; + + default: + pr_err("map_device: Unknown parameter or missing value '%s'\n", + p); + ret = -EINVAL; + goto out; + } + } + + for (i = 0; i < ARRAY_SIZE(rnbd_opt_mandatory); i++) { + if ((opt_mask & rnbd_opt_mandatory[i])) { + ret = 0; + } else { + pr_err("map_device: Parameters missing\n"); + ret = -EINVAL; + break; + } + } + +out: + *opt->path_cnt = p_cnt; + kfree(options); + return ret; +} + +static ssize_t state_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + switch (dev->dev_state) { + case DEV_STATE_INIT: + return snprintf(page, PAGE_SIZE, "init\n"); + case DEV_STATE_MAPPED: + /* TODO fix cli tool before changing to proper state */ + return snprintf(page, PAGE_SIZE, "open\n"); + case DEV_STATE_MAPPED_DISCONNECTED: + /* TODO fix cli tool before changing to proper state */ + return snprintf(page, PAGE_SIZE, "closed\n"); + case DEV_STATE_UNMAPPED: + return snprintf(page, PAGE_SIZE, "unmapped\n"); + default: + return snprintf(page, PAGE_SIZE, "unknown\n"); + } +} + +static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state); + +static ssize_t mapping_path_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname); +} + +static struct kobj_attribute rnbd_clt_mapping_path_attr = + __ATTR_RO(mapping_path); + +static ssize_t access_mode_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + return snprintf(page, PAGE_SIZE, "%s\n", + rnbd_access_mode_str(dev->access_mode)); +} + +static struct kobj_attribute rnbd_clt_access_mode = + __ATTR_RO(access_mode); + +static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo <normal|force> > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_clt_unmap_dev_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_clt_dev *dev; + char *opt, *options; + bool force; + int err; + + opt = kstrdup(buf, GFP_KERNEL); + if (!opt) + return -ENOMEM; + + options = strstrip(opt); + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + if (sysfs_streq(options, "normal")) { + force = false; + } else if (sysfs_streq(options, "force")) { + force = true; + } else { + rnbd_clt_err(dev, + "unmap_device: Invalid value: %s\n", + options); + err = -EINVAL; + goto out; + } + + rnbd_clt_info(dev, "Unmapping device, option: %s.\n", + force ? "force" : "normal"); + + /* + * We take explicit module reference only for one reason: do not + * race with lockless rnbd_destroy_sessions(). + */ + if (!try_module_get(THIS_MODULE)) { + err = -ENODEV; + goto out; + } + err = rnbd_clt_unmap_device(dev, force, &attr->attr); + if (err) { + if (err != -EALREADY) + rnbd_clt_err(dev, "unmap_device: %d\n", err); + goto module_put; + } + + /* + * Here device can be vanished! + */ + + err = count; + +module_put: + module_put(THIS_MODULE); +out: + kfree(opt); + + return err; +} + +static struct kobj_attribute rnbd_clt_unmap_device_attr = + __ATTR(unmap_device, 0644, rnbd_clt_unmap_dev_show, + rnbd_clt_unmap_dev_store); + +static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, + "Usage: echo <new size in sectors> > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned long sectors; + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + ret = kstrtoul(buf, 0, §ors); + if (ret) + return ret; + + ret = rnbd_clt_resize_disk(dev, (size_t)sectors); + if (ret) + return ret; + + return count; +} + +static struct kobj_attribute rnbd_clt_resize_dev_attr = + __ATTR(resize, 0644, rnbd_clt_resize_dev_show, + rnbd_clt_resize_dev_store); + +static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_clt_dev *dev; + char *opt, *options; + int err; + + opt = kstrdup(buf, GFP_KERNEL); + if (!opt) + return -ENOMEM; + + options = strstrip(opt); + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + if (!sysfs_streq(options, "1")) { + rnbd_clt_err(dev, + "remap_device: Invalid value: %s\n", + options); + err = -EINVAL; + goto out; + } + err = rnbd_clt_remap_device(dev); + if (likely(!err)) + err = count; + +out: + kfree(opt); + + return err; +} + +static struct kobj_attribute rnbd_clt_remap_device_attr = + __ATTR(remap_device, 0644, rnbd_clt_remap_dev_show, + rnbd_clt_remap_dev_store); + +static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname); +} + +static struct kobj_attribute rnbd_clt_session_attr = + __ATTR_RO(session); + +static struct attribute *rnbd_dev_attrs[] = { + &rnbd_clt_unmap_device_attr.attr, + &rnbd_clt_resize_dev_attr.attr, + &rnbd_clt_remap_device_attr.attr, + &rnbd_clt_mapping_path_attr.attr, + &rnbd_clt_state_attr.attr, + &rnbd_clt_session_attr.attr, + &rnbd_clt_access_mode.attr, + NULL, +}; + +void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) +{ + /* + * The module unload rnbd_client_exit path is racing with unmapping of + * the last single device from the sysfs manually + * i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because + * of sysfs link already was removed already. + */ + if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) { + sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); + module_put(THIS_MODULE); + } +} + +static struct kobj_type rnbd_dev_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_attrs = rnbd_dev_attrs, +}; + +static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev) +{ + int ret; + struct kobject *gd_kobj = &disk_to_dev(dev->gd)->kobj; + + ret = kobject_init_and_add(&dev->kobj, &rnbd_dev_ktype, gd_kobj, "%s", + "rnbd"); + if (ret) + rnbd_clt_err(dev, "Failed to create device sysfs dir, err: %d\n", + ret); + + return ret; +} + +static ssize_t rnbd_clt_map_device_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *page) +{ + return scnprintf(page, PAGE_SIZE, + "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n", + attr->attr.name); +} + +static int rnbd_clt_get_path_name(struct rnbd_clt_dev *dev, char *buf, + size_t len) +{ + int ret; + char pathname[NAME_MAX], *s; + + strlcpy(pathname, dev->pathname, sizeof(pathname)); + while ((s = strchr(pathname, '/'))) + s[0] = '!'; + + ret = snprintf(buf, len, "%s", pathname); + if (ret >= len) + return -ENAMETOOLONG; + + return 0; +} + +static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) +{ + struct kobject *gd_kobj = &disk_to_dev(dev->gd)->kobj; + int ret; + + ret = rnbd_clt_get_path_name(dev, dev->blk_symlink_name, + sizeof(dev->blk_symlink_name)); + if (ret) { + rnbd_clt_err(dev, "Failed to get /sys/block symlink path, err: %d\n", + ret); + goto out_err; + } + + ret = sysfs_create_link(rnbd_devs_kobj, gd_kobj, + dev->blk_symlink_name); + if (ret) { + rnbd_clt_err(dev, "Creating /sys/block symlink failed, err: %d\n", + ret); + goto out_err; + } + + return 0; + +out_err: + dev->blk_symlink_name[0] = '\0'; + return ret; +} + +static ssize_t rnbd_clt_map_device_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_clt_dev *dev; + struct rnbd_map_options opt; + int ret; + char pathname[NAME_MAX]; + char sessname[NAME_MAX]; + enum rnbd_access_mode access_mode = RNBD_ACCESS_RW; + u16 port_nr = RTRS_PORT; + + struct sockaddr_storage *addrs; + struct rtrs_addr paths[6]; + size_t path_cnt; + + opt.sessname = sessname; + opt.paths = paths; + opt.path_cnt = &path_cnt; + opt.pathname = pathname; + opt.dest_port = &port_nr; + opt.access_mode = &access_mode; + addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + + for (path_cnt = 0; path_cnt < ARRAY_SIZE(paths); path_cnt++) { + paths[path_cnt].src = &addrs[path_cnt * 2]; + paths[path_cnt].dst = &addrs[path_cnt * 2 + 1]; + } + + ret = rnbd_clt_parse_map_options(buf, ARRAY_SIZE(paths), &opt); + if (ret) + goto out; + + pr_info("Mapping device %s on session %s, (access_mode: %s)\n", + pathname, sessname, + rnbd_access_mode_str(access_mode)); + + dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname, + access_mode); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto out; + } + + ret = rnbd_clt_add_dev_kobj(dev); + if (ret) + goto unmap_dev; + + ret = rnbd_clt_add_dev_symlink(dev); + if (ret) + goto unmap_dev; + + kfree(addrs); + return count; + +unmap_dev: + rnbd_clt_unmap_device(dev, true, NULL); +out: + kfree(addrs); + return ret; +} + +static struct kobj_attribute rnbd_clt_map_device_attr = + __ATTR(map_device, 0644, + rnbd_clt_map_device_show, rnbd_clt_map_device_store); + +static struct attribute *default_attrs[] = { + &rnbd_clt_map_device_attr.attr, + NULL, +}; + +static struct attribute_group default_attr_group = { + .attrs = default_attrs, +}; + +static const struct attribute_group *default_attr_groups[] = { + &default_attr_group, + NULL, +}; + +int rnbd_clt_create_sysfs_files(void) +{ + int err; + + rnbd_dev_class = class_create(THIS_MODULE, "rnbd-client"); + if (IS_ERR(rnbd_dev_class)) + return PTR_ERR(rnbd_dev_class); + + rnbd_dev = device_create_with_groups(rnbd_dev_class, NULL, + MKDEV(0, 0), NULL, + default_attr_groups, "ctl"); + if (IS_ERR(rnbd_dev)) { + err = PTR_ERR(rnbd_dev); + goto cls_destroy; + } + rnbd_devs_kobj = kobject_create_and_add("devices", &rnbd_dev->kobj); + if (!rnbd_devs_kobj) { + err = -ENOMEM; + goto dev_destroy; + } + + return 0; + +dev_destroy: + device_destroy(rnbd_dev_class, MKDEV(0, 0)); +cls_destroy: + class_destroy(rnbd_dev_class); + + return err; +} + +void rnbd_clt_destroy_default_group(void) +{ + sysfs_remove_group(&rnbd_dev->kobj, &default_attr_group); +} + +void rnbd_clt_destroy_sysfs_files(void) +{ + kobject_del(rnbd_devs_kobj); + kobject_put(rnbd_devs_kobj); + device_destroy(rnbd_dev_class, MKDEV(0, 0)); + class_destroy(rnbd_dev_class); +} diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c new file mode 100644 index 000000000000..cc6a4e2587ae --- /dev/null +++ b/drivers/block/rnbd/rnbd-clt.c @@ -0,0 +1,1729 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RDMA Network Block Driver + * + * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. + * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. + * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. + */ + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt + +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/hdreg.h> +#include <linux/scatterlist.h> +#include <linux/idr.h> + +#include "rnbd-clt.h" + +MODULE_DESCRIPTION("RDMA Network Block Device Client"); +MODULE_LICENSE("GPL"); + +static int rnbd_client_major; +static DEFINE_IDA(index_ida); +static DEFINE_MUTEX(ida_lock); +static DEFINE_MUTEX(sess_lock); +static LIST_HEAD(sess_list); + +/* + * Maximum number of partitions an instance can have. + * 6 bits = 64 minors = 63 partitions (one minor is used for the device itself) + */ +#define RNBD_PART_BITS 6 + +static inline bool rnbd_clt_get_sess(struct rnbd_clt_session *sess) +{ + return refcount_inc_not_zero(&sess->refcount); +} + +static void free_sess(struct rnbd_clt_session *sess); + +static void rnbd_clt_put_sess(struct rnbd_clt_session *sess) +{ + might_sleep(); + + if (refcount_dec_and_test(&sess->refcount)) + free_sess(sess); +} + +static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) +{ + might_sleep(); + + if (!refcount_dec_and_test(&dev->refcount)) + return; + + mutex_lock(&ida_lock); + ida_simple_remove(&index_ida, dev->clt_device_id); + mutex_unlock(&ida_lock); + kfree(dev->hw_queues); + rnbd_clt_put_sess(dev->sess); + mutex_destroy(&dev->lock); + kfree(dev); +} + +static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev) +{ + return refcount_inc_not_zero(&dev->refcount); +} + +static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, + const struct rnbd_msg_open_rsp *rsp) +{ + struct rnbd_clt_session *sess = dev->sess; + + if (!rsp->logical_block_size) + return -EINVAL; + + dev->device_id = le32_to_cpu(rsp->device_id); + dev->nsectors = le64_to_cpu(rsp->nsectors); + dev->logical_block_size = le16_to_cpu(rsp->logical_block_size); + dev->physical_block_size = le16_to_cpu(rsp->physical_block_size); + dev->max_write_same_sectors = le32_to_cpu(rsp->max_write_same_sectors); + dev->max_discard_sectors = le32_to_cpu(rsp->max_discard_sectors); + dev->discard_granularity = le32_to_cpu(rsp->discard_granularity); + dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); + dev->secure_discard = le16_to_cpu(rsp->secure_discard); + dev->rotational = rsp->rotational; + + dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; + dev->max_segments = BMAX_SEGMENTS; + + dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors, + le32_to_cpu(rsp->max_hw_sectors)); + dev->max_segments = min_t(u16, dev->max_segments, + le16_to_cpu(rsp->max_segments)); + + return 0; +} + +static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, + size_t new_nsectors) +{ + int err = 0; + + rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", + dev->nsectors, new_nsectors); + dev->nsectors = new_nsectors; + set_capacity(dev->gd, dev->nsectors); + err = revalidate_disk(dev->gd); + if (err) + rnbd_clt_err(dev, + "Failed to change device size from %zu to %zu, err: %d\n", + dev->nsectors, new_nsectors, err); + return err; +} + +static int process_msg_open_rsp(struct rnbd_clt_dev *dev, + struct rnbd_msg_open_rsp *rsp) +{ + int err = 0; + + mutex_lock(&dev->lock); + if (dev->dev_state == DEV_STATE_UNMAPPED) { + rnbd_clt_info(dev, + "Ignoring Open-Response message from server for unmapped device\n"); + err = -ENOENT; + goto out; + } + if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) { + u64 nsectors = le64_to_cpu(rsp->nsectors); + + /* + * If the device was remapped and the size changed in the + * meantime we need to revalidate it + */ + if (dev->nsectors != nsectors) + rnbd_clt_change_capacity(dev, nsectors); + rnbd_clt_info(dev, "Device online, device remapped successfully\n"); + } + err = rnbd_clt_set_dev_attr(dev, rsp); + if (err) + goto out; + dev->dev_state = DEV_STATE_MAPPED; + +out: + mutex_unlock(&dev->lock); + + return err; +} + +int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize) +{ + int ret = 0; + + mutex_lock(&dev->lock); + if (dev->dev_state != DEV_STATE_MAPPED) { + pr_err("Failed to set new size of the device, device is not opened\n"); + ret = -ENOENT; + goto out; + } + ret = rnbd_clt_change_capacity(dev, newsize); + +out: + mutex_unlock(&dev->lock); + + return ret; +} + +static inline void rnbd_clt_dev_requeue(struct rnbd_queue *q) +{ + if (WARN_ON(!q->hctx)) + return; + + /* We can come here from interrupt, thus async=true */ + blk_mq_run_hw_queue(q->hctx, true); +} + +enum { + RNBD_DELAY_IFBUSY = -1, +}; + +/** + * rnbd_get_cpu_qlist() - finds a list with HW queues to be rerun + * @sess: Session to find a queue for + * @cpu: Cpu to start the search from + * + * Description: + * Each CPU has a list of HW queues, which |