diff options
Diffstat (limited to 'drivers/infiniband/sw')
34 files changed, 12884 insertions, 0 deletions
diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile index 988b6a0101a4..8b095b27db87 100644 --- a/drivers/infiniband/sw/Makefile +++ b/drivers/infiniband/sw/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ +obj-$(CONFIG_RDMA_RXE) += rxe/ diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig new file mode 100644 index 000000000000..1e4e628fe7b0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -0,0 +1,24 @@ +config RDMA_RXE + tristate "Software RDMA over Ethernet (RoCE) driver" + depends on INET && PCI && INFINIBAND + depends on NET_UDP_TUNNEL + ---help--- + This driver implements the InfiniBand RDMA transport over + the Linux network stack. It enables a system with a + standard Ethernet adapter to interoperate with a RoCE + adapter or with another system running the RXE driver. + Documentation on InfiniBand and RoCE can be downloaded at + www.infinibandta.org and www.openfabrics.org. (See also + siw which is a similar software driver for iWARP.) + + The driver is split into two layers, one interfaces with the + Linux RDMA stack and implements a kernel or user space + verbs API. The user space verbs API requires a support + library named librxe which is loaded by the generic user + space verbs API, libibverbs. The other layer interfaces + with the Linux network stack at layer 3. + + To configure and work with soft-RoCE driver please use the + following wiki page under "configure Soft-RoCE (RXE)" section: + + https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile new file mode 100644 index 000000000000..3b3fb9d1c470 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Makefile @@ -0,0 +1,24 @@ +obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o + +rdma_rxe-y := \ + rxe.o \ + rxe_comp.o \ + rxe_req.o \ + rxe_resp.o \ + rxe_recv.o \ + rxe_pool.o \ + rxe_queue.o \ + rxe_verbs.o \ + rxe_av.o \ + rxe_srq.o \ + rxe_qp.o \ + rxe_cq.o \ + rxe_mr.o \ + rxe_dma.o \ + rxe_opcode.o \ + rxe_mmap.o \ + rxe_icrc.o \ + rxe_mcast.o \ + rxe_task.o \ + rxe_net.o \ + rxe_sysfs.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c new file mode 100644 index 000000000000..55f0e8f0ca79 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); +MODULE_DESCRIPTION("Soft RDMA transport"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.2"); + +/* free resources for all ports on a device */ +static void rxe_cleanup_ports(struct rxe_dev *rxe) +{ + kfree(rxe->port.pkey_tbl); + rxe->port.pkey_tbl = NULL; + +} + +/* free resources for a rxe device all objects created for this device must + * have been destroyed + */ +static void rxe_cleanup(struct rxe_dev *rxe) +{ + rxe_pool_cleanup(&rxe->uc_pool); + rxe_pool_cleanup(&rxe->pd_pool); + rxe_pool_cleanup(&rxe->ah_pool); + rxe_pool_cleanup(&rxe->srq_pool); + rxe_pool_cleanup(&rxe->qp_pool); + rxe_pool_cleanup(&rxe->cq_pool); + rxe_pool_cleanup(&rxe->mr_pool); + rxe_pool_cleanup(&rxe->mw_pool); + rxe_pool_cleanup(&rxe->mc_grp_pool); + rxe_pool_cleanup(&rxe->mc_elem_pool); + + rxe_cleanup_ports(rxe); +} + +/* called when all references have been dropped */ +void rxe_release(struct kref *kref) +{ + struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt); + + rxe_cleanup(rxe); + ib_dealloc_device(&rxe->ib_dev); +} + +void rxe_dev_put(struct rxe_dev *rxe) +{ + kref_put(&rxe->ref_cnt, rxe_release); +} +EXPORT_SYMBOL_GPL(rxe_dev_put); + +/* initialize rxe device parameters */ +static int rxe_init_device_param(struct rxe_dev *rxe) +{ + rxe->max_inline_data = RXE_MAX_INLINE_DATA; + + rxe->attr.fw_ver = RXE_FW_VER; + rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; + rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; + rxe->attr.vendor_id = RXE_VENDOR_ID; + rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; + rxe->attr.hw_ver = RXE_HW_VER; + rxe->attr.max_qp = RXE_MAX_QP; + rxe->attr.max_qp_wr = RXE_MAX_QP_WR; + rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; + rxe->attr.max_sge = RXE_MAX_SGE; + rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; + rxe->attr.max_cq = RXE_MAX_CQ; + rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; + rxe->attr.max_mr = RXE_MAX_MR; + rxe->attr.max_pd = RXE_MAX_PD; + rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; + rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; + rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; + rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; + rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; + rxe->attr.atomic_cap = RXE_ATOMIC_CAP; + rxe->attr.max_ee = RXE_MAX_EE; + rxe->attr.max_rdd = RXE_MAX_RDD; + rxe->attr.max_mw = RXE_MAX_MW; + rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; + rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; + rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; + rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; + rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; + rxe->attr.max_ah = RXE_MAX_AH; + rxe->attr.max_fmr = RXE_MAX_FMR; + rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; + rxe->attr.max_srq = RXE_MAX_SRQ; + rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; + rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; + rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; + rxe->attr.max_pkeys = RXE_MAX_PKEYS; + rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + + rxe->max_ucontext = RXE_MAX_UCONTEXT; + + return 0; +} + +/* initialize port attributes */ +static int rxe_init_port_param(struct rxe_port *port) +{ + port->attr.state = RXE_PORT_STATE; + port->attr.max_mtu = RXE_PORT_MAX_MTU; + port->attr.active_mtu = RXE_PORT_ACTIVE_MTU; + port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; + port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; + port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; + port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR; + port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR; + port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN; + port->attr.lid = RXE_PORT_LID; + port->attr.sm_lid = RXE_PORT_SM_LID; + port->attr.lmc = RXE_PORT_LMC; + port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM; + port->attr.sm_sl = RXE_PORT_SM_SL; + port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT; + port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY; + port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; + port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; + port->attr.phys_state = RXE_PORT_PHYS_STATE; + port->mtu_cap = + ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU); + port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); + + return 0; +} + +/* initialize port state, note IB convention that HCA ports are always + * numbered from 1 + */ +static int rxe_init_ports(struct rxe_dev *rxe) +{ + struct rxe_port *port = &rxe->port; + + rxe_init_port_param(port); + + if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) + return -EINVAL; + + port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, + sizeof(*port->pkey_tbl), GFP_KERNEL); + + if (!port->pkey_tbl) + return -ENOMEM; + + port->pkey_tbl[0] = 0xffff; + port->port_guid = rxe->ifc_ops->port_guid(rxe); + + spin_lock_init(&port->port_lock); + + return 0; +} + +/* init pools of managed objects */ +static int rxe_init_pools(struct rxe_dev *rxe) +{ + int err; + + err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, + rxe->max_ucontext); + if (err) + goto err1; + + err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, + rxe->attr.max_pd); + if (err) + goto err2; + + err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, + rxe->attr.max_ah); + if (err) + goto err3; + + err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, + rxe->attr.max_srq); + if (err) + goto err4; + + err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, + rxe->attr.max_qp); + if (err) + goto err5; + + err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, + rxe->attr.max_cq); + if (err) + goto err6; + + err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, + rxe->attr.max_mr); + if (err) + goto err7; + + err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, + rxe->attr.max_mw); + if (err) + goto err8; + + err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, + rxe->attr.max_mcast_grp); + if (err) + goto err9; + + err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, + rxe->attr.max_total_mcast_qp_attach); + if (err) + goto err10; + + return 0; + +err10: + rxe_pool_cleanup(&rxe->mc_grp_pool); +err9: + rxe_pool_cleanup(&rxe->mw_pool); +err8: + rxe_pool_cleanup(&rxe->mr_pool); +err7: + rxe_pool_cleanup(&rxe->cq_pool); +err6: + rxe_pool_cleanup(&rxe->qp_pool); +err5: + rxe_pool_cleanup(&rxe->srq_pool); +err4: + rxe_pool_cleanup(&rxe->ah_pool); +err3: + rxe_pool_cleanup(&rxe->pd_pool); +err2: + rxe_pool_cleanup(&rxe->uc_pool); +err1: + return err; +} + +/* initialize rxe device state */ +static int rxe_init(struct rxe_dev *rxe) +{ + int err; + + /* init default device parameters */ + rxe_init_device_param(rxe); + + err = rxe_init_ports(rxe); + if (err) + goto err1; + + err = rxe_init_pools(rxe); + if (err) + goto err2; + + /* init pending mmap list */ + spin_lock_init(&rxe->mmap_offset_lock); + spin_lock_init(&rxe->pending_lock); + INIT_LIST_HEAD(&rxe->pending_mmaps); + INIT_LIST_HEAD(&rxe->list); + + mutex_init(&rxe->usdev_lock); + + return 0; + +err2: + rxe_cleanup_ports(rxe); +err1: + return err; +} + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) +{ + struct rxe_port *port = &rxe->port; + enum ib_mtu mtu; + + mtu = eth_mtu_int_to_enum(ndev_mtu); + + /* Make sure that new MTU in range */ + mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256; + + port->attr.active_mtu = mtu; + port->mtu_cap = ib_mtu_enum_to_int(mtu); + + return 0; +} +EXPORT_SYMBOL(rxe_set_mtu); + +/* called by ifc layer to create new rxe device. + * The caller should allocate memory for rxe by calling ib_alloc_device. + */ +int rxe_add(struct rxe_dev *rxe, unsigned int mtu) +{ + int err; + + kref_init(&rxe->ref_cnt); + + err = rxe_init(rxe); + if (err) + goto err1; + + err = rxe_set_mtu(rxe, mtu); + if (err) + goto err1; + + err = rxe_register_device(rxe); + if (err) + goto err1; + + return 0; + +err1: + rxe_dev_put(rxe); + return err; +} +EXPORT_SYMBOL(rxe_add); + +/* called by the ifc layer to remove a device */ +void rxe_remove(struct rxe_dev *rxe) +{ + rxe_unregister_device(rxe); + + rxe_dev_put(rxe); +} +EXPORT_SYMBOL(rxe_remove); + +static int __init rxe_module_init(void) +{ + int err; + + /* initialize slab caches for managed objects */ + err = rxe_cache_init(); + if (err) { + pr_err("rxe: unable to init object pools\n"); + return err; + } + + err = rxe_net_init(); + if (err) { + pr_err("rxe: unable to init\n"); + rxe_cache_exit(); + return err; + } + pr_info("rxe: loaded\n"); + + return 0; +} + +static void __exit rxe_module_exit(void) +{ + rxe_remove_all(); + rxe_net_exit(); + rxe_cache_exit(); + + pr_info("rxe: unloaded\n"); +} + +module_init(rxe_module_init); +module_exit(rxe_module_exit); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h new file mode 100644 index 000000000000..12c71c549f97 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_H +#define RXE_H + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/crc32.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> + +#include "rxe_net.h" +#include "rxe_opcode.h" +#include "rxe_hdr.h" +#include "rxe_param.h" +#include "rxe_verbs.h" + +#define RXE_UVERBS_ABI_VERSION (1) + +#define IB_PHYS_STATE_LINK_UP (5) +#define IB_PHYS_STATE_LINK_DOWN (3) + +#define RXE_ROCE_V2_SPORT (0xc000) + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); + +int rxe_add(struct rxe_dev *rxe, unsigned int mtu); +void rxe_remove(struct rxe_dev *rxe); +void rxe_remove_all(void); + +int rxe_rcv(struct sk_buff *skb); + +void rxe_dev_put(struct rxe_dev *rxe); +struct rxe_dev *net_to_rxe(struct net_device *ndev); +struct rxe_dev *get_rxe_by_name(const char* name); + +void rxe_port_up(struct rxe_dev *rxe); +void rxe_port_down(struct rxe_dev *rxe); + +#endif /* RXE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c new file mode 100644 index 000000000000..5c9474212d4e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) +{ + struct rxe_port *port; + + if (attr->port_num != 1) { + pr_info("rxe: invalid port_num = %d\n", attr->port_num); + return -EINVAL; + } + + port = &rxe->port; + + if (attr->ah_flags & IB_AH_GRH) { + if (attr->grh.sgid_index > port->attr.gid_tbl_len) { + pr_info("rxe: invalid sgid index = %d\n", + attr->grh.sgid_index); + return -EINVAL; + } + } + + return 0; +} + +int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num, + struct rxe_av *av, struct ib_ah_attr *attr) +{ + memset(av, 0, sizeof(*av)); + memcpy(&av->grh, &attr->grh, sizeof(attr->grh)); + av->port_num = port_num; + return 0; +} + +int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av, + struct ib_ah_attr *attr) +{ + memcpy(&attr->grh, &av->grh, sizeof(av->grh)); + attr->port_num = av->port_num; + return 0; +} + +int rxe_av_fill_ip_info(struct rxe_dev *rxe, + struct rxe_av *av, + struct ib_ah_attr *attr, + struct ib_gid_attr *sgid_attr, + union ib_gid *sgid) +{ + rdma_gid2ip(&av->sgid_addr._sockaddr, sgid); + rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid); + av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); + + return 0; +} + +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +{ + if (!pkt || !pkt->qp) + return NULL; + + if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) + return &pkt->qp->pri_av; + + return (pkt->wqe) ? &pkt->wqe->av : NULL; +} diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c new file mode 100644 index 000000000000..36f67de44095 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" +#include "rxe_task.h" + +enum comp_state { + COMPST_GET_ACK, + COMPST_GET_WQE, + COMPST_COMP_WQE, + COMPST_COMP_ACK, + COMPST_CHECK_PSN, + COMPST_CHECK_ACK, + COMPST_READ, + COMPST_ATOMIC, + COMPST_WRITE_SEND, + COMPST_UPDATE_COMP, + COMPST_ERROR_RETRY, + COMPST_RNR_RETRY, + COMPST_ERROR, + COMPST_EXIT, /* We have an issue, and we want to rerun the completer */ + COMPST_DONE, /* The completer finished successflly */ +}; + +static char *comp_state_name[] = { + [COMPST_GET_ACK] = "GET ACK", + [COMPST_GET_WQE] = "GET WQE", + [COMPST_COMP_WQE] = "COMP WQE", + [COMPST_COMP_ACK] = "COMP ACK", + [COMPST_CHECK_PSN] = "CHECK PSN", + [COMPST_CHECK_ACK] = "CHECK ACK", + [COMPST_READ] = "READ", + [COMPST_ATOMIC] = "ATOMIC", + [COMPST_WRITE_SEND] = "WRITE/SEND", + [COMPST_UPDATE_COMP] = "UPDATE COMP", + [COMPST_ERROR_RETRY] = "ERROR RETRY", + [COMPST_RNR_RETRY] = "RNR RETRY", + [COMPST_ERROR] = "ERROR", + [COMPST_EXIT] = "EXIT", + [COMPST_DONE] = "DONE", +}; + +static unsigned long rnrnak_usec[32] = { + [IB_RNR_TIMER_655_36] = 655360, + [IB_RNR_TIMER_000_01] = 10, + [IB_RNR_TIMER_000_02] = 20, + [IB_RNR_TIMER_000_03] = 30, + [IB_RNR_TIMER_000_04] = 40, + [IB_RNR_TIMER_000_06] = 60, + [IB_RNR_TIMER_000_08] = 80, + [IB_RNR_TIMER_000_12] = 120, + [IB_RNR_TIMER_000_16] = 160, + [IB_RNR_TIMER_000_24] = 240, + [IB_RNR_TIMER_000_32] = 320, + [IB_RNR_TIMER_000_48] = 480, + [IB_RNR_TIMER_000_64] = 640, + [IB_RNR_TIMER_000_96] = 960, + [IB_RNR_TIMER_001_28] = 1280, + [IB_RNR_TIMER_001_92] = 1920, + [IB_RNR_TIMER_002_56] = 2560, + [IB_RNR_TIMER_003_84] = 3840, + [IB_RNR_TIMER_005_12] = 5120, + [IB_RNR_TIMER_007_68] = 7680, + [IB_RNR_TIMER_010_24] = 10240, + [IB_RNR_TIMER_015_36] = 15360, + [IB_RNR_TIMER_020_48] = 20480, + [IB_RNR_TIMER_030_72] = 30720, + [IB_RNR_TIMER_040_96] = 40960, + [IB_RNR_TIMER_061_44] = 61410, + [IB_RNR_TIMER_081_92] = 81920, + [IB_RNR_TIMER_122_88] = 122880, + [IB_RNR_TIMER_163_84] = 163840, + [IB_RNR_TIMER_245_76] = 245760, + [IB_RNR_TIMER_327_68] = 327680, + [IB_RNR_TIMER_491_52] = 491520, +}; + +static inline unsigned long rnrnak_jiffies(u8 timeout) +{ + return max_t(unsigned long, + usecs_to_jiffies(rnrnak_usec[timeout]), 1); +} + +static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE; + case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE; + case IB_WR_SEND: return IB_WC_SEND; + case IB_WR_SEND_WITH_IMM: return IB_WC_SEND; + case IB_WR_RDMA_READ: return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD; + case IB_WR_LSO: return IB_WC_LSO; + case IB_WR_SEND_WITH_INV: return IB_WC_SEND; + case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; + case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; + case IB_WR_REG_MR: return IB_WC_REG_MR; + + default: + return 0xff; + } +} + +void retransmit_timer(unsigned long data) +{ + struct rxe_qp *qp = (struct rxe_qp *)data; + + if (qp->valid) { + qp->comp.timeout = 1; + rxe_run_task(&qp->comp.task, 1); + } +} + +void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, + struct sk_buff *skb) +{ + int must_sched; + + skb_queue_tail(&qp->resp_pkts, skb); + + must_sched = skb_queue_len(&qp->resp_pkts) > 1; + rxe_run_task(&qp->comp.task, must_sched); +} + +static inline enum comp_state get_wqe(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe **wqe_p) +{ + struct rxe_send_wqe *wqe; + + /* we come here whether or not we found a response packet to see if + * there are any posted WQEs + */ + wqe = queue_head(qp->sq.queue); + *wqe_p = wqe; + + /* no WQE or requester has not started it yet */ + if (!wqe || wqe->state == wqe_state_posted) + return pkt ? COMPST_DONE : COMPST_EXIT; + + /* WQE does not require an ack */ + if (wqe->state == wqe_state_done) + return COMPST_COMP_WQE; + + /* WQE caused an error */ + if (wqe->state == wqe_state_error) + return COMPST_ERROR; + + /* we have a WQE, if we also have an ack check its PSN */ + return pkt ? COMPST_CHECK_PSN : COMPST_EXIT; +} + +static inline void reset_retry_counters(struct rxe_qp *qp) +{ + qp->comp.retry_cnt = qp->attr.retry_cnt; + qp->comp.rnr_retry = qp->attr.rnr_retry; +} + +static inline enum comp_state check_psn(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + s32 diff; + + /* check to see if response is past the oldest WQE. if it is, complete + * send/write or error read/atomic + */ + diff = psn_compare(pkt->psn, wqe->last_psn); + if (diff > 0) { + if (wqe->state == wqe_state_pending) { + if (wqe->mask & WR_ATOMIC_OR_READ_MASK) + return COMPST_ERROR_RETRY; + + reset_retry_counters(qp); + return COMPST_COMP_WQE; + } else { + return COMPST_DONE; + } + } + + /* compare response packet to expected response */ + diff = psn_compare(pkt->psn, qp->comp.psn); + if (diff < 0) { + /* response is most likely a retried packet if it matches an + * uncompleted WQE go complete it else ignore it + */ + if (pkt->psn == wqe->last_psn) + return COMPST_COMP_ACK; + els |