From 8700e3e7c4857d28ebaa824509934556da0b3e76 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 16 Jun 2016 16:45:23 +0300 Subject: Soft RoCE driver Soft RoCE (RXE) - The software RoCE driver ib_rxe implements the RDMA transport and registers to the RDMA core device as a kernel verbs provider. It also implements the packet IO layer. On the other hand ib_rxe registers to the Linux netdev stack as a udp encapsulating protocol, in that case RDMA, for sending and receiving packets over any Ethernet device. This yields a RDMA transport over the UDP/Ethernet network layer forming a RoCEv2 compatible device. The configuration procedure of the Soft RoCE drivers requires binding to any existing Ethernet network device. This is done with /sys interface. A userspace Soft RoCE library (librxe) provides user applications the ability to run with Soft RoCE devices. The use of rxe verbs ins user space requires the inclusion of librxe as a device specifics plug-in to libibverbs. librxe is packaged separately. Architecture: +-----------------------------------------------------------+ | Application | +-----------------------------------------------------------+ +-----------------------------------+ | libibverbs | User +-----------------------------------+ +----------------+ +----------------+ | librxe | | HW RoCE lib | +----------------+ +----------------+ +---------------------------------------------------------------+ +--------------+ +------------+ | Sockets | | RDMA ULP | +--------------+ +------------+ +--------------+ +---------------------+ | TCP/IP | | ib_core | +--------------+ +---------------------+ +------------+ +----------------+ Kernel | ib_rxe | | HW RoCE driver | +------------+ +----------------+ +------------------------------------+ | NIC driver | +------------------------------------+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-----------------------------------------------------------+ | Application | +-----------------------------------------------------------+ +-----------------------------------+ | libibverbs | User +-----------------------------------+ +----------------+ +----------------+ | librxe | | HW RoCE lib | +----------------+ +----------------+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------+ +------------+ | Sockets | | RDMA ULP | +--------------+ +------------+ +--------------+ +---------------------+ | TCP/IP | | ib_core | +--------------+ +---------------------+ +------------+ +----------------+ Kernel | ib_rxe | | HW RoCE driver | +------------+ +----------------+ +------------------------------------+ | NIC driver | +------------------------------------+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Soft RoCE resources: [1[ https://github.com/SoftRoCE/librxe-dev librxe - source code in Github [2] https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home - Soft RoCE Wiki page [3] https://github.com/SoftRoCE/librxe-dev - Soft RoCE userspace library Signed-off-by: Kamal Heib Signed-off-by: Amir Vadai Signed-off-by: Moni Shoua Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford --- MAINTAINERS | 9 + drivers/infiniband/Kconfig | 1 + drivers/infiniband/sw/Makefile | 1 + drivers/infiniband/sw/rxe/Kconfig | 24 + drivers/infiniband/sw/rxe/Makefile | 24 + drivers/infiniband/sw/rxe/rxe.c | 386 +++++++++ drivers/infiniband/sw/rxe/rxe.h | 77 ++ drivers/infiniband/sw/rxe/rxe_av.c | 98 +++ drivers/infiniband/sw/rxe/rxe_comp.c | 734 +++++++++++++++++ drivers/infiniband/sw/rxe/rxe_cq.c | 165 ++++ drivers/infiniband/sw/rxe/rxe_dma.c | 166 ++++ drivers/infiniband/sw/rxe/rxe_hdr.h | 952 ++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_icrc.c | 96 +++ drivers/infiniband/sw/rxe/rxe_loc.h | 286 +++++++ drivers/infiniband/sw/rxe/rxe_mcast.c | 190 +++++ drivers/infiniband/sw/rxe/rxe_mmap.c | 173 ++++ drivers/infiniband/sw/rxe/rxe_mr.c | 643 +++++++++++++++ drivers/infiniband/sw/rxe/rxe_net.c | 708 ++++++++++++++++ drivers/infiniband/sw/rxe/rxe_net.h | 53 ++ drivers/infiniband/sw/rxe/rxe_opcode.c | 961 ++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_opcode.h | 129 +++ drivers/infiniband/sw/rxe/rxe_param.h | 172 ++++ drivers/infiniband/sw/rxe/rxe_pool.c | 502 ++++++++++++ drivers/infiniband/sw/rxe/rxe_pool.h | 163 ++++ drivers/infiniband/sw/rxe/rxe_qp.c | 851 ++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_queue.c | 217 +++++ drivers/infiniband/sw/rxe/rxe_queue.h | 178 ++++ drivers/infiniband/sw/rxe/rxe_recv.c | 420 ++++++++++ drivers/infiniband/sw/rxe/rxe_req.c | 726 +++++++++++++++++ drivers/infiniband/sw/rxe/rxe_resp.c | 1380 ++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_srq.c | 193 +++++ drivers/infiniband/sw/rxe/rxe_sysfs.c | 157 ++++ drivers/infiniband/sw/rxe/rxe_task.c | 154 ++++ drivers/infiniband/sw/rxe/rxe_task.h | 95 +++ drivers/infiniband/sw/rxe/rxe_verbs.c | 1330 ++++++++++++++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_verbs.h | 480 +++++++++++ include/uapi/rdma/Kbuild | 1 + include/uapi/rdma/rdma_user_rxe.h | 144 ++++ 38 files changed, 13039 insertions(+) create mode 100644 drivers/infiniband/sw/rxe/Kconfig create mode 100644 drivers/infiniband/sw/rxe/Makefile create mode 100644 drivers/infiniband/sw/rxe/rxe.c create mode 100644 drivers/infiniband/sw/rxe/rxe.h create mode 100644 drivers/infiniband/sw/rxe/rxe_av.c create mode 100644 drivers/infiniband/sw/rxe/rxe_comp.c create mode 100644 drivers/infiniband/sw/rxe/rxe_cq.c create mode 100644 drivers/infiniband/sw/rxe/rxe_dma.c create mode 100644 drivers/infiniband/sw/rxe/rxe_hdr.h create mode 100644 drivers/infiniband/sw/rxe/rxe_icrc.c create mode 100644 drivers/infiniband/sw/rxe/rxe_loc.h create mode 100644 drivers/infiniband/sw/rxe/rxe_mcast.c create mode 100644 drivers/infiniband/sw/rxe/rxe_mmap.c create mode 100644 drivers/infiniband/sw/rxe/rxe_mr.c create mode 100644 drivers/infiniband/sw/rxe/rxe_net.c create mode 100644 drivers/infiniband/sw/rxe/rxe_net.h create mode 100644 drivers/infiniband/sw/rxe/rxe_opcode.c create mode 100644 drivers/infiniband/sw/rxe/rxe_opcode.h create mode 100644 drivers/infiniband/sw/rxe/rxe_param.h create mode 100644 drivers/infiniband/sw/rxe/rxe_pool.c create mode 100644 drivers/infiniband/sw/rxe/rxe_pool.h create mode 100644 drivers/infiniband/sw/rxe/rxe_qp.c create mode 100644 drivers/infiniband/sw/rxe/rxe_queue.c create mode 100644 drivers/infiniband/sw/rxe/rxe_queue.h create mode 100644 drivers/infiniband/sw/rxe/rxe_recv.c create mode 100644 drivers/infiniband/sw/rxe/rxe_req.c create mode 100644 drivers/infiniband/sw/rxe/rxe_resp.c create mode 100644 drivers/infiniband/sw/rxe/rxe_srq.c create mode 100644 drivers/infiniband/sw/rxe/rxe_sysfs.c create mode 100644 drivers/infiniband/sw/rxe/rxe_task.c create mode 100644 drivers/infiniband/sw/rxe/rxe_task.h create mode 100644 drivers/infiniband/sw/rxe/rxe_verbs.c create mode 100644 drivers/infiniband/sw/rxe/rxe_verbs.h create mode 100644 include/uapi/rdma/rdma_user_rxe.h diff --git a/MAINTAINERS b/MAINTAINERS index e1b090f86e0d..4daa6712eac1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7444,6 +7444,15 @@ W: http://www.mellanox.com Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlxsw/ +SOFT-ROCE DRIVER (rxe) +M: Moni Shoua +L: linux-rdma@vger.kernel.org +S: Supported +W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +F: drivers/infiniband/hw/rxe/ +F: include/uapi/rdma/rdma_user_rxe.h + MEMBARRIER SUPPORT M: Mathieu Desnoyers M: "Paul E. McKenney" diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 2137adfbd8c3..e9b7dc037ff8 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -84,6 +84,7 @@ source "drivers/infiniband/ulp/iser/Kconfig" source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/sw/rxe/Kconfig" source "drivers/infiniband/hw/hfi1/Kconfig" diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile index 988b6a0101a4..8b095b27db87 100644 --- a/drivers/infiniband/sw/Makefile +++ b/drivers/infiniband/sw/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ +obj-$(CONFIG_RDMA_RXE) += rxe/ diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig new file mode 100644 index 000000000000..1e4e628fe7b0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -0,0 +1,24 @@ +config RDMA_RXE + tristate "Software RDMA over Ethernet (RoCE) driver" + depends on INET && PCI && INFINIBAND + depends on NET_UDP_TUNNEL + ---help--- + This driver implements the InfiniBand RDMA transport over + the Linux network stack. It enables a system with a + standard Ethernet adapter to interoperate with a RoCE + adapter or with another system running the RXE driver. + Documentation on InfiniBand and RoCE can be downloaded at + www.infinibandta.org and www.openfabrics.org. (See also + siw which is a similar software driver for iWARP.) + + The driver is split into two layers, one interfaces with the + Linux RDMA stack and implements a kernel or user space + verbs API. The user space verbs API requires a support + library named librxe which is loaded by the generic user + space verbs API, libibverbs. The other layer interfaces + with the Linux network stack at layer 3. + + To configure and work with soft-RoCE driver please use the + following wiki page under "configure Soft-RoCE (RXE)" section: + + https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile new file mode 100644 index 000000000000..3b3fb9d1c470 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Makefile @@ -0,0 +1,24 @@ +obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o + +rdma_rxe-y := \ + rxe.o \ + rxe_comp.o \ + rxe_req.o \ + rxe_resp.o \ + rxe_recv.o \ + rxe_pool.o \ + rxe_queue.o \ + rxe_verbs.o \ + rxe_av.o \ + rxe_srq.o \ + rxe_qp.o \ + rxe_cq.o \ + rxe_mr.o \ + rxe_dma.o \ + rxe_opcode.o \ + rxe_mmap.o \ + rxe_icrc.o \ + rxe_mcast.o \ + rxe_task.o \ + rxe_net.o \ + rxe_sysfs.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c new file mode 100644 index 000000000000..55f0e8f0ca79 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); +MODULE_DESCRIPTION("Soft RDMA transport"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.2"); + +/* free resources for all ports on a device */ +static void rxe_cleanup_ports(struct rxe_dev *rxe) +{ + kfree(rxe->port.pkey_tbl); + rxe->port.pkey_tbl = NULL; + +} + +/* free resources for a rxe device all objects created for this device must + * have been destroyed + */ +static void rxe_cleanup(struct rxe_dev *rxe) +{ + rxe_pool_cleanup(&rxe->uc_pool); + rxe_pool_cleanup(&rxe->pd_pool); + rxe_pool_cleanup(&rxe->ah_pool); + rxe_pool_cleanup(&rxe->srq_pool); + rxe_pool_cleanup(&rxe->qp_pool); + rxe_pool_cleanup(&rxe->cq_pool); + rxe_pool_cleanup(&rxe->mr_pool); + rxe_pool_cleanup(&rxe->mw_pool); + rxe_pool_cleanup(&rxe->mc_grp_pool); + rxe_pool_cleanup(&rxe->mc_elem_pool); + + rxe_cleanup_ports(rxe); +} + +/* called when all references have been dropped */ +void rxe_release(struct kref *kref) +{ + struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt); + + rxe_cleanup(rxe); + ib_dealloc_device(&rxe->ib_dev); +} + +void rxe_dev_put(struct rxe_dev *rxe) +{ + kref_put(&rxe->ref_cnt, rxe_release); +} +EXPORT_SYMBOL_GPL(rxe_dev_put); + +/* initialize rxe device parameters */ +static int rxe_init_device_param(struct rxe_dev *rxe) +{ + rxe->max_inline_data = RXE_MAX_INLINE_DATA; + + rxe->attr.fw_ver = RXE_FW_VER; + rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; + rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; + rxe->attr.vendor_id = RXE_VENDOR_ID; + rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; + rxe->attr.hw_ver = RXE_HW_VER; + rxe->attr.max_qp = RXE_MAX_QP; + rxe->attr.max_qp_wr = RXE_MAX_QP_WR; + rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; + rxe->attr.max_sge = RXE_MAX_SGE; + rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; + rxe->attr.max_cq = RXE_MAX_CQ; + rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; + rxe->attr.max_mr = RXE_MAX_MR; + rxe->attr.max_pd = RXE_MAX_PD; + rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; + rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; + rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; + rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; + rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; + rxe->attr.atomic_cap = RXE_ATOMIC_CAP; + rxe->attr.max_ee = RXE_MAX_EE; + rxe->attr.max_rdd = RXE_MAX_RDD; + rxe->attr.max_mw = RXE_MAX_MW; + rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; + rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; + rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; + rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; + rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; + rxe->attr.max_ah = RXE_MAX_AH; + rxe->attr.max_fmr = RXE_MAX_FMR; + rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; + rxe->attr.max_srq = RXE_MAX_SRQ; + rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; + rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; + rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; + rxe->attr.max_pkeys = RXE_MAX_PKEYS; + rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + + rxe->max_ucontext = RXE_MAX_UCONTEXT; + + return 0; +} + +/* initialize port attributes */ +static int rxe_init_port_param(struct rxe_port *port) +{ + port->attr.state = RXE_PORT_STATE; + port->attr.max_mtu = RXE_PORT_MAX_MTU; + port->attr.active_mtu = RXE_PORT_ACTIVE_MTU; + port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; + port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; + port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; + port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR; + port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR; + port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN; + port->attr.lid = RXE_PORT_LID; + port->attr.sm_lid = RXE_PORT_SM_LID; + port->attr.lmc = RXE_PORT_LMC; + port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM; + port->attr.sm_sl = RXE_PORT_SM_SL; + port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT; + port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY; + port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; + port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; + port->attr.phys_state = RXE_PORT_PHYS_STATE; + port->mtu_cap = + ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU); + port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); + + return 0; +} + +/* initialize port state, note IB convention that HCA ports are always + * numbered from 1 + */ +static int rxe_init_ports(struct rxe_dev *rxe) +{ + struct rxe_port *port = &rxe->port; + + rxe_init_port_param(port); + + if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) + return -EINVAL; + + port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, + sizeof(*port->pkey_tbl), GFP_KERNEL); + + if (!port->pkey_tbl) + return -ENOMEM; + + port->pkey_tbl[0] = 0xffff; + port->port_guid = rxe->ifc_ops->port_guid(rxe); + + spin_lock_init(&port->port_lock); + + return 0; +} + +/* init pools of managed objects */ +static int rxe_init_pools(struct rxe_dev *rxe) +{ + int err; + + err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, + rxe->max_ucontext); + if (err) + goto err1; + + err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, + rxe->attr.max_pd); + if (err) + goto err2; + + err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, + rxe->attr.max_ah); + if (err) + goto err3; + + err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, + rxe->attr.max_srq); + if (err) + goto err4; + + err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, + rxe->attr.max_qp); + if (err) + goto err5; + + err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, + rxe->attr.max_cq); + if (err) + goto err6; + + err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, + rxe->attr.max_mr); + if (err) + goto err7; + + err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, + rxe->attr.max_mw); + if (err) + goto err8; + + err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, + rxe->attr.max_mcast_grp); + if (err) + goto err9; + + err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, + rxe->attr.max_total_mcast_qp_attach); + if (err) + goto err10; + + return 0; + +err10: + rxe_pool_cleanup(&rxe->mc_grp_pool); +err9: + rxe_pool_cleanup(&rxe->mw_pool); +err8: + rxe_pool_cleanup(&rxe->mr_pool); +err7: + rxe_pool_cleanup(&rxe->cq_pool); +err6: + rxe_pool_cleanup(&rxe->qp_pool); +err5: + rxe_pool_cleanup(&rxe->srq_pool); +err4: + rxe_pool_cleanup(&rxe->ah_pool); +err3: + rxe_pool_cleanup(&rxe->pd_pool); +err2: + rxe_pool_cleanup(&rxe->uc_pool); +err1: + return err; +} + +/* initialize rxe device state */ +static int rxe_init(struct rxe_dev *rxe) +{ + int err; + + /* init default device parameters */ + rxe_init_device_param(rxe); + + err = rxe_init_ports(rxe); + if (err) + goto err1; + + err = rxe_init_pools(rxe); + if (err) + goto err2; + + /* init pending mmap list */ + spin_lock_init(&rxe->mmap_offset_lock); + spin_lock_init(&rxe->pending_lock); + INIT_LIST_HEAD(&rxe->pending_mmaps); + INIT_LIST_HEAD(&rxe->list); + + mutex_init(&rxe->usdev_lock); + + return 0; + +err2: + rxe_cleanup_ports(rxe); +err1: + return err; +} + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) +{ + struct rxe_port *port = &rxe->port; + enum ib_mtu mtu; + + mtu = eth_mtu_int_to_enum(ndev_mtu); + + /* Make sure that new MTU in range */ + mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256; + + port->attr.active_mtu = mtu; + port->mtu_cap = ib_mtu_enum_to_int(mtu); + + return 0; +} +EXPORT_SYMBOL(rxe_set_mtu); + +/* called by ifc layer to create new rxe device. + * The caller should allocate memory for rxe by calling ib_alloc_device. + */ +int rxe_add(struct rxe_dev *rxe, unsigned int mtu) +{ + int err; + + kref_init(&rxe->ref_cnt); + + err = rxe_init(rxe); + if (err) + goto err1; + + err = rxe_set_mtu(rxe, mtu); + if (err) + goto err1; + + err = rxe_register_device(rxe); + if (err) + goto err1; + + return 0; + +err1: + rxe_dev_put(rxe); + return err; +} +EXPORT_SYMBOL(rxe_add); + +/* called by the ifc layer to remove a device */ +void rxe_remove(struct rxe_dev *rxe) +{ + rxe_unregister_device(rxe); + + rxe_dev_put(rxe); +} +EXPORT_SYMBOL(rxe_remove); + +static int __init rxe_module_init(void) +{ + int err; + + /* initialize slab caches for managed objects */ + err = rxe_cache_init(); + if (err) { + pr_err("rxe: unable to init object pools\n"); + return err; + } + + err = rxe_net_init(); + if (err) { + pr_err("rxe: unable to init\n"); + rxe_cache_exit(); + return err; + } + pr_info("rxe: loaded\n"); + + return 0; +} + +static void __exit rxe_module_exit(void) +{ + rxe_remove_all(); + rxe_net_exit(); + rxe_cache_exit(); + + pr_info("rxe: unloaded\n"); +} + +module_init(rxe_module_init); +module_exit(rxe_module_exit); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h new file mode 100644 index 000000000000..12c71c549f97 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_H +#define RXE_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "rxe_net.h" +#include "rxe_opcode.h" +#include "rxe_hdr.h" +#include "rxe_param.h" +#include "rxe_verbs.h" + +#define RXE_UVERBS_ABI_VERSION (1) + +#define IB_PHYS_STATE_LINK_UP (5) +#define IB_PHYS_STATE_LINK_DOWN (3) + +#define RXE_ROCE_V2_SPORT (0xc000) + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); + +int rxe_add(struct rxe_dev *rxe, unsigned int mtu); +void rxe_remove(struct rxe_dev *rxe); +void rxe_remove_all(void); + +int rxe_rcv(struct sk_buff *skb); + +void rxe_dev_put(struct rxe_dev *rxe); +struct rxe_dev *net_to_rxe(struct net_device *ndev); +struct rxe_dev *get_rxe_by_name(const char* name); + +void rxe_port_up(struct rxe_dev *rxe); +void rxe_port_down(struct rxe_dev *rxe); + +#endif /* RXE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c new file mode 100644 index 000000000000..5c9474212d4e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) +{ + struct rxe_port *port; + + if (attr->port_num != 1) { + pr_info("rxe: invalid port_num = %d\n", attr->port_num); + return -EINVAL; + } + + port = &rxe->port; + + if (attr->ah_flags & IB_AH_GRH) { + if (attr->grh.sgid_index > port->attr.gid_tbl_len) { + pr_info("rxe: invalid sgid index = %d\n", + attr->grh.sgid_index); + return -EINVAL; + } + } + + return 0; +} + +int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num, + struct rxe_av *av, struct ib_ah_attr *attr) +{ + memset(av, 0, sizeof(*av)); + memcpy(&av->grh, &attr->grh, sizeof(attr->grh)); + av->port_num = port_num; + return 0; +} + +int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av, + struct ib_ah_attr *attr) +{ + memcpy(&attr->grh, &av->grh, sizeof(av->grh)); + attr->port_num = av->port_num; + return 0; +} + +int rxe_av_fill_ip_info(struct rxe_dev *rxe, + struct rxe_av *av, + struct ib_ah_attr *attr, + struct ib_gid_attr *sgid_attr, + union ib_gid *sgid) +{ + rdma_gid2ip(&av->sgid_addr._sockaddr, sgid); + rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid); + av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); + + return 0; +} + +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +{ + if (!pkt || !pkt->qp) + return NULL; + + if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) + return &pkt->qp->pri_av; + + return (pkt->wqe) ? &pkt->wqe->av : NULL; +} diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c new file mode 100644 index 000000000000..36f67de44095 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" +#include "rxe_task.h" + +enum comp_state { + COMPST_GET_ACK, + COMPST_GET_WQE, + COMPST_COMP_WQE, + COMPST_COMP_ACK, + COMPST_CHECK_PSN, + COMPST_CHECK_ACK, + COMPST_READ, + COMPST_ATOMIC, + COMPST_WRITE_SEND, + COMPST_UPDATE_COMP, + COMPST_ERROR_RETRY, + COMPST_RNR_RETRY, + COMPST_ERROR, + COMPST_EXIT, /* We have an issue, and we want to rerun the completer */ + COMPST_DONE, /* The completer finished successflly */ +}; + +static char *comp_state_name[] = { + [COMPST_GET_ACK] = "GET ACK", + [COMPST_GET_WQE] = "GET WQE", + [COMPST_COMP_WQE] = "COMP WQE", + [COMPST_COMP_ACK] = "COMP ACK", + [COMPST_CHECK_PSN] = "CHECK PSN", + [COMPST_CHECK_ACK] = "CHECK ACK", + [COMPST_READ] = "READ", + [COMPST_ATOMIC] = "ATOMIC", + [COMPST_WRITE_SEND] = "WRITE/SEND", + [COMPST_UPDATE_COMP] = "UPDATE COMP", + [COMPST_ERROR_RETRY] = "ERROR RETRY", + [COMPST_RNR_RETRY] = "RNR RETRY", + [COMPST_ERROR] = "ERROR", + [COMPST_EXIT] = "EXIT", + [COMPST_DONE] = "DONE", +}; + +static unsigned long rnrnak_usec[32] = { + [IB_RNR_TIMER_655_36] = 655360, + [IB_RNR_TIMER_000_01] = 10, + [IB_RNR_TIMER_000_02] = 20, + [IB_RNR_TIMER_000_03] = 30, + [IB_RNR_TIMER_000_04] = 40, + [IB_RNR_TIMER_000_06] = 60, + [IB_RNR_TIMER_000_08] = 80, + [IB_RNR_TIMER_000_12] = 120, + [IB_RNR_TIMER_000_16] = 160, + [IB_RNR_TIMER_000_24] = 240, + [IB_RNR_TIMER_000_32] = 320, + [IB_RNR_TIMER_000_48] = 480, + [IB_RNR_TIMER_000_64] = 640, + [IB_RNR_TIMER_000_96] = 960, + [IB_RNR_TIMER_001_28] = 1280, + [IB_RNR_TIMER_001_92] = 1920, + [IB_RNR_TIMER_002_56] = 2560, + [IB_RNR_TIMER_003_84] = 3840, + [IB_RNR_TIMER_005_12] = 5120, + [IB_RNR_TIMER_007_68] = 7680, + [IB_RNR_TIMER_010_24] = 10240, + [IB_RNR_TIMER_015_36] = 15360, + [IB_RNR_TIMER_020_48] = 20480, + [IB_RNR_TIMER_030_72] = 30720, + [IB_RNR_TIMER_040_96] = 40960, + [IB_RNR_TIMER_061_44] = 61410, + [IB_RNR_TIMER_081_92] = 81920, + [IB_RNR_TIMER_122_88] = 122880, + [IB_RNR_TIMER_163_84] = 163840, + [IB_RNR_TIMER_245_76] = 245760, + [IB_RNR_TIMER_327_68] = 327680, + [IB_RNR_TIMER_491_52] = 491520, +}; + +static inline unsigned long rnrnak_jiffies(u8 timeout) +{ + return max_t(unsigned long, + usecs_to_jiffies(rnrnak_usec[timeout]), 1); +} + +static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE; + case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE; + case IB_WR_SEND: return IB_WC_SEND; + case IB_WR_SEND_WITH_IMM: return IB_WC_SEND; + case IB_WR_RDMA_READ: return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD; + case IB_WR_LSO: return IB_WC_LSO; + case IB_WR_SEND_WITH_INV: return IB_WC_SEND; + case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; + case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; + case IB_WR_REG_MR: return IB_WC_REG_MR; + + default: + return 0xff; + } +} + +void retransmit_timer(unsigned long data) +{ + struct rxe_qp *qp = (struct rxe_qp *)data; + + if (qp->valid) { + qp->comp.timeout = 1; + rxe_run_task(&qp->comp.task, 1); + } +} + +void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, + struct sk_buff *skb) +{ + int must_sched; + + skb_queue_tail(&qp->resp_pkts, skb); + + must_sched = skb_queue_len(&qp->resp_pkts) > 1; + rxe_run_task(&qp->comp.task, must_sched); +} + +static inline enum comp_state get_wqe(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe **wqe_p) +{ + struct rxe_send_wqe *wqe; + + /* we come here whether or not we found a response packet to see if + * there are any posted WQEs + */ + wqe = queue_head(qp->sq.queue); + *wqe_p = wqe; + + /* no WQE or requester has not started it yet */ + if (!wqe || wqe->state == wqe_state_posted) + return pkt ? COMPST_DONE : COMPST_EXIT; + + /* WQE does not require an ack */ + if (wqe->state == wqe_state_done) + return COMPST_COMP_WQE; + + /* WQE caused an error */ + if (wqe->state == wqe_state_error) + return COMPST_ERROR; + + /* we have a WQE, if we also have an ack check its PSN */ + return pkt ? COMPST_CHECK_PSN : COMPST_EXIT; +} + +static inline void reset_retry_counters(struct rxe_qp *qp) +{ + qp->comp.retry_cnt = qp->attr.retry_cnt; + qp->comp.rnr_retry = qp->attr.rnr_retry; +} + +static inline enum comp_state check_psn(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + s32 diff; + + /* check to see if response is past the oldest WQE. if it is, complete + * send/write or error read/atomic + */ + diff = psn_compare(pkt->psn, wqe->last_psn); + if (diff > 0) { + if (wqe->state == wqe_state_pending) { + if (wqe->mask & WR_ATOMIC_OR_READ_MASK) + return COMPST_ERROR_RETRY; + + reset_retry_counters(qp); + return COMPST_COMP_WQE; + } else { + return COMPST_DONE; + } + } + + /* compare response packet to expected response */ + diff = psn_compare(pkt->psn, qp->comp.psn); + if (diff < 0) { + /* response is most likely a retried packet if it matches an + * uncompleted WQE go complete it else ignore it + */ + if (pkt->psn == wqe->last_psn) + return COMPST_COMP_ACK; + else + return COMPST_DONE; + } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { + return COMPST_ERROR_RETRY; + } else { + return COMPST_CHECK_ACK; + } +} + +static inline enum comp_state check_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + unsigned int mask = pkt->mask; + u8 syn; + + /* Check the sequence only */ + switch (qp->comp.opcode) { + case -1: + /* Will catch all *_ONLY cases. */ + if (!(mask & RXE_START_MASK)) + return COMPST_ERROR; + + break; + + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && + pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { + return COMPST_ERROR; + } + break; + default: + WARN_ON(1); + } + + /* Check operation validity. */ + switch (pkt->opcode) { + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; + + /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE + * doesn't have an AETH) + */ + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (wqe->wr.opcode != IB_WR_RDMA_READ && + wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + return COMPST_ERROR; + } + reset_retry_counters(qp); + return COMPST_READ; + + case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; + + if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP && + wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD) + return COMPST_ERROR; + reset_retry_counters(qp); + return COMPST_ATOMIC; + + case IB_OPCODE_RC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + switch (syn & AETH_TYPE_MASK) { + case AETH_ACK: + reset_retry_counters(qp); + return COMPST_WRITE_SEND; + + case AETH_RNR_NAK: + return COMPST_RNR_RETRY; + + case AETH_NAK: + switch (syn) { + case AETH_NAK_PSN_SEQ_ERROR: + /* a nak implicitly acks all packets with psns + * before + */ + if (psn_compare(pkt->psn, qp->comp.psn) > 0) { + qp->comp.psn = pkt->psn; + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + } + return COMPST_ERROR_RETRY; + + case AETH_NAK_INVALID_REQ: + wqe->status = IB_WC_REM_INV_REQ_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_ACC_ERR: + wqe->status = IB_WC_REM_ACCESS_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_OP_ERR: + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + + default: + pr_warn("unexpected nak %x\n", syn); + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + } + + default: + return COMPST_ERROR; + } + break; + + default: + pr_warn("unexpected opcode\n"); + } + + return COMPST_ERROR; +} + +static inline enum comp_state do_read(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int ret; + + ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, payload_addr(pkt), + payload_size(pkt), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + + if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK)) + return COMPST_COMP_ACK; + else + return COMPST_UPDATE_COMP; +} + +static inline enum comp_state do_atomic(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int ret; + + u64 atomic_orig = atmack_orig(pkt); + + ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, &atomic_orig, + sizeof(u64), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + else + return COMPST_COMP_ACK; +} + +static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_cqe *cqe) +{ + memset(cqe, 0, sizeof(*cqe)); + + if (!qp->is_user) { + struct ib_wc *wc = &cqe->ibwc; + + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + wc->qp = &qp->ibqp; + } else { + struct ib_uverbs_wc *uwc = &cqe->uibwc; + + uwc->wr_id = wqe->wr.wr_id; + uwc->status = wqe->status; + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + uwc->wc_flags = IB_WC_WITH_IMM; + uwc->byte_len = wqe->dma.length; + uwc->qp_num = qp->ibqp.qp_num; + } +} + +static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_cqe cqe; + + if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + (qp->req.state == QP_STATE_ERROR)) { + make_send_cqe(qp, wqe, &cqe); + rxe_cq_post(qp->scq, &cqe, 0); + } + + advance_consumer(qp->sq.queue); + + /* + * we completed something so let req run again + * if it is trying to fence + */ + if (qp->req.wait_fence) { + qp->req.wait_fence = 0; + rxe_run_task(&qp->req.task, 1); + } +} + +static inline enum comp_state complete_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + unsigned long flags; + + if (wqe->has_rd_atomic) { + wqe->has_rd_atomic = 0; + atomic_inc(&qp->req.rd_atomic); + if (qp->req.need_rd_atomic) { + qp->comp.timeout_retry = 0; + qp->req.need_rd_atomic = 0; + rxe_run_task(&qp->req.task, 1); + } + } + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* state_lock used by requester & completer */ + spin_lock_irqsave(&qp->state_lock, flags); + if ((qp->req.state == QP_STATE_DRAIN) && + (qp->comp.psn == qp->req.psn)) { + qp->req.state = QP_STATE_DRAINED; + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } else { + spin_unlock_irqrestore(&qp->state_lock, flags); + } + } + + do_complete(qp, wqe); + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + return COMPST_UPDATE_COMP; + else + return COMPST_DONE; +} + +static inline enum comp_state complete_wqe(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + qp->comp.opcode = -1; + + if (pkt) { + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + } + + do_complete(qp, wqe); + + return COMPST_GET_WQE; +} + +int rxe_completer(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_send_wqe *wqe = wqe; + struct sk_buff *skb = NULL; + struct rxe_pkt_info *pkt = NULL; + enum comp_state state; + + if (!qp->valid) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while (queue_head(qp->sq.queue)) + advance_consumer(qp->sq.queue); + + goto exit; + } + + if (qp->req.state == QP_STATE_ERROR) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while ((wqe = queue_head(qp->sq.queue))) { + wqe->status = IB_WC_WR_FLUSH_ERR; + do_complete(qp, wqe); + } + + goto exit; + } + + if (qp->req.state == QP_STATE_RESET) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while (queue_head(qp->sq.queue)) + advance_consumer(qp->sq.queue); + + goto exit; + } + + if (qp->comp.timeout) { + qp->comp.timeout_retry = 1; + qp->comp.timeout = 0; + } else { + qp->comp.timeout_retry = 0; + } + + if (qp->req.need_retry) + goto exit; + + state = COMPST_GET_ACK; + + while (1) { + pr_debug("state = %s\n", comp_state_name[state]); + switch (state) { + case COMPST_GET_ACK: + skb = skb_dequeue(&qp->resp_pkts); + if (skb) { + pkt = SKB_TO_PKT(skb); + qp->comp.timeout_retry = 0; + } + state = COMPST_GET_WQE; + break; + + case COMPST_GET_WQE: + state = get_wqe(qp, pkt, &wqe); + break; + + case COMPST_CHECK_PSN: + state = check_psn(qp, pkt, wqe); + break; + + case COMPST_CHECK_ACK: + state = check_ack(qp, pkt, wqe); + break; + + case COMPST_READ: + state = do_read(qp, pkt, wqe); + break; + + case COMPST_ATOMIC: + state = do_atomic(qp, pkt, wqe); + break; + + case COMPST_WRITE_SEND: + if (wqe->state == wqe_state_pending && + wqe->last_psn == pkt->psn) + state = COMPST_COMP_ACK; + else + state = COMPST_UPDATE_COMP; + break; + + case COMPST_COMP_ACK: + state = complete_ack(qp, pkt, wqe); + break; + + case COMPST_COMP_WQE: + state = complete_wqe(qp, pkt, wqe); + break; + + case COMPST_UPDATE_COMP: + if (pkt->mask & RXE_END_MASK) + qp->comp.opcode = -1; + else + qp->comp.opcode = pkt->opcode; + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + + state = COMPST_DONE; + break; + + case COMPST_DONE: + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto done; + + case COMPST_EXIT: + if (qp->comp.timeout_retry && wqe) { + state = COMPST_ERROR_RETRY; + break; + } + + /* re reset the timeout counter if + * (1) QP is type RC + * (2) the QP is alive + * (3) there is a packet sent by the requester that + * might be acked (we still might get spurious + * timeouts but try to keep them as few as possible) + * (4) the timeout parameter is set + */ + if ((qp_type(qp) == IB_QPT_RC) && + (qp->req.state == QP_STATE_READY) && + (psn_compare(qp->req.psn, qp->comp.psn) > 0) && + qp->qp_timeout_jiffies) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); + goto exit; + + case COMPST_ERROR_RETRY: + /* we come here if the retry timer fired and we did + * not receive a response packet. try to retry the send + * queue if that makes sense and the limits have not + * been exceeded. remember that some timeouts are + * spurious since we do not reset the timer but kick + * it down the road or let it expire + */ + + /* there is nothing to retry in this case */ + if (!wqe || (wqe->state == wqe_state_posted)) + goto exit; + + if (qp->comp.retry_cnt > 0) { + if (qp->comp.retry_cnt != 7) + qp->comp.retry_cnt--; + + /* no point in retrying if we have already + * seen the last ack that the requester could + * have caused + */ + if (psn_compare(qp->req.psn, + qp->comp.psn) > 0) { + /* tell the requester to retry the + * send send queue next time around + */ + qp->req.need_retry = 1; + rxe_run_task(&qp->req.task, 1); + } + goto exit; + } else { + wqe->status = IB_WC_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_RNR_RETRY: + if (qp->comp.rnr_retry > 0) { + if (qp->comp.rnr_retry != 7) + qp->comp.rnr_retry--; + + qp->req.need_retry = 1; + pr_debug("set rnr nak timer\n"); + mod_timer(&qp->rnr_nak_timer, + jiffies + rnrnak_jiffies(aeth_syn(pkt) + & ~AETH_TYPE_MASK)); + goto exit; + } else { + wqe->status = IB_WC_RNR_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_ERROR: + do_complete(qp, wqe); + rxe_qp_error(qp); + goto exit; + } + } + +exit: + /* we come here if we are done with processing and want the task to + * exit from the loop calling us + */ + return -EAGAIN; + +done: + /* we come here if we have processed a packet we want the task to call + * us again to see if there is anything else to do + */ + return 0; +} diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c new file mode 100644 index 000000000000..e5e6a5e7dee9 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, + int cqe, int comp_vector, struct ib_udata *udata) +{ + int count; + + if (cqe <= 0) { + pr_warn("cqe(%d) <= 0\n", cqe); + goto err1; + } + + if (cqe > rxe->attr.max_cqe) { + pr_warn("cqe(%d) > max_cqe(%d)\n", + cqe, rxe->attr.max_cqe); + goto err1; + } + + if (cq) { + count = queue_count(cq->queue); + if (cqe < count) { + pr_warn("cqe(%d) < current # elements in queue (%d)", + cqe, count); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static void rxe_send_complete(unsigned long data) +{ + struct rxe_cq *cq = (struct rxe_cq *)data; + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + int err; + + cq->queue = rxe_queue_init(rxe, &cqe, + sizeof(struct rxe_cqe)); + if (!cq->queue) { + pr_warn("unable to create cq\n"); + return -ENOMEM; + } + + err = do_mmap_info(rxe, udata, false, context, cq->queue->buf, + cq->queue->buf_size, &cq->queue->ip); + if (err) { + kvfree(cq->queue->buf); + kfree(cq->queue); + return err; + } + + if (udata) + cq->is_user = 1; + + tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); + + spin_lock_init(&cq->cq_lock); + cq->ibcq.cqe = cqe; + return 0; +} + +int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata) +{ + int err; + + err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe, + sizeof(struct rxe_cqe), + cq->queue->ip ? cq->queue->ip->context : NULL, + udata, NULL, &cq->cq_lock); + if (!err) + cq->ibcq.cqe = cqe; + + return err; +} + +int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) +{ + struct ib_event ev; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + + if (unlikely(queue_full(cq->queue))) { + spin_unlock_irqrestore(&cq->cq_lock, flags); + if (cq->ibcq.event_handler) { + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + + return -EBUSY; + } + + memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); + + /* make sure all changes to the CQ are written before we update the + * producer pointer + */ + smp_wmb(); + + advance_producer(cq->queue); + spin_unlock_irqrestore(&cq->cq_lock, flags); + + if ((cq->notify == IB_CQ_NEXT_COMP) || + (cq->notify == IB_CQ_SOLICITED && solicited)) { + cq->notify = 0; + tasklet_schedule(&cq->comp_task); + } + + return 0; +} + +void rxe_cq_cleanup(void *arg) +{ + struct rxe_cq *cq = arg; + + if (cq->queue) + rxe_queue_cleanup(cq->queue); +} diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c new file mode 100644 index 000000000000..7634c1a81b2b --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_dma.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +#define DMA_BAD_ADDER ((u64)0) + +static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == DMA_BAD_ADDER; +} + +static u64 rxe_dma_map_single(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); + return (uintptr_t)cpu_addr; +} + +static void rxe_dma_unmap_single(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static u64 rxe_dma_map_page(struct ib_device *dev, + struct page *page, + unsigned long offset, + size_t size, enum dma_data_direction direction) +{ + u64 addr; + + WARN_ON(!valid_dma_direction(direction)); + + if (offset + size > PAGE_SIZE) { + addr = DMA_BAD_ADDER; + goto done; + } + + addr = (uintptr_t)page_address(page); + if (addr) + addr += offset; + +done: + return addr; +} + +static void rxe_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + u64 addr; + int i; + int ret = nents; + + WARN_ON(!valid_dma_direction(direction)); + + for_each_sg(sgl, sg, nents, i) { + addr = (uintptr_t)page_address(sg_page(sg)); + if (!addr) { + ret = 0; + break; + } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif + } + + return ret; +} + +static void rxe_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static void rxe_sync_single_for_cpu(struct ib_device *dev, + u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void rxe_sync_single_for_device(struct ib_device *dev, + u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + + if (dma_handle) + *dma_handle = (uintptr_t)addr; + + return addr; +} + +static void rxe_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops rxe_dma_mapping_ops = { + .mapping_error = rxe_mapping_error, + .map_single = rxe_dma_map_single, + .unmap_single = rxe_dma_unmap_single, + .map_page = rxe_dma_map_page, + .unmap_page = rxe_dma_unmap_page, + .map_sg = rxe_map_sg, + .unmap_sg = rxe_unmap_sg, + .sync_single_for_cpu = rxe_sync_single_for_cpu, + .sync_single_for_device = rxe_sync_single_for_device, + .alloc_coherent = rxe_dma_alloc_coherent, + .free_coherent = rxe_dma_free_coherent +}; diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h new file mode 100644 index 000000000000..d57b5e956ceb --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -0,0 +1,952 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_HDR_H +#define RXE_HDR_H + +/* extracted information about a packet carried in an sk_buff struct fits in + * the skbuff cb array. Must be at most 48 bytes. stored in control block of + * sk_buff for received packets. + */ +struct rxe_pkt_info { + struct rxe_dev *rxe; /* device that owns packet */ + struct rxe_qp *qp; /* qp that owns packet */ + struct rxe_send_wqe *wqe; /* send wqe */ + u8 *hdr; /* points to bth */ + u32 mask; /* useful info about pkt */ + u32 psn; /* bth psn of packet */ + u16 pkey_index; /* partition of pkt */ + u16 paylen; /* length of bth - icrc */ + u8 port_num; /* port pkt received on */ + u8 opcode; /* bth opcode of packet */ + u8 offset; /* bth offset from pkt->hdr */ +}; + +/* Macros should be used only for received skb */ +#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb) +#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb) + +/* + * IBA header types and methods + * + * Some of these are for reference and completeness only since + * rxe does not currently support RD transport + * most of this could be moved into IB core. ib_pack.h has + * part of this but is incomplete + * + * Header specific routines to insert/extract values to/from headers + * the routines that are named __hhh_(set_)fff() take a pointer to a + * hhh header and get(set) the fff field. The routines named + * hhh_(set_)fff take a packet info struct and find the + * header and field based on the opcode in the packet. + * Conversion to/from network byte order from cpu order is also done. + */ + +#define RXE_ICRC_SIZE (4) +#define RXE_MAX_HDR_LENGTH (80) + +/****************************************************************************** + * Base Transport Header + ******************************************************************************/ +struct rxe_bth { + u8 opcode; + u8 flags; + __be16 pkey; + __be32 qpn; + __be32 apsn; +}; + +#define BTH_TVER (0) +#define BTH_DEF_PKEY (0xffff) + +#define BTH_SE_MASK (0x80) +#define BTH_MIG_MASK (0x40) +#define BTH_PAD_MASK (0x30) +#define BTH_TVER_MASK (0x0f) +#define BTH_FECN_MASK (0x80000000) +#define BTH_BECN_MASK (0x40000000) +#define BTH_RESV6A_MASK (0x3f000000) +#define BTH_QPN_MASK (0x00ffffff) +#define BTH_ACK_MASK (0x80000000) +#define BTH_RESV7_MASK (0x7f000000) +#define BTH_PSN_MASK (0x00ffffff) + +static inline u8 __bth_opcode(void *arg) +{ + struct rxe_bth *bth = arg; + + return bth->opcode; +} + +static inline void __bth_set_opcode(void *arg, u8 opcode) +{ + struct rxe_bth *bth = arg; + + bth->opcode = opcode; +} + +static inline u8 __bth_se(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (BTH_SE_MASK & bth->flags); +} + +static inline void __bth_set_se(void *arg, int se) +{ + struct rxe_bth *bth = arg; + + if (se) + bth->flags |= BTH_SE_MASK; + else + bth->flags &= ~BTH_SE_MASK; +} + +static inline u8 __bth_mig(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (BTH_MIG_MASK & bth->flags); +} + +static inline void __bth_set_mig(void *arg, u8 mig) +{ + struct rxe_bth *bth = arg; + + if (mig) + bth->flags |= BTH_MIG_MASK; + else + bth->flags &= ~BTH_MIG_MASK; +} + +static inline u8 __bth_pad(void *arg) +{ + struct rxe_bth *bth = arg; + + return (BTH_PAD_MASK & bth->flags) >> 4; +} + +static inline void __bth_set_pad(void *arg, u8 pad) +{ + struct rxe_bth *bth = arg; + + bth->flags = (BTH_PAD_MASK & (pad << 4)) | + (~BTH_PAD_MASK & bth->flags); +} + +static inline u8 __bth_tver(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_TVER_MASK & bth->flags; +} + +static inline void __bth_set_tver(void *arg, u8 tver) +{ + struct rxe_bth *bth = arg; + + bth->flags = (BTH_TVER_MASK & tver) | + (~BTH_TVER_MASK & bth->flags); +} + +static inline u16 __bth_pkey(void *arg) +{ + struct rxe_bth *bth = arg; + + return be16_to_cpu(bth->pkey); +} + +static inline void __bth_set_pkey(void *arg, u16 pkey) +{ + struct rxe_bth *bth = arg; + + bth->pkey = cpu_to_be16(pkey); +} + +static inline u32 __bth_qpn(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_QPN_MASK & be32_to_cpu(bth->qpn); +} + +static inline void __bth_set_qpn(void *arg, u32 qpn) +{ + struct rxe_bth *bth = arg; + u32 resvqpn = be32_to_cpu(bth->qpn); + + bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) | + (~BTH_QPN_MASK & resvqpn)); +} + +static inline int __bth_fecn(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn); +} + +static inline void __bth_set_fecn(void *arg, int fecn) +{ + struct rxe_bth *bth = arg; + + if (fecn) + bth->qpn |= cpu_to_be32(BTH_FECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_FECN_MASK); +} + +static inline int __bth_becn(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn); +} + +static inline void __bth_set_becn(void *arg, int becn) +{ + struct rxe_bth *bth = arg; + + if (becn) + bth->qpn |= cpu_to_be32(BTH_BECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK); +} + +static inline u8 __bth_resv6a(void *arg) +{ + struct rxe_bth *bth = arg; + + return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24; +} + +static inline void __bth_set_resv6a(void *arg) +{ + struct rxe_bth *bth = arg; + + bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK); +} + +static inline int __bth_ack(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn); +} + +static inline void __bth_set_ack(void *arg, int ack) +{ + struct rxe_bth *bth = arg; + + if (ack) + bth->apsn |= cpu_to_be32(BTH_ACK_MASK); + else + bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK); +} + +static inline void __bth_set_resv7(void *arg) +{ + struct rxe_bth *bth = arg; + + bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK); +} + +static inline u32 __bth_psn(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_PSN_MASK & be32_to_cpu(bth->apsn); +} + +static inline void __bth_set_psn(void *arg, u32 psn) +{ + struct rxe_bth *bth = arg; + u32 apsn = be32_to_cpu(bth->apsn); + + bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) | + (~BTH_PSN_MASK & apsn)); +} + +static inline u8 bth_opcode(struct rxe_pkt_info *pkt) +{ + return __bth_opcode(pkt->hdr + pkt->offset); +} + +static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) +{ + __bth_set_opcode(pkt->hdr + pkt->offset, opcode); +} + +static inline u8 bth_se(struct rxe_pkt_info *pkt) +{ + return __bth_se(pkt->hdr + pkt->offset); +} + +static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) +{ + __bth_set_se(pkt->hdr + pkt->offset, se); +} + +static inline u8 bth_mig(struct rxe_pkt_info *pkt) +{ + return __bth_mig(pkt->hdr + pkt->offset); +} + +static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) +{ + __bth_set_mig(pkt->hdr + pkt->offset, mig); +} + +static inline u8 bth_pad(struct rxe_pkt_info *pkt)