From 462075a6ea85aa1cf6ee1620a232c483dfd4b520 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:47 -0500 Subject: uapi/hfi1_user: Correct comment for capability bit The HFI1_CAP_TID_UNMAP comment was incorrectly implying the opposite of what capability actually did. Correct this error. Reviewed-by: Ira Weiny Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford --- include/uapi/rdma/hfi/hfi1_user.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 288694e422fb..cf172718e3d5 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -93,7 +93,7 @@ #define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/ #define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */ #define HFI1_CAP_NODROP_EGR_FULL (1UL << 9) /* Don't drop on EGR buffs full */ -#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Enable Expected TID caching */ +#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Disable Expected TID caching */ #define HFI1_CAP_PRINT_UNIMPL (1UL << 11) /* Show for unimplemented feats */ #define HFI1_CAP_ALLOW_PERM_JKEY (1UL << 12) /* Allow use of permissive JKEY */ #define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */ -- cgit v1.2.3 From 955ad36dcde4639664253c2bd39f626cd88d2acf Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:48 -0500 Subject: uapi/hfi1_user: Add command and event for TID caching TID caching will use a new event to signal userland that cache invalidation has occurred and needs a matching command code that will be used to read the invalidated TIDs. Add the event bit and the new command to the exported header file. The command is also added to the switch() statement in file_ops.c for completeness and in preparation for its usage later. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- include/uapi/rdma/hfi/hfi1_user.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index cf172718e3d5..92be2e373019 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -134,6 +134,7 @@ #define HFI1_CMD_ACK_EVENT 10 /* ack & clear user status bits */ #define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ #define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ +#define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ /* separate EPROM commands from normal PSM commands */ #define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */ #define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */ @@ -147,13 +148,15 @@ #define _HFI1_EVENT_LID_CHANGE_BIT 2 #define _HFI1_EVENT_LMC_CHANGE_BIT 3 #define _HFI1_EVENT_SL2VL_CHANGE_BIT 4 -#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_SL2VL_CHANGE_BIT +#define _HFI1_EVENT_TID_MMU_NOTIFY_BIT 5 +#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_TID_MMU_NOTIFY_BIT #define HFI1_EVENT_FROZEN (1UL << _HFI1_EVENT_FROZEN_BIT) #define HFI1_EVENT_LINKDOWN (1UL << _HFI1_EVENT_LINKDOWN_BIT) #define HFI1_EVENT_LID_CHANGE (1UL << _HFI1_EVENT_LID_CHANGE_BIT) #define HFI1_EVENT_LMC_CHANGE (1UL << _HFI1_EVENT_LMC_CHANGE_BIT) #define HFI1_EVENT_SL2VL_CHANGE (1UL << _HFI1_EVENT_SL2VL_CHANGE_BIT) +#define HFI1_EVENT_TID_MMU_NOTIFY (1UL << _HFI1_EVENT_TID_MMU_NOTIFY_BIT) /* * These are the status bits readable (in ASCII form, 64bit value) -- cgit v1.2.3 From 0b091fb32c5ae4737bf606a313e6625dad34bbc6 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Fri, 5 Feb 2016 11:57:58 -0500 Subject: staging/hfi1: Enable TID caching feature This commit "flips the switch" on the TID caching feature implemented in this patch series. As well as enabling the new feature by tying the new function with the PSM API, it also cleans up the old unneeded code, data structure members, and variables. Due to difference in operation and information, the tracing functions related to expected receives had to be changed. This patch include these changes. The tracing function changes could not be split into a separate commit without including both tracing variants at the same time. This would have caused other complications and ugliness. Signed-off-by: Mitko Haralanov Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- include/uapi/rdma/hfi/hfi1_user.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index 92be2e373019..a533cecab14f 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -66,7 +66,7 @@ * The major version changes when data structures change in an incompatible * way. The driver must be the same for initialization to succeed. */ -#define HFI1_USER_SWMAJOR 4 +#define HFI1_USER_SWMAJOR 5 /* * Minor version differences are always compatible @@ -241,11 +241,6 @@ struct hfi1_tid_info { __u32 tidcnt; /* length of transfer buffer programmed by this request */ __u32 length; - /* - * pointer to bitmap of TIDs used for this call; - * checked for being large enough at open - */ - __u64 tidmap; }; struct hfi1_cmd { -- cgit v1.2.3 From 7a43b598a3d60affd13d4082b6afaf09b8131815 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Wed, 20 Jan 2016 13:40:02 -0600 Subject: i40iw: add entry in rdma_netlink Add entry for port mapper services. Changes since v2: moved this patch before being used Changes since v1: moved I40IW as last element Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- include/uapi/rdma/rdma_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index c19a5dc1531a..4fa418de4075 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -8,6 +8,7 @@ enum { RDMA_NL_NES, RDMA_NL_C4IW, RDMA_NL_LS, /* RDMA Local Services */ + RDMA_NL_I40IW, RDMA_NL_NUM_CLIENTS }; -- cgit v1.2.3 From 0194621b225348428c212f330c26d194fc77bd15 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:50:24 -0800 Subject: IB/rdmavt: Create module framework and handle driver registration This patch introduces the basics for a new module called rdma_vt. This new driver is a software implementation of the InfiniBand verbs and aims to replace the multiple implementations that exist and duplicate each others' code. While the call to actually register the device with the IB core happens in rdma_vt, most of the work is still done in the drivers themselves. This will be changing in a follow on patch this is just laying the groundwork for this infrastructure. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 include/rdma/rdma_vt.h (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h new file mode 100644 index 000000000000..0438bf229306 --- /dev/null +++ b/include/rdma/rdma_vt.h @@ -0,0 +1,70 @@ +#ifndef DEF_RDMA_VT_H +#define DEF_RDMA_VT_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Structure that low level drivers will populate in order to register with the + * rdmavt layer. + */ + +#include "ib_verbs.h" +struct rvt_dev_info { + struct ib_device ibdev; + int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * TODO: + * need to reflect module parameters that may vary by dev + */ +}; + +int rvt_register_device(struct rvt_dev_info *rvd); +void rvt_unregister_device(struct rvt_dev_info *rvd); + +#endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From 8afd32eb58b6885fc3e268c69b1b1b627aa2afaf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:51:48 -0800 Subject: IB/rdmavt: Add protection domain to rdmavt. Add datastructure for and allocation/deallocation of protection domains for RDMAVT. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 0438bf229306..6bf5fd40081d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -54,16 +54,42 @@ */ #include "ib_verbs.h" + +/* + * Things that are driver specific, module parameters in hfi1 and qib + */ +struct rvt_driver_params { + int max_pds; +}; + +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + struct rvt_dev_info { struct ib_device ibdev; + + /* Driver specific */ + struct rvt_driver_params dparms; int (*port_callback)(struct ib_device *, u8, struct kobject *); - /* - * TODO: - * need to reflect module parameters that may vary by dev - */ + /* Internal use */ + int n_pds_allocated; + spinlock_t n_pds_lock; /* Protect pd allocated count */ }; +static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct rvt_pd, ibpd); +} + +static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) +{ + return container_of(ibdev, struct rvt_dev_info, ibdev); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); -- cgit v1.2.3 From b1070a7a4d304e680eb6c1158d76645cf5a923f1 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:52:19 -0800 Subject: IB/rdmavt: Add ib core device attributes to rvt driver params list Instead of trying to handle each parameter separately, add ib_device_attr to rvt_driver_params. This means drivers will fill this in and pass to the rvt registration function. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 6bf5fd40081d..2990e03bdd9e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -59,7 +59,45 @@ * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - int max_pds; + /* + * driver required fields: + * node_guid + * phys_port_cnt + * dma_device + * owner + * driver optional fields (rvt will provide generic value if blank): + * name + * node_desc + * rvt fields, driver value ignored: + * uverbs_abi_ver + * node_type + * num_comp_vectors + * uverbs_cmd_mask + */ + struct ib_device_attr props; + + /* + * Drivers will need to support a number of notifications to rvt in + * accordance with certain events. This structure should contain a mask + * of the supported events. Such events that the rvt may need to know + * about include: + * port errors + * port active + * lid change + * sm change + * client reregister + * pkey change + * + * There may also be other events that the rvt layers needs to know + * about this is not an exhaustive list. Some events though rvt does not + * need to rely on the driver for such as completion queue error. + */ + int rvt_signal_supported; + + /* + * Anything driver specific that is not covered by props + * For instance special module parameters. Goes here. + */ }; /* Protection domain */ @@ -69,10 +107,25 @@ struct rvt_pd { }; struct rvt_dev_info { + /* + * Prior to calling for registration the driver will be responsible for + * allocating space for this structure. + * + * The driver will also be responsible for filling in certain members of + * dparms.props + */ + struct ib_device ibdev; - /* Driver specific */ + /* Driver specific properties */ struct rvt_driver_params dparms; + + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ int (*port_callback)(struct ib_device *, u8, struct kobject *); /* Internal use */ -- cgit v1.2.3 From 30588643f95e1bb1239e2568de7a653722832a5e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:54:16 -0800 Subject: IB/rdmavt: Add pkey query stub The pkey table will reside in the rvt structure but it will be modified only when the driver requests then rvt will simply read the value to return in the query. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2990e03bdd9e..bf072a436a34 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -114,12 +114,13 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; + /* PKey Table goes here */ + /* * The work to create port files in /sys/class Infiniband is different * depending on the driver. This should not be extracted away and -- cgit v1.2.3 From b92a7568037e2a28f61c3f79c2320431bb24dfab Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:01:42 -0800 Subject: IB/rdmavt: Move MR datastructures into rvt This patch adds the MR datastructures based on hfi1 into rvt. For now the data structures are defined in include/rdma/rdma_vt.h but once all MR functionality has been moved from the drivers into rvt these should move to rdmavt/mr.h Reviewed-by: Ira Weiny Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index bf072a436a34..f232e39a5d69 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,56 @@ #include "ib_verbs.h" +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* End Memmory Region */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -119,6 +169,9 @@ struct rvt_dev_info { /* Driver specific properties */ struct rvt_driver_params dparms; + struct rvt_mregion __rcu *dma_mr; + struct rvt_lkey_table lkey_table; + /* PKey Table goes here */ /* -- cgit v1.2.3 From ca889e8ad3af9f1dfeb827356bc9839fb20f32be Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:41 -0800 Subject: IB/rdmavt: Add queue pair data structure to rdmavt Add queue pair data structure as well as supporting structures to rdmavt. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 233 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f232e39a5d69..9baa7f04e8d0 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -105,6 +105,239 @@ struct rvt_lkey_table { /* End Memmory Region */ +/* + * Things needed for the Queue Pair definition. Like the MR stuff above the + * following should probably get moved to qp.h once drivers stop trying to make + * and manipulate thier own QPs. For the few instnaces where a driver may need + * to look into a queue pair there should be a pointer to a driver priavte data + * structure that they can look at. + */ + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. + */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct rvt_sge_state *s_cur_sge; + u32 s_flags; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + struct sdma_engine *s_sde; /* current sde */ + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +/* End QP section */ + /* * Things that are driver specific, module parameters in hfi1 and qib */ -- cgit v1.2.3 From aec5778775ac03ee6cfd6480adbbf6b05513d77b Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:52 -0800 Subject: IB/rdmavt: Move driver helper functions to a common structure Drivers are going to need to provide multiple functions for rdmavt to call in to. We already have one, so go ahead and push this into a data structure designated for driver supplied functions. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9baa7f04e8d0..e0beedc6110e 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -383,6 +383,19 @@ struct rvt_driver_params { */ }; +/* + * Functions that drivers are required to support + */ +struct rvt_driver_provided { + /* + * The work to create port files in /sys/class Infiniband is different + * depending on the driver. This should not be extracted away and + * instead drivers are responsible for setting the correct callback for + * this. + */ + int (*port_callback)(struct ib_device *, u8, struct kobject *); +}; + /* Protection domain */ struct rvt_pd { struct ib_pd ibpd; @@ -407,13 +420,8 @@ struct rvt_dev_info { /* PKey Table goes here */ - /* - * The work to create port files in /sys/class Infiniband is different - * depending on the driver. This should not be extracted away and - * instead drivers are responsible for setting the correct callback for - * this. - */ - int (*port_callback)(struct ib_device *, u8, struct kobject *); + /* Driver specific helper functions */ + struct rvt_driver_provided driver_f; /* Internal use */ int n_pds_allocated; -- cgit v1.2.3 From b534875d5ab348fb9193692589e2ee82ae768e3a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:02:59 -0800 Subject: IB/rdmavt: Add device specific info prints Follow hfi1's example for printing information about the driver and incorporate into rdmavt. This requires two new functions to be provided by the driver, one to get_card_name and one to get_pci_dev. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e0beedc6110e..4b83770bc312 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -386,6 +386,7 @@ struct rvt_driver_params { /* * Functions that drivers are required to support */ +struct rvt_dev_info; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -394,6 +395,8 @@ struct rvt_driver_provided { * this. */ int (*port_callback)(struct ib_device *, u8, struct kobject *); + const char * (*get_card_name)(struct rvt_dev_info *rdi); + struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); }; /* Protection domain */ -- cgit v1.2.3 From 0b8a8aae02abfbd724186cffe400fbdbf0cb41d6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:07 -0800 Subject: IB/rdmavt: Add the start of capability flags Drivers will need a set of flags to dictate behavior to rdmavt. This patch adds a placeholder and a spot for it to live, as well as a few flags that will be used. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4b83770bc312..b44ac176217b 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,16 @@ #include "ib_verbs.h" +/* + * For some of the IBTA objects there will likely be some + * initializations required. We need flags to determine whether it is OK + * for rdmavt to do this or not. This does not imply any functions of a + * partiuclar IBTA object are overridden. + */ +#define RVT_FLAG_MR_INIT_DRIVER BIT(1) +#define RVT_FLAG_QP_INIT_DRIVER BIT(2) +#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) + /* * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once * drivers no longer need access to the MR directly. @@ -429,6 +439,8 @@ struct rvt_dev_info { /* Internal use */ int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + + int flags; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) -- cgit v1.2.3 From 7b1e2099adc8e66f78fee2dd2f10cb8a11362083 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:03:31 -0800 Subject: IB/rdmavt: Move memory registration into rdmavt Use the memory registration routines in hfi1 and move them to rdmavt. A follow on patch will address removing the duplicated code in the hfi1 and qib drivers. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index b44ac176217b..9a479575078f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -391,6 +391,7 @@ struct rvt_driver_params { * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. */ + unsigned int lkey_table_size; }; /* @@ -416,6 +417,8 @@ struct rvt_pd { }; struct rvt_dev_info { + struct ib_device ibdev; /* Keep this first. Nothing above here */ + /* * Prior to calling for registration the driver will be responsible for * allocating space for this structure. @@ -423,7 +426,6 @@ struct rvt_dev_info { * The driver will also be responsible for filling in certain members of * dparms.props */ - struct ib_device ibdev; /* Driver specific properties */ struct rvt_driver_params dparms; @@ -453,7 +455,22 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc); +int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, + struct rvt_sge *isge, struct ib_sge *sge, int acc); #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From f2f342115ef2b0755abd73573831351e371f6242 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:47 -0800 Subject: IB/rdmavt: Add common LID defines to rdmavt Original patch is from Kamal Heib . It has been split into separate patches. This patch adds RVT_PERMISSIVE_LID and RVT_MULTICAST_LID_BASE to rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9a479575078f..dbb45bcd1fea 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,9 @@ #include "ib_verbs.h" +#define RVT_MULTICAST_LID_BASE 0xC000 +#define RVT_PERMISSIVE_LID 0xFFFF + /* * For some of the IBTA objects there will likely be some * initializations required. We need flags to determine whether it is OK -- cgit v1.2.3 From 119a8e708d16d38eedfa3d920b89b709dda41a8f Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 6 Jan 2016 10:03:59 -0800 Subject: IB/rdmavt: Add AH to rdmavt Original patch is from Kamal Heib . It has been split into three separate patches. This one for rdmavt, a follow on for qib, and one for hfi1. Create datastructure for address handle and implement the create/destroy/modify/query of address handle for rdmavt. Reviewed-by: Ira Weiny Signed-off-by: Kamal Heib Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dbb45bcd1fea..36cced63af77 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -411,6 +411,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); }; /* Protection domain */ @@ -419,6 +420,13 @@ struct rvt_pd { int user; /* non-zero if created from user space */ }; +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; +}; + struct rvt_dev_info { struct ib_device ibdev; /* Keep this first. Nothing above here */ @@ -445,6 +453,9 @@ struct rvt_dev_info { int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ + int n_ahs_allocated; + spinlock_t n_ahs_lock; /* Protect ah allocated count */ + int flags; }; @@ -453,6 +464,11 @@ static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) return container_of(ibpd, struct rvt_pd, ibpd); } +static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) +{ + return container_of(ibah, struct rvt_ah, ibah); +} + static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) { return container_of(ibdev, struct rvt_dev_info, ibdev); @@ -471,6 +487,7 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, -- cgit v1.2.3 From 70a1a351626073123ab79de24119977c4a297fdf Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:06 -0800 Subject: IB/rdmavt: Move SRQ data structure into rdmavt Patch moves the srq data structure into rdmavt in preparation for removal from qib and hfi1 which will follow in subsequent patches. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36cced63af77..fcf3ec05da70 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -349,6 +349,14 @@ struct rvt_qp { ____cacheline_aligned_in_smp; }; +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + /* End QP section */ /* @@ -485,6 +493,11 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v1.2.3 From f3d01bbcdc47a728336008a9254732c1652aeddd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:13 -0800 Subject: IB/rdmavt: Add an ibport data structure to rdmavt Converge the ibport data structures of qib and hfi1 into a common ib port structure. Also provides a place to keep track of these ports in case rdmavt needs it. Along with this goes an attach and detach function for drivers to use to notify rdmavt of the ports. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fcf3ec05da70..a3d6a5bd0c02 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -53,6 +53,8 @@ * rdmavt layer. */ +#include +#include #include "ib_verbs.h" #define RVT_MULTICAST_LID_BASE 0xC000 @@ -359,6 +361,65 @@ struct rvt_srq { /* End QP section */ +struct rvt_ibport { + struct rvt_qp __rcu *qp[2]; + struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct rb_root mcast_tree; + spinlock_t lock; /* protect changes in this struct */ + + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + unsigned long trap_timeout; + __be64 gid_prefix; /* in network order */ + __be64 mkey; + u64 tid; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 mkey_lease_period; + u16 sm_lid; + u8 sm_sl; + u8 mkeyprot; + u8 subnet_timeout; + u8 vl_high_limit; + + /* + * Driver is expected to keep these up to date. These + * counters are informational only and not required to be + * completely accurate. + */ + u64 n_rc_resends; + u64 n_seq_naks; + u64 n_rdma_seq; + u64 n_rnr_naks; + u64 n_other_naks; + u64 n_loop_pkts; + u64 n_pkt_drops; + u64 n_vl15_dropped; + u64 n_rc_timeouts; + u64 n_dmawait; + u64 n_unaligned; + u64 n_rc_dupreq; + u64 n_rc_seqnak; + u16 pkey_violations; + u16 qkey_violations; + u16 mkey_violations; + + /* Hot-path per CPU counters to avoid cacheline trading to update */ + u64 z_rc_acks; + u64 z_rc_qacks; + u64 z_rc_delayed_comp; + u64 __percpu *rc_acks; + u64 __percpu *rc_qacks; + u64 __percpu *rc_delayed_comp; + + void *priv; /* driver private data */ + + /* TODO: Move sm_ah and smi_ah into here as well*/ +}; + /* * Things that are driver specific, module parameters in hfi1 and qib */ @@ -403,6 +464,7 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. */ unsigned int lkey_table_size; + int nports; }; /* @@ -465,6 +527,7 @@ struct rvt_dev_info { spinlock_t n_ahs_lock; /* Protect ah allocated count */ int flags; + struct rvt_ibport **ports; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -501,9 +564,10 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +void rvt_attach_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int portnum); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); - #endif /* DEF_RDMA_VT_H */ -- cgit v1.2.3 From b036db83c0ec8d1e81df19410a494be4cfe0b186 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:23 -0800 Subject: IB/rdmavt: Add driver notification for new AH Drivers may need to do some work once an address handle has been created. Add a driver function for this purpose. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index a3d6a5bd0c02..ef66d2b0ec37 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -467,9 +467,21 @@ struct rvt_driver_params { int nports; }; -/* - * Functions that drivers are required to support - */ +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; + u8 vl; + u8 log_pmtu; +}; + struct rvt_dev_info; struct rvt_driver_provided { /* @@ -478,23 +490,20 @@ struct rvt_driver_provided { * instead drivers are responsible for setting the correct callback for * this. */ + + /* -------------------*/ + /* Required functions */ + /* -------------------*/ int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - int (*check_ah)(struct ib_device *, struct ib_ah_attr *); -}; -/* Protection domain */ -struct rvt_pd { - struct ib_pd ibpd; - int user; /* non-zero if created from user space */ -}; - -/* Address handle */ -struct rvt_ah { - struct ib_ah ibah; - struct ib_ah_attr attr; - atomic_t refcount; + /*--------------------*/ + /* Optional functions */ + /*--------------------*/ + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, + struct rvt_ah *); }; struct rvt_dev_info { -- cgit v1.2.3 From b4e64397dabc946b83ffb1defa1215ede84c3b97 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:31 -0800 Subject: IB/rdmavt: Break rdma_vt main include header file up Until all functionality is moved over to rdmavt drivers still need to access a number of fields in data structures that are predominantly meant to be used by rdmavt. Once these rdmavt_.h header files are no longer being touched by drivers their content should be moved to rdmavt/.h. While here move a couple #defines over to more general IB verbs header files because they fit better. Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + include/rdma/rdma_vt.h | 309 +---------------------------------------------- include/rdma/rdmavt_mr.h | 130 ++++++++++++++++++++ include/rdma/rdmavt_qp.h | 262 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 396 insertions(+), 306 deletions(-) create mode 100644 include/rdma/rdmavt_mr.h create mode 100644 include/rdma/rdmavt_qp.h (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8fea4..d7d531cf00b7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -613,6 +613,7 @@ enum { }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) +#define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000) enum ib_ah_flags { IB_AH_GRH = 1 diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index ef66d2b0ec37..79da8ee3e2b3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,10 +55,9 @@ #include #include -#include "ib_verbs.h" - -#define RVT_MULTICAST_LID_BASE 0xC000 -#define RVT_PERMISSIVE_LID 0xFFFF +#include +#include +#include /* * For some of the IBTA objects there will likely be some @@ -70,297 +69,6 @@ #define RVT_FLAG_QP_INIT_DRIVER BIT(2) #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) -/* - * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once - * drivers no longer need access to the MR directly. - */ - -/* - * A segment is a linear region of low physical memory. - * Used by the verbs layer. - */ -struct rvt_seg { - void *vaddr; - size_t length; -}; - -/* The number of rvt_segs that fit in a page. */ -#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) - -struct rvt_segarray { - struct rvt_seg segs[RVT_SEGSZ]; -}; - -struct rvt_mregion { - struct ib_pd *pd; /* shares refcnt of ibmr.pd */ - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of rvt_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - u8 page_shift; /* 0 - non unform/non powerof2 sizes */ - u8 lkey_published; /* in global table */ - struct completion comp; /* complete when refcount goes to zero */ - atomic_t refcount; - struct rvt_segarray *map[0]; /* the segments */ -}; - -#define RVT_MAX_LKEY_TABLE_BITS 23 - -struct rvt_lkey_table { - spinlock_t lock; /* protect changes in this struct */ - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ - u32 max; /* size of the table */ - struct rvt_mregion __rcu **table; -}; - -/* End Memmory Region */ - -/* - * Things needed for the Queue Pair definition. Like the MR stuff above the - * following should probably get moved to qp.h once drivers stop trying to make - * and manipulate thier own QPs. For the few instnaces where a driver may need - * to look into a queue pair there should be a pointer to a driver priavte data - * structure that they can look at. - */ - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct rvt_sge { - struct rvt_mregion *mr; - void *vaddr; /* kernel virtual address of segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -/* - * Send work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->s_max_sge. - */ -struct rvt_swqe { - union { - struct ib_send_wr wr; /* don't use wr.sg_list */ - struct ib_ud_wr ud_wr; - struct ib_reg_wr reg_wr; - struct ib_rdma_wr rdma_wr; - struct ib_atomic_wr atomic_wr; - }; - u32 psn; /* first packet sequence number */ - u32 lpsn; /* last packet sequence number */ - u32 ssn; /* send sequence number */ - u32 length; /* total length of data in sg_list */ - struct rvt_sge sg_list[0]; -}; - -/* - * Receive work request queue entry. - * The size of the sg_list is determined when the QP (or SRQ) is created - * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). - */ -struct rvt_rwqe { - u64 wr_id; - u8 num_sge; - struct ib_sge sg_list[0]; -}; - -/* - * This structure is used to contain the head pointer, tail pointer, - * and receive work queue entries as a single memory allocation so - * it can be mmap'ed into user space. - * Note that the wq array elements are variable size so you can't - * just index into the array to get the N'th element; - * use get_rwqe_ptr() instead. - */ -struct rvt_rwq { - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ - struct rvt_rwqe wq[0]; -}; - -struct rvt_rq { - struct rvt_rwq *wq; - u32 size; /* size of RWQE array */ - u8 max_sge; - /* protect changes in this struct */ - spinlock_t lock ____cacheline_aligned_in_smp; -}; - -/* - * This structure is used by rvt_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. - */ -struct rvt_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - -#define RVT_MAX_RDMA_ATOMIC 16 - -/* - * This structure holds the information that the send tasklet needs - * to send a RDMA read response or atomic operation. - */ -struct rvt_ack_entry { - u8 opcode; - u8 sent; - u32 psn; - u32 lpsn; - union { - struct rvt_sge rdma_sge; - u64 atomic_data; - }; -}; - -struct rvt_sge_state { - struct rvt_sge *sg_list; /* next SGE to be used if any */ - struct rvt_sge sge; /* progress state for the current SGE */ - u32 total_len; - u8 num_sge; -}; - -/* - * Variables prefixed with s_ are for the requester (sender). - * Variables prefixed with r_ are for the responder (receiver). - * Variables prefixed with ack_ are for responder replies. - * - * Common variables are protected by both r_rq.lock and s_lock in that order - * which only happens in modify_qp() or changing the QP 'state'. - */ -struct rvt_qp { - struct ib_qp ibqp; - void *priv; /* Driver private data */ - /* read mostly fields above and below */ - struct ib_ah_attr remote_ah_attr; - struct ib_ah_attr alt_ah_attr; - struct rvt_qp __rcu *next; /* link list for QPN hash table */ - struct rvt_swqe *s_wq; /* send work queue */ - struct rvt_mmap_info *ip; - - unsigned long timeout_jiffies; /* computed from timeout */ - - enum ib_mtu path_mtu; - int srate_mbps; /* s_srate (below) converted to Mbit/s */ - u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu */ - u32 qkey; /* QKEY for this QP (for UD or RD) */ - u32 s_size; /* send work queue size */ - u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ - - u8 state; /* QP state */ - u8 allowed_ops; /* high order bits of allowed opcodes */ - u8 qp_access_flags; - u8 alt_timeout; /* Alternate path timeout for this QP */ - u8 timeout; /* Timeout for this QP */ - u8 s_srate; - u8 s_mig_state; - u8 port_num; - u8 s_pkey_index; /* PKEY index to use */ - u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ - u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ - u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ - u8 s_retry_cnt; /* number of times to retry */ - u8 s_rnr_retry_cnt; - u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 s_max_sge; /* size of s_wq->sg_list */ - u8 s_draining; - - /* start of read/write fields */ - atomic_t refcount ____cacheline_aligned_in_smp; - wait_queue_head_t wait; - - struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; - struct rvt_sge_state s_rdma_read_sge; - - spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ - unsigned long r_aflags; - u64 r_wr_id; /* ID for current receive WQE */ - u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ - u32 r_len; /* total length of r_sge */ - u32 r_rcv_len; /* receive data len processed */ - u32 r_psn; /* expected rcv packet sequence number */ - u32 r_msn; /* message sequence number */ - - u8 r_state; /* opcode of last packet received */ - u8 r_flags; - u8 r_head_ack_queue; /* index into s_ack_queue[] */ - - struct list_head rspwait; /* link for waiting to respond */ - - struct rvt_sge_state r_sge; /* current receive data */ - struct rvt_rq r_rq; /* receive work queue */ - - spinlock_t s_lock ____cacheline_aligned_in_smp; - struct rvt_sge_state *s_cur_sge; - u32 s_flags; - struct rvt_swqe *s_wqe; - struct rvt_sge_state s_sge; /* current send request data */ - struct rvt_mregion *s_rdma_mr; - struct sdma_engine *s_sde; /* current sde */ - u32 s_cur_size; /* size of send packet in bytes */ - u32 s_len; /* total length of s_sge */ - u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ - u32 s_last_psn; /* last response PSN processed */ - u32 s_sending_psn; /* lowest PSN that is being sent */ - u32 s_sending_hpsn; /* highest PSN that is being sent */ - u32 s_psn; /* current packet sequence number */ - u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ - u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ - u32 s_tail; /* next entry to process */ - u32 s_cur; /* current work queue entry */ - u32 s_acked; /* last un-ACK'ed entry */ - u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ - u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ - u16 s_rdma_ack_cnt; - s8 s_ahgidx; - u8 s_state; /* opcode of last packet sent */ - u8 s_ack_state; /* opcode of packet to ACK */ - u8 s_nak_state; /* non-zero if NAK is pending */ - u8 r_nak_state; /* non-zero if NAK is pending */ - u8 s_retry; /* requester retry counter */ - u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ - u8 s_tail_ack_queue; /* index into s_ack_queue[] */ - - struct rvt_sge_state s_ack_rdma_sge; - struct timer_list s_timer; - - /* - * This sge list MUST be last. Do not add anything below here. - */ - struct rvt_sge r_sg_list[0] /* verified SGEs */ - ____cacheline_aligned_in_smp; -}; - -struct rvt_srq { - struct ib_srq ibsrq; - struct rvt_rq rq; - struct rvt_mmap_info *ip; - /* send signal when number of RWQEs < limit */ - u32 limit; -}; - -/* End QP section */ - struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -554,17 +262,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } -static inline void rvt_put_mr(struct rvt_mregion *mr) -{ - if (unlikely(atomic_dec_and_test(&mr->refcount))) - complete(&mr->comp); -} - -static inline void rvt_get_mr(struct rvt_mregion *mr) -{ - atomic_inc(&mr->refcount); -} - static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct rvt_srq, ibsrq); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h new file mode 100644 index 000000000000..ea60476c6b6b --- /dev/null +++ b/include/rdma/rdmavt_mr.h @@ -0,0 +1,130 @@ +#ifndef DEF_RDMAVT_INCMR_H +#define DEF_RDMAVT_INCMR_H + +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m]