path: root/include/rdma
author    Linus Torvalds <torvalds@linux-foundation.org>  2017-05-03 12:45:55 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-05-03 12:45:55 -0700
commit    1684096b1ed813f621fb6cbd06e72235c1c2a0ca (patch)
tree      13a228c35d6344f5d23b2c195aa3b026e42aac4b /include/rdma
parent    16a12fa9aed176444fc795b09e796be41902bb08 (diff)
parent    24b43c99647bf9be4995e6a6c9c3a923c147770a (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
 "More exhaustive description of primary updates in this release:

  - Lots of driver fixes and misc fixes across the board.

  - I had to base on a net-next tree because the IPoIB Accelerator patches needed it. Unfortunately, it was known to Mellanox that there would need to be an IPoIB accelerator patch to the net tree (which left some functions turned off by an #ifdef construct to avoid warnings about defined but unused functions), then one to the RDMA tree, then a fixup that went back and re-enabled the functions in the net tree and enabled their use in the rdma tree. Also, a sparse fix was sent to the net tree after I did my pull, and the fixup patch conflicts quite directly with that sparse fix, so I'm going to submit the fixup patch towards the end of the merge window by itself and based upon your master branch at the time.

  - Two separate rounds of hfi1 fixes, one that got dropped from last release because it came in just a day or two before the end of the merge window, and the one from this release cycle. Of note is that I now have a third series that just landed from Intel yesterday. It is not included in this pull request, but I may submit it by the end of the week. I'll talk to Intel about improving the timing of their submissions for my workflow.

  - Changes to our idr usage in the RDMA subsystem that will tie into our cgroup management and also into the upcoming changes for the RDMA kernel<->userspace API.

  - Addition of support for a netdev to be tied to an RDMA device at the core level.

  - Addition of the VNIC driver from Intel. While IPoIB provides IP over InfiniBand (and *only* IP; no lower layer protocol headers are allowed or supported), the VNIC driver presents a virtual Ethernet device with support for things like varying Ethertypes, VLANs, priorities and other features of Ethernet. The virtual devices are centrally managed by the OPA fabric manager, making this (for the time being) a strictly OPA specific feature.

  - Improvements to the On-Demand Paging support in the RDMA subsystem.

  - Addition of three significant OPA changes. While we added OPA support some time ago (via the hfi1 driver), the RDMA subsystem has so far glossed over the areas where OPA and InfiniBand differ. With this release we are starting to add support for the OPA extensions into the RDMA core in the following areas: extended port information for OPA is now supported, extended Address Handle attributes for OPA are now supported, and extended SA Queries to get OPA specific subnet information are now supported.
 Concise summary from the tag:

  - idr usage and locking changes
  - build fix for hns
  - ipoib debug path record file fix
  - hfi1 updates
  - core RDMA netdev addition
  - Intel VNIC driver addition
  - Enhanced accelerators for IPoIB addition
  - Debug cleanups in cxgb3/4
  - Trivial cleanups from SF Markus Elfring
  - Misc rxe fixes from Mellanox
  - Misc ipoib fixes from Mellanox
  - Lots of mlx4/mlx5 changes from Mellanox
  - Misc fixes across the RDMA subsystem
  - ODP paging fixes and improvements
  - qedr updates
  - hfi1 updates
  - OPA port info patches
  - OPA AH patches
  - OPA SA Query patches"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (191 commits)
  infiniband: avoid dereferencing uninitialized dst on error path
  IB/SA: Add OPA addr header
  IB/mlx5: Add port_xmit_wait to counter registers read
  IB/ocrdma: fix out of bounds access to local buffer
  IB/mlx4: Fix incorrect order of formal and actual parameters
  IB/mlx4: Change flush logic so it adheres to the variable name
  mlx5: Fix mlx5_ib_map_mr_sg mr length
  IB/rxe: Don't clamp residual length to mtu
  IB/SA: Add support to query OPA path records
  IB/SA: Add OPA path record type
  IB/SA: Split struct sa_path_rec based on IB and ROCE specific fields
  IB/SA: Introduce path record specific types
  IB/SA: Rename ib_sa_path_rec to sa_path_rec
  IB/CM: Add braces when using sizeof
  IB/core: Define 'opa' rdma_ah_attr type
  IB/core: Define 'ib' and 'roce' rdma_ah_attr types
  IB/core: Use rdma_ah_attr accessor functions
  IB/core: Add accessor functions for rdma_ah_attr fields
  IB/PVRDMA: Rename ib_ah_attr related functions
  IB/mthca: Rename to_ib_ah_attr to to_rdma_ah_attr
  ...
Diffstat (limited to 'include/rdma')
-rw-r--r--include/rdma/ib_cm.h14
-rw-r--r--include/rdma/ib_hdrs.h66
-rw-r--r--include/rdma/ib_mad.h40
-rw-r--r--include/rdma/ib_marshall.h6
-rw-r--r--include/rdma/ib_pack.h2
-rw-r--r--include/rdma/ib_sa.h304
-rw-r--r--include/rdma/ib_umem.h8
-rw-r--r--include/rdma/ib_umem_odp.h6
-rw-r--r--include/rdma/ib_verbs.h325
-rw-r--r--include/rdma/opa_addr.h79
-rw-r--r--include/rdma/opa_port_info.h3
-rw-r--r--include/rdma/opa_vnic.h141
-rw-r--r--include/rdma/rdma_cm.h4
-rw-r--r--include/rdma/rdma_cm_ib.h2
-rw-r--r--include/rdma/rdma_vt.h11
-rw-r--r--include/rdma/rdmavt_qp.h18
-rw-r--r--include/rdma/uverbs_std_types.h114
-rw-r--r--include/rdma/uverbs_types.h172
18 files changed, 1222 insertions, 93 deletions
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index b49258b16f4e..7979cb04f529 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -117,8 +117,8 @@ struct ib_cm_req_event_param {
u8 port;
- struct ib_sa_path_rec *primary_path;
- struct ib_sa_path_rec *alternate_path;
+ struct sa_path_rec *primary_path;
+ struct sa_path_rec *alternate_path;
__be64 remote_ca_guid;
u32 remote_qkey;
@@ -197,7 +197,7 @@ struct ib_cm_mra_event_param {
};
struct ib_cm_lap_event_param {
- struct ib_sa_path_rec *alternate_path;
+ struct sa_path_rec *alternate_path;
};
enum ib_cm_apr_status {
@@ -363,8 +363,8 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
__be64 service_id);
struct ib_cm_req_param {
- struct ib_sa_path_rec *primary_path;
- struct ib_sa_path_rec *alternate_path;
+ struct sa_path_rec *primary_path;
+ struct sa_path_rec *alternate_path;
__be64 service_id;
u32 qp_num;
enum ib_qp_type qp_type;
@@ -521,7 +521,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
* @private_data_len: Size of the private data buffer, in bytes.
*/
int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len);
@@ -565,7 +565,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
u8 private_data_len);
struct ib_cm_sidr_req_param {
- struct ib_sa_path_rec *path;
+ struct sa_path_rec *path;
__be64 service_id;
int timeout_ms;
const void *private_data;
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index c755325f0831..5519f31f043a 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -74,6 +74,12 @@
#define IB_GRH_FLOW_MASK 0xFFFFF
#define IB_GRH_FLOW_SHIFT 0
#define IB_GRH_NEXT_HDR 0x1B
+#define IB_FECN_SHIFT 31
+#define IB_FECN_MASK 1
+#define IB_FECN_SMASK BIT(IB_FECN_SHIFT)
+#define IB_BECN_SHIFT 30
+#define IB_BECN_MASK 1
+#define IB_BECN_SMASK BIT(IB_BECN_SHIFT)
#define IB_AETH_CREDIT_SHIFT 24
#define IB_AETH_CREDIT_MASK 0x1F
@@ -181,4 +187,64 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth)
ib_u64_put(val, &ateth->compare_data);
}
+/*
+ * 9B/IB Packet Format
+ */
+#define IB_LNH_MASK 3
+#define IB_SC_MASK 0xf
+#define IB_SC_SHIFT 12
+#define IB_SL_MASK 0xf
+#define IB_SL_SHIFT 4
+
+static inline u8 ib_get_lnh(struct ib_header *hdr)
+{
+ return (be16_to_cpu(hdr->lrh[0]) & IB_LNH_MASK);
+}
+
+static inline u8 ib_get_sc(struct ib_header *hdr)
+{
+ return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK);
+}
+
+static inline u8 ib_get_sl(struct ib_header *hdr)
+{
+ return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK);
+}
+
+static inline u16 ib_get_dlid(struct ib_header *hdr)
+{
+ return (be16_to_cpu(hdr->lrh[1]));
+}
+
+static inline u16 ib_get_slid(struct ib_header *hdr)
+{
+ return (be16_to_cpu(hdr->lrh[3]));
+}
+
+/*
+ * BTH
+ */
+#define IB_BTH_OPCODE_MASK 0xff
+#define IB_BTH_OPCODE_SHIFT 24
+#define IB_BTH_PAD_MASK 3
+#define IB_BTH_PKEY_MASK 0xffff
+#define IB_BTH_PAD_SHIFT 20
+
+static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
+{
+ return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) &
+ IB_BTH_PAD_MASK);
+}
+
+static inline u16 ib_bth_get_pkey(struct ib_other_headers *ohdr)
+{
+ return (be32_to_cpu(ohdr->bth[0]) & IB_BTH_PKEY_MASK);
+}
+
+static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr)
+{
+ return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_OPCODE_SHIFT) &
+ IB_BTH_OPCODE_MASK);
+}
+
#endif /* IB_HDRS_H */
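
The accessors added above centralize 9B LRH and BTH field extraction that drivers previously open-coded. As a rough illustration of how a receive path might use them (the function below is hypothetical; only the ib_get_*() and ib_bth_get_*() helpers come from this header):

#include <linux/printk.h>
#include <rdma/ib_hdrs.h>

/* Hypothetical receive-side helper: decode routing fields from a 9B packet. */
static void example_decode_9b(struct ib_header *hdr,
			      struct ib_other_headers *ohdr)
{
	u8  lnh  = ib_get_lnh(hdr);	/* link next header: is a GRH present? */
	u8  sc   = ib_get_sc(hdr);	/* service class */
	u8  sl   = ib_get_sl(hdr);	/* service level */
	u16 dlid = ib_get_dlid(hdr);	/* destination LID */
	u16 slid = ib_get_slid(hdr);	/* source LID */

	pr_debug("lnh %u sc %u sl %u slid 0x%x dlid 0x%x op 0x%x pkey 0x%x pad %u\n",
		 lnh, sc, sl, slid, dlid,
		 ib_bth_get_opcode(ohdr), ib_bth_get_pkey(ohdr),
		 ib_bth_get_pad(ohdr));
}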
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 981214b3790c..d67b11b72029 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -262,6 +262,33 @@ struct ib_class_port_info {
__be32 trap_qkey;
};
+#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
+
+struct opa_class_port_info {
+ u8 base_version;
+ u8 class_version;
+ __be16 cap_mask;
+ __be32 cap_mask2_resp_time;
+
+ u8 redirect_gid[16];
+ __be32 redirect_tc_fl;
+ __be32 redirect_lid;
+ __be32 redirect_sl_qp;
+ __be32 redirect_qkey;
+
+ u8 trap_gid[16];
+ __be32 trap_tc_fl;
+ __be32 trap_lid;
+ __be32 trap_hl_qp;
+ __be32 trap_qkey;
+
+ __be16 trap_pkey;
+ __be16 redirect_pkey;
+
+ u8 trap_sl_rsvd;
+ u8 reserved[3];
+} __packed;
+
/**
* ib_get_cpi_resp_time - Returns the resp_time value from
* cap_mask2_resp_time in ib_class_port_info.
@@ -315,6 +342,17 @@ static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi,
IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
}
+/**
+ * opa_get_cpi_capmask2 - Returns the capmask2 value from
+ * cap_mask2_resp_time in ib_class_port_info.
+ * @cpi: A struct opa_class_port_info mad.
+ */
+static inline u32 opa_get_cpi_capmask2(struct opa_class_port_info *cpi)
+{
+ return (be32_to_cpu(cpi->cap_mask2_resp_time) >>
+ IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
+}
+
struct ib_mad_notice_attr {
u8 generic_type;
u8 prod_type_msb;
@@ -673,7 +711,7 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
* After invoking this routine, MAD services are no longer usable by the
* client on the associated QP.
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
/**
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
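
The new opa_class_port_info layout and opa_get_cpi_capmask2() are what later SA query code uses to discover OPA capabilities. A minimal sketch of such a check, assuming a received ClassPortInfo MAD (the wrapper function is illustrative, not part of the patch):

#include <rdma/ib_mad.h>

/* Illustrative: does this OPA ClassPortInfo advertise path record support? */
static bool example_opa_pr_supported(struct opa_class_port_info *cpi)
{
	return !!(opa_get_cpi_capmask2(cpi) & OPA_CLASS_PORT_INFO_PR_SUPPORT);
}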
diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h
index db037205c9e8..68cef3bd50fb 100644
--- a/include/rdma/ib_marshall.h
+++ b/include/rdma/ib_marshall.h
@@ -42,12 +42,12 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src);
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
- struct ib_ah_attr *src);
+ struct rdma_ah_attr *src);
void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
- struct ib_sa_path_rec *src);
+ struct sa_path_rec *src);
-void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
struct ib_user_path_rec *src);
#endif /* IB_USER_MARSHALL_H */
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index b13419ce99ff..36655899ee02 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -80,6 +80,8 @@ enum {
IB_OPCODE_UD = 0x60,
/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
IB_OPCODE_CNP = 0x80,
+ /* Manufacturer specific */
+ IB_OPCODE_MSP = 0xe0,
/* operations -- just used to define real constants */
IB_OPCODE_SEND_FIRST = 0x00,
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index fd0e53219f93..f5f70e345318 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -43,6 +43,8 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_addr.h>
+#include <rdma/opa_addr.h>
enum {
IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */
@@ -56,6 +58,7 @@ enum {
IB_SA_METHOD_GET_TRACE_TBL = 0x13
};
+#define OPA_SA_CLASS_VERSION 0x80
enum {
IB_SA_ATTR_CLASS_PORTINFO = 0x01,
IB_SA_ATTR_NOTICE = 0x02,
@@ -147,13 +150,45 @@ enum ib_sa_mc_join_states {
#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21)
#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22)
-struct ib_sa_path_rec {
+enum sa_path_rec_type {
+ SA_PATH_REC_TYPE_IB,
+ SA_PATH_REC_TYPE_ROCE_V1,
+ SA_PATH_REC_TYPE_ROCE_V2,
+ SA_PATH_REC_TYPE_OPA
+};
+
+struct sa_path_rec_ib {
__be64 service_id;
- union ib_gid dgid;
- union ib_gid sgid;
__be16 dlid;
__be16 slid;
u8 raw_traffic;
+};
+
+struct sa_path_rec_roce {
+ u8 dmac[ETH_ALEN];
+ /* ignored in IB */
+ int ifindex;
+ /* ignored in IB */
+ struct net *net;
+
+};
+
+struct sa_path_rec_opa {
+ __be64 service_id;
+ __be32 dlid;
+ __be32 slid;
+ u8 raw_traffic;
+ u8 l2_8B;
+ u8 l2_10B;
+ u8 l2_9B;
+ u8 l2_16B;
+ u8 qos_type;
+ u8 qos_priority;
+};
+
+struct sa_path_rec {
+ union ib_gid dgid;
+ union ib_gid sgid;
/* reserved */
__be32 flow_label;
u8 hop_limit;
@@ -170,17 +205,109 @@ struct ib_sa_path_rec {
u8 packet_life_time_selector;
u8 packet_life_time;
u8 preference;
- u8 dmac[ETH_ALEN];
- /* ignored in IB */
- int ifindex;
- /* ignored in IB */
- struct net *net;
- enum ib_gid_type gid_type;
+ union {
+ struct sa_path_rec_ib ib;
+ struct sa_path_rec_roce roce;
+ struct sa_path_rec_opa opa;
+ };
+ enum sa_path_rec_type rec_type;
};
-static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
+static inline enum ib_gid_type
+ sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec)
+{
+ switch (rec->rec_type) {
+ case SA_PATH_REC_TYPE_ROCE_V1:
+ return IB_GID_TYPE_ROCE;
+ case SA_PATH_REC_TYPE_ROCE_V2:
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+ default:
+ return IB_GID_TYPE_IB;
+ }
+}
+
+static inline enum sa_path_rec_type
+ sa_conv_gid_to_pathrec_type(enum ib_gid_type type)
+{
+ switch (type) {
+ case IB_GID_TYPE_ROCE:
+ return SA_PATH_REC_TYPE_ROCE_V1;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ return SA_PATH_REC_TYPE_ROCE_V2;
+ default:
+ return SA_PATH_REC_TYPE_IB;
+ }
+}
+
+static inline void path_conv_opa_to_ib(struct sa_path_rec *ib,
+ struct sa_path_rec *opa)
+{
+ if ((be32_to_cpu(opa->opa.dlid) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE)) ||
+ (be32_to_cpu(opa->opa.slid) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE))) {
+ /* Create OPA GID and zero out the LID */
+ ib->dgid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(opa->opa.dlid));
+ ib->dgid.global.subnet_prefix
+ = opa->dgid.global.subnet_prefix;
+ ib->sgid.global.interface_id
+ = OPA_MAKE_ID(be32_to_cpu(opa->opa.slid));
+ ib->dgid.global.subnet_prefix
+ = opa->dgid.global.subnet_prefix;
+ ib->ib.dlid = 0;
+
+ ib->ib.slid = 0;
+ } else {
+ ib->ib.dlid = htons(ntohl(opa->opa.dlid));
+ ib->ib.slid = htons(ntohl(opa->opa.slid));
+ }
+ ib->ib.service_id = opa->opa.service_id;
+ ib->ib.raw_traffic = opa->opa.raw_traffic;
+}
+
+static inline void path_conv_ib_to_opa(struct sa_path_rec *opa,
+ struct sa_path_rec *ib)
{
- return rec->net ? dev_get_by_index(rec->net, rec->ifindex) : NULL;
+ __be32 slid, dlid;
+
+ if ((ib_is_opa_gid(&ib->sgid)) ||
+ (ib_is_opa_gid(&ib->dgid))) {
+ slid = htonl(opa_get_lid_from_gid(&ib->sgid));
+ dlid = htonl(opa_get_lid_from_gid(&ib->dgid));
+ } else {
+ slid = htonl(ntohs(ib->ib.slid));
+ dlid = htonl(ntohs(ib->ib.dlid));
+ }
+ opa->opa.slid = slid;
+ opa->opa.dlid = dlid;
+ opa->opa.service_id = ib->ib.service_id;
+ opa->opa.raw_traffic = ib->ib.raw_traffic;
+}
+
+/* Convert from OPA to IB path record */
+static inline void sa_convert_path_opa_to_ib(struct sa_path_rec *dest,
+ struct sa_path_rec *src)
+{
+ if (src->rec_type != SA_PATH_REC_TYPE_OPA)
+ return;
+
+ *dest = *src;
+ dest->rec_type = SA_PATH_REC_TYPE_IB;
+ path_conv_opa_to_ib(dest, src);
+}
+
+/* Convert from IB to OPA path record */
+static inline void sa_convert_path_ib_to_opa(struct sa_path_rec *dest,
+ struct sa_path_rec *src)
+{
+ if (src->rec_type != SA_PATH_REC_TYPE_IB)
+ return;
+
+ /* Do a structure copy and overwrite the relevant fields */
+ *dest = *src;
+ dest->rec_type = SA_PATH_REC_TYPE_OPA;
+ path_conv_ib_to_opa(dest, src);
}
#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
@@ -322,11 +449,11 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query);
int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
+ struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
- struct ib_sa_path_rec *resp,
+ struct sa_path_rec *resp,
void *context),
void *context,
struct ib_sa_query **query);
@@ -420,27 +547,27 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
- struct ib_ah_attr *ah_attr);
+ struct rdma_ah_attr *ah_attr);
/**
* ib_init_ah_from_path - Initialize address handle attributes based on an SA
* path record.
*/
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
- struct ib_ah_attr *ah_attr);
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr);
/**
 * ib_sa_pack_path - Convert a path record from struct ib_sa_path_rec
* to IB MAD wire format.
*/
-void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute);
+void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute);
/**
* ib_sa_unpack_path - Convert a path record from MAD format to struct
* ib_sa_path_rec.
*/
-void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
+void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec);
/* Support GuidInfoRecord */
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
@@ -454,14 +581,137 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context,
struct ib_sa_query **sa_query);
-/* Support get SA ClassPortInfo */
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query);
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num);
+
+static inline bool sa_path_is_roce(struct sa_path_rec *rec)
+{
+ return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) ||
+ (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2));
+}
+
+static inline void sa_path_set_service_id(struct sa_path_rec *rec,
+ __be64 service_id)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ rec->ib.service_id = service_id;
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ rec->opa.service_id = service_id;
+}
+
+static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ rec->ib.slid = htons(ntohl(slid));
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ rec->opa.slid = slid;
+}
+
+static inline void sa_path_set_dlid(struct sa_path_rec *rec, __be32 dlid)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ rec->ib.dlid = htons(ntohl(dlid));
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ rec->opa.dlid = dlid;
+}
+
+static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec,
+ u8 raw_traffic)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ rec->ib.raw_traffic = raw_traffic;
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ rec->opa.raw_traffic = raw_traffic;
+}
+
+static inline __be64 sa_path_get_service_id(struct sa_path_rec *rec)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ return rec->ib.service_id;
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ return rec->opa.service_id;
+ return 0;
+}
+
+static inline __be32 sa_path_get_slid(struct sa_path_rec *rec)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ return htonl(ntohs(rec->ib.slid));
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ return rec->opa.slid;
+ return 0;
+}
+
+static inline __be32 sa_path_get_dlid(struct sa_path_rec *rec)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ return htonl(ntohs(rec->ib.dlid));
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ return rec->opa.dlid;
+ return 0;
+}
+
+static inline u8 sa_path_get_raw_traffic(struct sa_path_rec *rec)
+{
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+ return rec->ib.raw_traffic;
+ else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+ return rec->opa.raw_traffic;
+ return 0;
+}
+
+static inline void sa_path_set_dmac(struct sa_path_rec *rec, u8 *dmac)
+{
+ if (sa_path_is_roce(rec))
+ memcpy(rec->roce.dmac, dmac, ETH_ALEN);
+}
+
+static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
+{
+ if (sa_path_is_roce(rec))
+ eth_zero_addr(rec->roce.dmac);
+}
+
+static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex)
+{
+ if (sa_path_is_roce(rec))
+ rec->roce.ifindex = ifindex;
+}
+
+static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net)
+{
+ if (sa_path_is_roce(rec))
+ rec->roce.net = net;
+}
+
+static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
+{
+ if (sa_path_is_roce(rec))
+ return rec->roce.dmac;
+ return NULL;
+}
+
+static inline int sa_path_get_ifindex(struct sa_path_rec *rec)
+{
+ if (sa_path_is_roce(rec))
+ return rec->roce.ifindex;
+ return 0;
+}
+
+static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec)
+{
+ if (sa_path_is_roce(rec))
+ return rec->roce.net;
+ return NULL;
+}
+
+static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec)
+{
+ return sa_path_get_ndev(rec) ?
+ dev_get_by_index(sa_path_get_ndev(rec),
+ sa_path_get_ifindex(rec))
+ : NULL;
+}
#endif /* IB_SA_H */
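
Because struct sa_path_rec now keeps the transport-specific fields in a union keyed by rec_type, callers are expected to go through the accessors above rather than touch the union directly. A minimal, hypothetical sketch of filling an IB-typed record that way (field values are placeholders):

#include <rdma/ib_sa.h>

/* Illustrative only: populate an IB path record via the new accessors. */
static void example_fill_ib_path(struct sa_path_rec *rec,
				 __be32 slid, __be32 dlid, __be64 service_id)
{
	rec->rec_type = SA_PATH_REC_TYPE_IB;
	sa_path_set_slid(rec, slid);		/* stored as a 16-bit LID for IB */
	sa_path_set_dlid(rec, dlid);
	sa_path_set_service_id(rec, service_id);

	/* RoCE-only helpers quietly do nothing for an IB record: */
	sa_path_set_dmac_zero(rec);
}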
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 2d83cfd7e6ce..23159dd5be18 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -44,7 +44,7 @@ struct ib_umem {
struct ib_ucontext *context;
size_t length;
unsigned long address;
- int page_size;
+ int page_shift;
int writable;
int hugetlb;
struct work_struct work;
@@ -60,7 +60,7 @@ struct ib_umem {
/* Returns the offset of the umem start relative to the first page. */
static inline int ib_umem_offset(struct ib_umem *umem)
{
- return umem->address & ((unsigned long)umem->page_size - 1);
+ return umem->address & (BIT(umem->page_shift) - 1);
}
/* Returns the first page of an ODP umem. */
@@ -72,12 +72,12 @@ static inline unsigned long ib_umem_start(struct ib_umem *umem)
/* Returns the address of the page after the last one of an ODP umem. */
static inline unsigned long ib_umem_end(struct ib_umem *umem)
{
- return PAGE_ALIGN(umem->address + umem->length);
+ return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
}
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
- return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+ return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
}
#ifdef CONFIG_INFINIBAND_USER_MEM
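
Moving from page_size to page_shift lets the helpers above describe umems backed by pages larger than PAGE_SIZE (e.g. huge pages) without extra arithmetic in drivers. A worked example under assumed values, not taken from the patch:

/*
 * Assume page_shift = 21 (2 MiB pages), umem->address = 0x201000,
 * umem->length = 0x400000. Then:
 *
 *   ib_umem_offset(umem)    = 0x201000 & (BIT(21) - 1)          = 0x1000
 *   ib_umem_end(umem)       = ALIGN(0x201000 + 0x400000, 2 MiB) = 0x800000
 *   ib_umem_num_pages(umem) = (0x800000 - 0x200000) >> 21       = 3
 */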
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 542cd8b3414c..fb67554aabd6 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -84,7 +84,8 @@ struct ib_umem_odp {
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
+ int access);
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
unsigned long addr,
size_t size);
@@ -154,7 +155,8 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline int ib_umem_odp_get(struct ib_ucontext *context,
- struct ib_umem *umem)
+ struct ib_umem *umem,
+ int access)
{
return -EINVAL;
}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 99e4423eb2b8..f0cb4906478a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -55,6 +55,7 @@
#include <net/ip.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/netdevice.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
@@ -224,6 +225,7 @@ enum ib_device_cap_flags {
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
};
enum ib_signature_prot_cap {
@@ -431,7 +433,8 @@ enum ib_port_speed {
IB_SPEED_QDR = 4,
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
- IB_SPEED_EDR = 32
+ IB_SPEED_EDR = 32,
+ IB_SPEED_HDR = 64
};
/**
@@ -498,6 +501,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
/* Address format 0x000FF000 */
#define RDMA_CORE_CAP_AF_IB 0x00001000
#define RDMA_CORE_CAP_ETH_AH 0x00002000
+#define RDMA_CORE_CAP_OPA_AH 0x00004000
/* Protocol 0xFFF00000 */
#define RDMA_CORE_CAP_PROT_IB 0x00100000
@@ -836,15 +840,38 @@ struct ib_mr_status {
*/
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
+enum rdma_ah_attr_type {
+ RDMA_AH_ATTR_TYPE_IB,
+ RDMA_AH_ATTR_TYPE_ROCE,
+ RDMA_AH_ATTR_TYPE_OPA,
+};
+
struct ib_ah_attr {
- struct ib_global_route grh;
u16 dlid;
- u8 sl;
u8 src_path_bits;
+};
+
+struct roce_ah_attr {
+ u8 dmac[ETH_ALEN];
+};
+
+struct opa_ah_attr {
+ u32 dlid;
+ u8 src_path_bits;
+};
+
+struct rdma_ah_attr {
+ struct ib_global_route grh;
+ u8 sl;
u8 static_rate;
- u8 ah_flags;
u8 port_num;
- u8 dmac[ETH_ALEN];
+ u8 ah_flags;
+ enum rdma_ah_attr_type type;
+ union {
+ struct ib_ah_attr ib;
+ struct roce_ah_attr roce;
+ struct opa_ah_attr opa;
+ };
};
enum ib_wc_status {
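
With address handle attributes split per transport under struct rdma_ah_attr, every attribute now carries an explicit type selecting the union member. A hedged sketch of filling a RoCE attribute directly (placeholder values; the series also adds rdma_ah_* accessor functions, which real callers would normally prefer):

#include <rdma/ib_verbs.h>

/* Illustrative only: build a minimal RoCE rdma_ah_attr. */
static void example_init_roce_ah(struct rdma_ah_attr *attr, u8 port_num,
				 const u8 *dmac)
{
	memset(attr, 0, sizeof(*attr));
	attr->type = RDMA_AH_ATTR_TYPE_ROCE;
	attr->port_num = port_num;
	attr->sl = 0;
	memcpy(attr->roce.dmac, dmac, ETH_ALEN);
}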
@@ -1163,8 +1190,8 @@ struct ib_qp_attr {
u32 dest_qp_num;
int qp_access_flags;
struct ib_qp_cap cap;
- struct ib_ah_attr ah_attr;
- struct ib_ah_attr alt_ah_attr;
+ struct rdma_ah_attr ah_attr;
+ struct rdma_ah_attr alt_ah_attr;
u16 pkey_index;
u16 alt_pkey_index;
u8 en_sqd_async_notify;
@@ -1336,6 +1363,7 @@ enum ib_access_flags {
IB_ACCESS_MW_BIND = (1<<4),
IB_ZERO_BASED = (1<<5),
IB_ACCESS_ON_DEMAND = (1<<6),
+ IB_ACCESS_HUGETLB = (1<<7),
};
/*
@@ -1357,6 +1385,17 @@ struct ib_fmr_attr {
struct ib_umem;
+enum rdma_remove_reason {
+ /* Userspace requested uobject deletion. Call could fail */
+ RDMA_REMOVE_DESTROY,
+ /* Context deletion. This call should delete the actual object itself */
+ RDMA_REMOVE_CLOSE,
+ /* Driver is being hot-unplugged. This call should delete the actual object itself */
+ RDMA_REMOVE_DRIVER_REMOVE,
+ /* Context is being cleaned-up, but commit was just completed */
+ RDMA_REMOVE_DURING_CLEANUP,
+};
+
struct ib_rdmacg_object {
#ifdef CONFIG_CGROUP_RDMA
struct rdma_cgroup *cg; /* owner rdma cgroup */
@@ -1365,19 +1404,16 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
- struct list_head pd_list;
- struct list_head mr_list;
- struct list_head mw_list;
- struct list_head cq_list;
- struct list_head qp_list;
- struct list_head srq_list;
- struct list_head ah_list;
- struct list_head xrcd_list;
- struct list_head rule_list;
- struct list_head wq_list;
- struct list_head rwq_ind_tbl_list;
+ struct ib_uverbs_file *ufile;
int closing;
+ /* locking the uobjects_list */
+ struct mutex uobjects_lock;
+ struct list_head uobjects;
+ /* protects cleanup process from other actions */
+ struct rw_semaphore cleanup_rwsem;
+ enum rdma_remove_reason cleanup_reason;
+
struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct rb_root umem_tree;
@@ -1407,9 +1443,16 @@ struct ib_uobject {
struct ib_rdmacg_object cg_obj; /* rdmacg object */
int id; /* index into kernel idr */
struct kref ref;
- struct rw_semaphore mutex; /* protects .live */
+ atomic_t usecnt; /* protects exclusive access */
struct rcu_head rcu; /* kfree_rcu() overhead */
- int live;
+
+ const struct uverbs_obj_type *type;
+};
+
+struct ib_uobject_file {
+ struct ib_uobject uobj;
+ /* ufile contains the lock between context release and file close */
+ struct ib_uverbs_file *ufile;
};
struct ib_udata {
@@ -1447,6 +1490,7 @@ struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
struct ib_uobject *uobject;
+ enum rdma_ah_attr_type type;
};
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
@@ -1662,6 +1706,7 @@ enum ib_flow_spec_type {
IB_FLOW_SPEC_INNER = 0x100,
/* Actions */
IB_FLOW_SPEC_ACTION_TAG = 0x1000,
+ IB_FLOW_SPEC_ACTION_DROP = 0x1001,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@ -1790,6 +1835,11 @@ struct ib_flow_spec_action_tag {
u32 tag_id;
};
+struct ib_flow_spec_action_drop {
+ enum ib_flow_spec_type type;
+ u16 size;
+};
+
union ib_flow_spec {
struct {
u32 type;
@@ -1802,6 +1852,7 @@ union ib_flow_spec {
struct ib_flow_spec_ipv6 ipv6;
struct ib_flow_spec_tunnel tunnel;
struct ib_flow_spec_action_tag flow_tag;
+ struct ib_flow_spec_action_drop drop;
};
struct ib_flow_attr {
@@ -1862,6 +1913,34 @@ struct ib_port_immutable {
u32 max_mad_size;
};
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+ RDMA_NETDEV_OPA_VNIC,
+ RDMA_NETDEV_IPOIB,
+};
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+ void *clnt_priv;
+ struct ib_device *hca;
+ u8 port_num;
+
+ /* control functions */
+ void (*set_id)(struct net_device *netdev, int id);
+ /* send packet */
+ int (*send)(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn);
+ /* multicast */
+ int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *gid, u16 mlid,
+ int set_qkey, u32 qkey);
+ int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,