summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-09 15:53:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-09 15:53:03 -0800
commita50243b1ddcdd766d0d17fbfeeb1a22e62fdc461 (patch)
tree3dbf847105558eaac3658a46c4934df503c866a2 /include
parent2901752c14b8e1b7dd898d2e5245c93e531aa624 (diff)
parentfca22e7e595f1799cfbfdfa13e16d48ece0d136c (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "This has been a slightly more active cycle than normal with ongoing core changes and quite a lot of collected driver updates. - Various driver fixes for bnxt_re, cxgb4, hns, mlx5, pvrdma, rxe - A new data transfer mode for HFI1 giving higher performance - Significant functional and bug fix update to the mlx5 On-Demand-Paging MR feature - A chip hang reset recovery system for hns - Change mm->pinned_vm to an atomic64 - Update bnxt_re to support a new 57500 chip - A sane netlink 'rdma link add' method for creating rxe devices and fixing the various unregistration race conditions in rxe's unregister flow - Allow lookup up objects by an ID over netlink - Various reworking of the core to driver interface: - drivers should not assume umem SGLs are in PAGE_SIZE chunks - ucontext is accessed via udata not other means - start to make the core code responsible for object memory allocation - drivers should convert struct device to struct ib_device via a helper - drivers have more tools to avoid use after unregister problems" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (280 commits) net/mlx5: ODP support for XRC transport is not enabled by default in FW IB/hfi1: Close race condition on user context disable and close RDMA/umem: Revert broken 'off by one' fix RDMA/umem: minor bug fix in error handling path RDMA/hns: Use GFP_ATOMIC in hns_roce_v2_modify_qp cxgb4: kfree mhp after the debug print IB/rdmavt: Fix concurrency panics in QP post_send and modify to error IB/rdmavt: Fix loopback send with invalidate ordering IB/iser: Fix dma_nents type definition IB/mlx5: Set correct write permissions for implicit ODP MR bnxt_re: Clean cq for kernel consumers only RDMA/uverbs: Don't do double free of allocated PD RDMA: Handle ucontext allocations by IB/core RDMA/core: Fix a WARN() message bnxt_re: fix the regression due to changes in alloc_pbl IB/mlx4: Increase the timeout for CM cache IB/core: Abort page fault handler silently during owning process exit IB/mlx5: Validate correct PD before prefetch MR IB/mlx5: Protect against prefetch of invalid MR RDMA/uverbs: Store PR pointer before it is overwritten ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/cgroup_rdma.h2
-rw-r--r--include/linux/mlx5/driver.h5
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/scatterlist.h49
-rw-r--r--include/rdma/ib_hdrs.h14
-rw-r--r--include/rdma/ib_mad.h5
-rw-r--r--include/rdma/ib_umem.h8
-rw-r--r--include/rdma/ib_umem_odp.h34
-rw-r--r--include/rdma/ib_verbs.h274
-rw-r--r--include/rdma/iw_cm.h16
-rw-r--r--include/rdma/iw_portmap.h144
-rw-r--r--include/rdma/rdma_cm.h1
-rw-r--r--include/rdma/rdma_netlink.h11
-rw-r--r--include/rdma/rdma_vt.h30
-rw-r--r--include/rdma/rdmavt_qp.h20
-rw-r--r--include/rdma/restrack.h58
-rw-r--r--include/rdma/tid_rdma_defs.h108
-rw-r--r--include/rdma/uverbs_ioctl.h18
-rw-r--r--include/rdma/uverbs_std_types.h18
-rw-r--r--include/rdma/uverbs_types.h1
-rw-r--r--include/uapi/rdma/bnxt_re-abi.h11
-rw-r--r--include/uapi/rdma/ib_user_verbs.h2
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h18
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_verbs.h5
-rw-r--r--include/uapi/rdma/rdma_netlink.h74
-rw-r--r--include/uapi/rdma/rdma_user_cm.h4
-rw-r--r--include/uapi/rdma/rdma_user_rxe.h3
27 files changed, 588 insertions, 347 deletions
diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
index e94290b29e99..ef1bae2983f3 100644
--- a/include/linux/cgroup_rdma.h
+++ b/include/linux/cgroup_rdma.h
@@ -39,7 +39,7 @@ struct rdmacg_device {
* APIs for RDMA/IB stack to publish when a device wants to
* participate in resource accounting
*/
-int rdmacg_register_device(struct rdmacg_device *device);
+void rdmacg_register_device(struct rdmacg_device *device);
void rdmacg_unregister_device(struct rdmacg_device *device);
/* APIs for RDMA/IB stack to charge/uncharge pool specific resources */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5ffb5df1a2c2..022541dc5dbf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -365,6 +365,7 @@ struct mlx5_core_sig_ctx {
enum {
MLX5_MKEY_MR = 1,
MLX5_MKEY_MW,
+ MLX5_MKEY_INDIRECT_DEVX,
};
struct mlx5_core_mkey {
@@ -960,10 +961,6 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps);
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
u8 port_num, void *out, size_t sz);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error);
-#endif
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 86e7a7a46353..7eade9132f02 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -411,7 +411,7 @@ struct mm_struct {
unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */
- unsigned long pinned_vm; /* Refcount permanently increased */
+ atomic64_t pinned_vm; /* Refcount permanently increased */
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index b96f0d0b5b8f..b4be960c7e5d 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -339,12 +339,12 @@ int sg_alloc_table_chained(struct sg_table *table, int nents,
/*
* sg page iterator
*
- * Iterates over sg entries page-by-page. On each successful iteration,
- * you can call sg_page_iter_page(@piter) and sg_page_iter_dma_address(@piter)
- * to get the current page and its dma address. @piter->sg will point to the
- * sg holding this page and @piter->sg_pgoffset to the page's page offset
- * within the sg. The iteration will stop either when a maximum number of sg
- * entries was reached or a terminating sg (sg_last(sg) == true) was reached.
+ * Iterates over sg entries page-by-page. On each successful iteration, you
+ * can call sg_page_iter_page(@piter) to get the current page and its dma
+ * address. @piter->sg will point to the sg holding this page and
+ * @piter->sg_pgoffset to the page's page offset within the sg. The iteration
+ * will stop either when a maximum number of sg entries was reached or a
+ * terminating sg (sg_last(sg) == true) was reached.
*/
struct sg_page_iter {
struct scatterlist *sg; /* sg holding the page */
@@ -356,7 +356,19 @@ struct sg_page_iter {
* next step */
};
+/*
+ * sg page iterator for DMA addresses
+ *
+ * This is the same as sg_page_iter however you can call
+ * sg_page_iter_dma_address(@dma_iter) to get the page's DMA
+ * address. sg_page_iter_page() cannot be called on this iterator.
+ */
+struct sg_dma_page_iter {
+ struct sg_page_iter base;
+};
+
bool __sg_page_iter_next(struct sg_page_iter *piter);
+bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter);
void __sg_page_iter_start(struct sg_page_iter *piter,
struct scatterlist *sglist, unsigned int nents,
unsigned long pgoffset);
@@ -372,11 +384,13 @@ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter)
/**
* sg_page_iter_dma_address - get the dma address of the current page held by
* the page iterator.
- * @piter: page iterator holding the page
+ * @dma_iter: page iterator holding the page
*/
-static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter)
+static inline dma_addr_t
+sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter)
{
- return sg_dma_address(piter->sg) + (piter->sg_pgoffset << PAGE_SHIFT);
+ return sg_dma_address(dma_iter->base.sg) +
+ (dma_iter->base.sg_pgoffset << PAGE_SHIFT);
}
/**
@@ -385,11 +399,28 @@ static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter)
* @piter: page iterator to hold current page, sg, sg_pgoffset
* @nents: maximum number of sg entries to iterate over
* @pgoffset: starting page offset
+ *
+ * Callers may use sg_page_iter_page() to get each page pointer.
*/
#define for_each_sg_page(sglist, piter, nents, pgoffset) \
for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \
__sg_page_iter_next(piter);)
+/**
+ * for_each_sg_dma_page - iterate over the pages of the given sg list
+ * @sglist: sglist to iterate over
+ * @dma_iter: page iterator to hold current page
+ * @dma_nents: maximum number of sg entries to iterate over, this is the value
+ * returned from dma_map_sg
+ * @pgoffset: starting page offset
+ *
+ * Callers may use sg_page_iter_dma_address() to get each page's DMA address.
+ */
+#define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset) \
+ for (__sg_page_iter_start(&(dma_iter)->base, sglist, dma_nents, \
+ pgoffset); \
+ __sg_page_iter_dma_next(dma_iter);)
+
/*
* Mapping sg iterator
*
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 6e35416170a3..9a90bd031e8c 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -100,6 +100,8 @@ struct ib_atomic_eth {
__be64 compare_data; /* potentially unaligned */
} __packed;
+#include <rdma/tid_rdma_defs.h>
+
union ib_ehdrs {
struct {
__be32 deth[2];
@@ -117,6 +119,16 @@ union ib_ehdrs {
__be32 aeth;
__be32 ieth;
struct ib_atomic_eth atomic_eth;
+ /* TID RDMA headers */
+ union {
+ struct tid_rdma_read_req r_req;
+ struct tid_rdma_read_resp r_rsp;
+ struct tid_rdma_write_req w_req;
+ struct tid_rdma_write_resp w_rsp;
+ struct tid_rdma_write_data w_data;
+ struct tid_rdma_resync resync;
+ struct tid_rdma_ack ack;
+ } tid_rdma;
} __packed;
struct ib_other_headers {
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index fdef558e3a2d..79ba8219e7dc 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -616,12 +616,11 @@ struct ib_mad_agent {
void *context;
u32 hi_tid;
u32 flags;
+ void *security;
+ struct list_head mad_agent_sec_list;
u8 port_num;
u8 rmpp_version;
- void *security;
bool smp_allowed;
- bool lsm_nb_reg;
- struct notifier_block lsm_nb;
};
/**
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 5d3755ec5afa..73af05db04c7 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
struct ib_ucontext;
struct ib_umem_odp;
@@ -80,7 +81,7 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem)
#ifdef CONFIG_INFINIBAND_USER_MEM
-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
size_t size, int access, int dmasync);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
@@ -91,9 +92,10 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
#include <linux/err.h>
-static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
+static inline struct ib_umem *ib_umem_get(struct ib_udata *udata,
unsigned long addr, size_t size,
- int access, int dmasync) {
+ int access, int dmasync)
+{
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 0b1446fe2fab..dadc96dea39c 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -83,6 +83,19 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
return container_of(umem, struct ib_umem_odp, umem);
}
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_ucontext_per_mm {
@@ -103,23 +116,10 @@ struct ib_ucontext_per_mm {
};
int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem,
unsigned long addr, size_t size);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
-/*
- * The lower 2 bits of the DMA address signal the R/W permissions for
- * the entry. To upgrade the permissions, provide the appropriate
- * bitmask to the map_dma_pages function.
- *
- * Be aware that upgrading a mapped address might result in change of
- * the DMA address for the page.
- */
-#define ODP_READ_ALLOWED_BIT (1<<0ULL)
-#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
-
-#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-
int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bcnt, u64 access_mask,
unsigned long current_seq);
@@ -169,12 +169,6 @@ static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
return -EINVAL;
}
-static inline struct ib_umem_odp *
-ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size)
-{
- return ERR_PTR(-EINVAL);
-}
-
static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 80debf5982ac..9b9e17bcc201 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -238,6 +238,7 @@ enum ib_device_cap_flags {
IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
/* The device supports padding incoming writes to cacheline. */
IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
+ IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
};
enum ib_signature_prot_cap {
@@ -268,6 +269,7 @@ enum ib_odp_transport_cap_bits {
IB_ODP_SUPPORT_WRITE = 1 << 2,
IB_ODP_SUPPORT_READ = 1 << 3,
IB_ODP_SUPPORT_ATOMIC = 1 << 4,
+ IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
};
struct ib_odp_caps {
@@ -276,6 +278,7 @@ struct ib_odp_caps {
uint32_t rc_odp_caps;
uint32_t uc_odp_caps;
uint32_t ud_odp_caps;
+ uint32_t xrc_odp_caps;
} per_transport_caps;
};
@@ -1504,12 +1507,10 @@ struct ib_ucontext {
bool cleanup_retryable;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void (*invalidate_range)(struct ib_umem_odp *umem_odp,
unsigned long start, unsigned long end);
struct mutex per_mm_list_lock;
struct list_head per_mm_list;
-#endif
struct ib_rdmacg_object cg_obj;
/*
@@ -2186,7 +2187,6 @@ struct ib_port_cache {
struct ib_cache {
rwlock_t lock;
struct ib_event_handler event_handler;
- struct ib_port_cache *ports;
};
struct iw_cm_verbs;
@@ -2198,6 +2198,21 @@ struct ib_port_immutable {
u32 max_mad_size;
};
+struct ib_port_data {
+ struct ib_device *ib_dev;
+
+ struct ib_port_immutable immutable;
+
+ spinlock_t pkey_list_lock;
+ struct list_head pkey_list;
+
+ struct ib_port_cache cache;
+
+ spinlock_t netdev_lock;
+ struct net_device __rcu *netdev;
+ struct hlist_node ndev_hash_link;
+};
+
/* rdma netdev type - specifies protocol type */
enum rdma_netdev_t {
RDMA_NETDEV_OPA_VNIC,
@@ -2243,12 +2258,6 @@ struct rdma_netdev_alloc_params {
struct net_device *netdev, void *param);
};
-struct ib_port_pkey_list {
- /* Lock to hold while modifying the list. */
- spinlock_t list_lock;
- struct list_head pkey_list;
-};
-
struct ib_counters {
struct ib_device *device;
struct ib_uobject *uobject;
@@ -2264,6 +2273,19 @@ struct ib_counters_read_attr {
struct uverbs_attr_bundle;
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \
+ .size_##ib_struct = \
+ (sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO( \
+ !__same_type(((struct drv_struct *)NULL)->member, \
+ struct ib_struct)))
+
+#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
+ ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL))
+
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
+
/**
* struct ib_device_ops - InfiniBand device operations
* This structure defines all the InfiniBand device operations, providers will
@@ -2367,15 +2389,14 @@ struct ib_device_ops {
int (*del_gid)(const struct ib_gid_attr *attr, void **context);
int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
u16 *pkey);
- struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device,
- struct ib_udata *udata);
- int (*dealloc_ucontext)(struct ib_ucontext *context);
+ int (*alloc_ucontext)(struct ib_ucontext *context,
+ struct ib_udata *udata);
+ void (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
- struct ib_pd *(*alloc_pd)(struct ib_device *device,
- struct ib_ucontext *context,
- struct ib_udata *udata);
- int (*dealloc_pd)(struct ib_pd *pd);
+ int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context,
+ struct ib_udata *udata);
+ void (*dealloc_pd)(struct ib_pd *pd);
struct ib_ah *(*create_ah)(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr, u32 flags,
struct ib_udata *udata);
@@ -2506,34 +2527,57 @@ struct ib_device_ops {
*/
int (*get_hw_stats)(struct ib_device *device,
struct rdma_hw_stats *stats, u8 port, int index);
+ /*
+ * This function is called once for each port when a ib device is
+ * registered.
+ */
+ int (*init_port)(struct ib_device *device, u8 port_num,
+ struct kobject *port_sysfs);
+ /**
+ * Allows rdma drivers to add their own restrack attributes.
+ */
+ int (*fill_res_entry)(struct sk_buff *msg,
+ struct rdma_restrack_entry *entry);
+
+ /* Device lifecycle callbacks */
+ /*
+ * Called after the device becomes registered, before clients are
+ * attached
+ */
+ int (*enable_driver)(struct ib_device *dev);
+ /*
+ * This is called as part of ib_dealloc_device().
+ */
+ void (*dealloc_driver)(struct ib_device *dev);
+
+ DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
};
+struct rdma_restrack_root;
+
struct ib_device {
/* Do not access @dma_device directly from ULP nor from HW drivers. */
struct device *dma_device;
struct ib_device_ops ops;
char name[IB_DEVICE_NAME_MAX];
+ struct rcu_head rcu_head;
struct list_head event_handler_list;
spinlock_t event_handler_lock;
- rwlock_t client_data_lock;
- struct list_head core_list;
- /* Access to the client_data_list is protected by the client_data_lock
- * rwlock and the lists_rwsem read-write semaphore
- */
- struct list_head client_data_list;
+ struct rw_semaphore client_data_rwsem;
+ struct xarray client_data;
+ struct mutex unregistration_lock;
struct ib_cache cache;
/**
- * port_immutable is indexed by port number
+ * port_data is indexed by port number
*/
- struct ib_port_immutable *port_immutable;
+ struct ib_port_data *port_data;
int num_comp_vectors;
- struct ib_port_pkey_list *port_pkey_list;
-
struct iw_cm_verbs *iwcm;
struct module *owner;
@@ -2547,12 +2591,6 @@ struct ib_device {
struct kobject *ports_kobj;
struct list_head port_list;
- enum {
- IB_DEV_UNINITIALIZED,
- IB_DEV_REGISTERED,
- IB_DEV_UNREGISTERED
- } reg_state;
-
int uverbs_abi_ver;
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
@@ -2561,6 +2599,8 @@ struct ib_device {
__be64 node_guid;
u32 local_dma_lkey;
u16 is_switch:1;
+ /* Indicates kernel verbs support, should not be used in drivers */
+ u16 kverbs_provider:1;
u8 node_type;
u8 phys_port_cnt;
struct ib_device_attr attrs;
@@ -2572,10 +2612,7 @@ struct ib_device {
#endif
u32 index;
- /*
- * Implementation details of the RDMA core, don't use in drivers
- */
- struct rdma_restrack_root res;
+ struct rdma_restrack_root *res;
const struct uapi_definition *driver_def;
enum rdma_driver_id driver_id;
@@ -2586,10 +2623,13 @@ struct ib_device {
*/
refcount_t refcount;
struct completion unreg_completion;
+ struct work_struct unregistration_work;
+
+ const struct rdma_link_ops *link_ops;
};
struct ib_client {
- char *name;
+ const char *name;
void (*add) (struct ib_device *);
void (*remove)(struct ib_device *, void *client_data);
@@ -2616,22 +2656,47 @@ struct ib_client {
const struct sockaddr *addr,
void *client_data);
struct list_head list;
+ u32 client_id;
+
+ /* kverbs are not required by the client */
+ u8 no_kverbs_req:1;
};
-struct ib_device *ib_alloc_device(size_t size);
+struct ib_device *_ib_alloc_device(size_t size);
+#define ib_alloc_device(drv_struct, member) \
+ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof( \
+ struct drv_struct, member))), \
+ struct drv_struct, member)
+
void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device, const char *name,
- int (*port_callback)(struct ib_device *, u8,
- struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name);
void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
+void ib_unregister_device_queued(struct ib_device *ib_dev);
int ib_register_client (struct ib_client *client);
void ib_unregister_client(struct ib_client *client);
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
+/**
+ * ib_get_client_data - Get IB client context
+ * @device:Device to get context for
+ * @client:Client to get context for
+ *
+ * ib_get_client_data() returns the client context data set with
+ * ib_set_client_data(). This can only be called while the client is
+ * registered to the device, once the ib_client remove() callback returns this
+ * cannot be called.
+ */
+static inline void *ib_get_client_data(struct ib_device *device,
+ struct ib_client *client)
+{
+ return xa_load(&device->client_data, client->client_id);
+}
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
void ib_set_device_ops(struct ib_device *device,
@@ -2790,6 +2855,16 @@ static inline u8 rdma_start_port(const struct ib_device *device)
}
/**
+ * rdma_for_each_port - Iterate over all valid port numbers of the IB device
+ * @device - The struct ib_device * to iterate over
+ * @iter - The unsigned int to store the port number
+ */
+#define rdma_for_each_port(device, iter) \
+ for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \
+ unsigned int, iter))); \
+ iter <= rdma_end_port(device); (iter)++)
+
+/**
* rdma_end_port - Return the last valid port number for the device
* specified
*
@@ -2812,34 +2887,38 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
static inline bool rdma_is_grh_required(const struct ib_device *device,
u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- RDMA_CORE_PORT_IB_GRH_REQUIRED;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_PORT_IB_GRH_REQUIRED;
}
static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_IB;
}
static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+ return device->port_data[port_num].immutable.core_cap_flags &
+ (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
}
static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
}
static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_ROCE;
}
static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_IWARP;
}
static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
@@ -2850,12 +2929,14 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_RAW_PACKET;
}
static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_USNIC;
}
/**
@@ -2872,7 +2953,8 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n
*/
static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_MAD;
}
/**
@@ -2896,8 +2978,8 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
{
- return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
- == RDMA_CORE_CAP_OPA_MAD;
+ return (device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD;
}
/**
@@ -2922,7 +3004,8 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_SMI;
}
/**
@@ -2942,7 +3025,8 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_CM;
}
/**
@@ -2959,7 +3043,8 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IW_CM;
}
/**
@@ -2979,7 +3064,8 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_SA;
}
/**
@@ -3019,7 +3105,8 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num
*/
static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_AF_IB;
}
/**
@@ -3040,7 +3127,8 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_ETH_AH;
}
/**
@@ -3054,7 +3142,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
*/
static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
{
- return (device->port_immutable[port_num].core_cap_flags &
+ return (device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
}
@@ -3072,7 +3160,7 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
*/
static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].max_mad_size;
+ return device->port_data[port_num].immutable.max_mad_size;