From 96f87ee1811306d0c8cf94b8c37b0e4f725b01d1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Jan 2019 16:07:23 +0200 Subject: RDMA: Clean structures from CONFIG_INFINIBAND_ON_DEMAND_PAGING CONFIG_INFINIBAND_ON_DEMAND_PAGING is used in general structures to micro-optimize the memory footprint. Remove it, so it will allow us to simplify various ODP device flows. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a3ceed3a040a..3ddd199ba602 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1504,12 +1504,10 @@ struct ib_ucontext { bool cleanup_retryable; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void (*invalidate_range)(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); struct mutex per_mm_list_lock; struct list_head per_mm_list; -#endif struct ib_rdmacg_object cg_obj; /* -- cgit v1.2.3 From 13859d5df418ea535926e2b57c29d5161c522b9d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Jan 2019 16:07:26 +0200 Subject: RDMA/mlx5: Embed into the code flow the ODP config option Convert various places to more readable code, which embeds CONFIG_INFINIBAND_ON_DEMAND_PAGING into the code flow. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_umem_odp.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 0b1446fe2fab..d3725cf13ecd 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -83,6 +83,19 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) return container_of(umem, struct ib_umem_odp, umem); } +/* + * The lower 2 bits of the DMA address signal the R/W permissions for + * the entry. To upgrade the permissions, provide the appropriate + * bitmask to the map_dma_pages function. + * + * Be aware that upgrading a mapped address might result in change of + * the DMA address for the page. + */ +#define ODP_READ_ALLOWED_BIT (1<<0ULL) +#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) + +#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_ucontext_per_mm { @@ -107,19 +120,6 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, unsigned long addr, size_t size); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); -/* - * The lower 2 bits of the DMA address signal the R/W permissions for - * the entry. To upgrade the permissions, provide the appropriate - * bitmask to the map_dma_pages function. - * - * Be aware that upgrading a mapped address might result in change of - * the DMA address for the page. - */ -#define ODP_READ_ALLOWED_BIT (1<<0ULL) -#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) - -#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) - int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bcnt, u64 access_mask, unsigned long current_seq); -- cgit v1.2.3 From 0ada768517dafa1504ef5986ba04f118b7436960 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Jan 2019 16:07:27 +0200 Subject: RDMA/mlx5: Delete declaration of already removed function The implementation of mlx5_core_page_fault_resume() was removed in commit d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA"). This patch removes declaration too. Fixes: d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..b6f5839f129a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -939,10 +939,6 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error); -#endif int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); -- cgit v1.2.3 From b0ea0fa5435f9df7213a9af098558f7dd584d8e8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Jan 2019 11:15:16 +0200 Subject: IB/{core,hw}: Have ib_umem_get extract the ib_ucontext from ib_udata ib_umem_get() can only be called in a method callback, which always has a udata parameter. This allows ib_umem_get() to derive the ucontext pointer directly from the udata without requiring the drivers to find it in some way or another. Signed-off-by: Jason Gunthorpe Signed-off-by: Shamir Rabinovitch --- include/rdma/ib_umem.h | 8 +++++--- include/rdma/ib_verbs.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 5d3755ec5afa..73af05db04c7 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -36,6 +36,7 @@ #include #include #include +#include struct ib_ucontext; struct ib_umem_odp; @@ -80,7 +81,7 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) #ifdef CONFIG_INFINIBAND_USER_MEM -struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, +struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, size_t size, int access, int dmasync); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); @@ -91,9 +92,10 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, #include -static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context, +static inline struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, size_t size, - int access, int dmasync) { + int access, int dmasync) +{ return ERR_PTR(-EINVAL); } static inline void ib_umem_release(struct ib_umem *umem) { } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3ddd199ba602..aa1f126d3383 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4200,6 +4200,7 @@ void rdma_roce_rescan_device(struct ib_device *ibdev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +struct ib_ucontext *rdma_get_ucontext(struct ib_udata *udata); int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); -- cgit v1.2.3 From ea4baf7f116a18382df331db2123d98bc1c3cd83 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 18 Dec 2018 14:28:30 +0200 Subject: RDMA: Rename port_callback to init_port Most provider routines are callback routines which ib core invokes. _callback suffix doesn't convey information about when such callback is invoked. Therefore, rename port_callback to init_port. Additionally, store the init_port function pointer in ib_device_ops, so that it can be accessed in subsequent patches when binding rdma device to net namespace. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 10 +++++++--- include/rdma/rdma_vt.h | 3 --- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index aa1f126d3383..1d1902fd9f87 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2504,6 +2504,12 @@ struct ib_device_ops { */ int (*get_hw_stats)(struct ib_device *device, struct rdma_hw_stats *stats, u8 port, int index); + /* + * This function is called once for each port when a ib device is + * registered. + */ + int (*init_port)(struct ib_device *device, u8 port_num, + struct kobject *port_sysfs); }; struct ib_device { @@ -2620,9 +2626,7 @@ void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str); -int ib_register_device(struct ib_device *device, const char *name, - int (*port_callback)(struct ib_device *, u8, - struct kobject *)); +int ib_register_device(struct ib_device *device, const char *name); void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dd0ed8048bb4..acb3bc96dfa7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -250,9 +250,6 @@ struct rvt_driver_provided { */ void (*do_send)(struct rvt_qp *qp); - /* Passed to ib core registration. Callback to create syfs files */ - int (*port_callback)(struct ib_device *, u8, struct kobject *); - /* * Returns a pointer to the undelying hardware's PCI device. This is * used to display information as to what hardware is being referenced -- cgit v1.2.3 From 54747231150f0dddf68f2ee29ec2970fcc433909 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 18 Dec 2018 14:15:56 +0200 Subject: RDMA: Introduce and use rdma_device_to_ibdev() Introduce and use rdma_device_to_ibdev() API for those drivers which are registering one sysfs group and also use in ib_core. In subsequent patch, device->provider_ibdev one-to-one mapping is no longer holds true during accessing sysfs entries. Therefore, introduce an API rdma_device_to_ibdev() that provides such information. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1d1902fd9f87..94b6e1dd4dab 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4241,4 +4241,27 @@ rdma_set_device_sysfs_group(struct ib_device *dev, dev->groups[1] = group; } +/** + * rdma_device_to_ibdev - Get ib_device pointer from device pointer + * + * @device: device pointer for which ib_device pointer to retrieve + * + * rdma_device_to_ibdev() retrieves ib_device pointer from device. + * + */ +static inline struct ib_device *rdma_device_to_ibdev(struct device *device) +{ + return container_of(device, struct ib_device, dev); +} + +/** + * rdma_device_to_drv_device - Helper macro to reach back to driver's + * ib_device holder structure from device pointer. + * + * NOTE: New drivers should not make use of this API; This API is only for + * existing drivers who have exposed sysfs entries using + * rdma_set_device_sysfs_group(). + */ +#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ + container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 7527a7b157d1191b23562ed70154ae93bd65f845 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 17 Jan 2019 20:14:15 +0200 Subject: IB/core: Simplify rdma cgroup registration RDMA cgroup registration routine always returns success, so simplify function to be void and run clang formatter over whole CONFIG_CGROUP_RDMA art of core_priv.h. This reduces unwinding error path for regular registration and future net namespace change functionality for rdma device. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Acked-by: Tejun Heo Signed-off-by: Jason Gunthorpe --- include/linux/cgroup_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h index e94290b29e99..ef1bae2983f3 100644 --- a/include/linux/cgroup_rdma.h +++ b/include/linux/cgroup_rdma.h @@ -39,7 +39,7 @@ struct rdmacg_device { * APIs for RDMA/IB stack to publish when a device wants to * participate in resource accounting */ -int rdmacg_register_device(struct rdmacg_device *device); +void rdmacg_register_device(struct rdmacg_device *device); void rdmacg_unregister_device(struct rdmacg_device *device); /* APIs for RDMA/IB stack to charge/uncharge pool specific resources */ -- cgit v1.2.3 From 534fd7aac56a7994d16032f32123def9923e339f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 13 Jan 2019 16:01:17 +0200 Subject: IB/mlx5: Manage indirection mkey upon DEVX flow for ODP Manage indirection mkey upon DEVX flow to support ODP. To support a page fault event on the indirection mkey it needs to be part of the device mkey radix tree. Both the creation and the deletion flows for a DEVX object which is indirection mkey were adapted to handle that. Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b6f5839f129a..619d6fee96a1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -364,6 +364,7 @@ struct mlx5_core_sig_ctx { enum { MLX5_MKEY_MR = 1, MLX5_MKEY_MW, + MLX5_MKEY_INDIRECT_DEVX, }; struct mlx5_core_mkey { -- cgit v1.2.3 From da6a496a34f2fdcab14362cdc5068aac385e7b47 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 09:16:08 +0200 Subject: IB/mlx5: Ranges in implicit ODP MR inherit its write access A sub-range in ODP implicit MR should take its write permission from the MR and not be set always to allow. Fixes: d07d1d70ce1a ("IB/umem: Update on demand page (ODP) support") Signed-off-by: Moni Shoua Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_umem_odp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index d3725cf13ecd..d0024f53626e 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -116,7 +116,7 @@ struct ib_ucontext_per_mm { }; int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, +struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem, unsigned long addr, size_t size); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); -- cgit v1.2.3 From 61b2fe3c62e5269408e264b2348f96467246d537 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 09:16:09 +0200 Subject: IB/mlx5: Remove dead code When CONFIG_INFINIBAND_ON_DEMAND_PAGING is not set there is no caller to ib_alloc_odp_umem() so let's remove it. Signed-off-by: Moni Shoua Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_umem_odp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index d0024f53626e..dadc96dea39c 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -169,12 +169,6 @@ static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) return -EINVAL; } -static inline struct ib_umem_odp * -ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size) -{ - return ERR_PTR(-EINVAL); -} - static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -- cgit v1.2.3 From 6bf8f22aea0ddd93af822aed8afeeee4acdf7694 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 22 Jan 2019 08:29:56 +0200 Subject: IB/mlx5: Introduce MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD Introduce MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD and its initial implementation. This object is from type class FD and will be used to read DEVX async commands completion. The core layer should allow the driver to set object from type FD in a safe mode, this option was added with a matching comment in place. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_types.h | 1 + include/uapi/rdma/mlx5_user_ioctl_cmds.h | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index acb1bfa3cc99..175d761695e1 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -157,6 +157,7 @@ struct uverbs_obj_fd_type { extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; +void uverbs_close_fd(struct file *f); #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index b8d121d457f1..6ceae29d77cd 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -113,11 +113,20 @@ enum mlx5_ib_devx_umem_methods { MLX5_IB_METHOD_DEVX_UMEM_DEREG, }; +enum mlx5_ib_devx_async_cmd_fd_alloc_attrs { + MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_devx_async_cmd_fd_methods { + MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT), +}; + enum mlx5_ib_objects { MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_OBJECT_DEVX_OBJ, MLX5_IB_OBJECT_DEVX_UMEM, MLX5_IB_OBJECT_FLOW_MATCHER, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, }; enum mlx5_ib_flow_matcher_create_attrs { -- cgit v1.2.3 From a124edba26270697540f1058bfcd490c1c65b116 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 22 Jan 2019 08:29:57 +0200 Subject: IB/mlx5: Introduce async DEVX obj query API Introduce async DEVX obj query API to get the command response back to user space once it's ready without blocking when calling the firmware. The event's data includes a header with some meta data then the firmware output command data. The header includes: - The input 'wr_id' to let application recognizing the response. The input FD attribute is used to have the event data ready on. Downstream patches from this series will implement the file ops to let application read it. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 9 +++++++++ include/uapi/rdma/mlx5_user_ioctl_verbs.h | 5 +++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 6ceae29d77cd..8149d224030b 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -84,6 +84,14 @@ enum mlx5_ib_devx_obj_query_attrs { MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, }; +enum mlx5_ib_devx_obj_query_async_attrs { + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN, +}; + enum mlx5_ib_devx_query_eqn_attrs { MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, @@ -94,6 +102,7 @@ enum mlx5_ib_devx_obj_methods { MLX5_IB_METHOD_DEVX_OBJ_DESTROY, MLX5_IB_METHOD_DEVX_OBJ_MODIFY, MLX5_IB_METHOD_DEVX_OBJ_QUERY, + MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY, }; enum mlx5_ib_devx_umem_reg_attrs { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 4ef62c0e8452..4a701033b93f 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -51,5 +51,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type { MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, }; +struct mlx5_ib_uapi_devx_async_cmd_hdr { + __aligned_u64 wr_id; + __u8 out_data[]; +}; + #endif -- cgit v1.2.3 From 0b5cb3300ae59ed7e93b465dfa2384a6a4df8eb4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Jan 2019 10:25:20 -0800 Subject: RDMA/srp: Increase max_segment_size The default behavior of the SCSI core is to set the block layer request queue parameter max_segment_size to 64 KB. That means that elements of scatterlists are limited to 64 KB. Since RDMA adapters support larger sizes, increase max_segment_size for the SRP initiator. Notes: - The SCSI max_segment_size parameter was introduced in kernel v5.0. See also commit 50c2e9107f17 ("scsi: introduce a max_segment_size host_template parameters"). - Some other block drivers already set max_segment_size to UINT_MAX, e.g. nbd and rbd. Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 94b6e1dd4dab..71ea144ec823 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3715,6 +3715,19 @@ static inline unsigned int ib_sg_dma_len(struct ib_device *dev, return sg_dma_len(sg); } +/** + * ib_dma_max_seg_size - Return the size limit of a single DMA transfer + * @dev: The device to query + * + * The returned value represents a size in bytes. + */ +static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) +{ + struct device_dma_parameters *p = dev->dma_device->dma_parms; + + return p ? p->max_segment_size : UINT_MAX; +} + /** * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU * @dev: The device for which the DMA address was created -- cgit v1.2.3 From 459cc69fa4c17caf21de596693d8a07170820a58 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 30 Jan 2019 12:49:11 +0200 Subject: RDMA: Provide safe ib_alloc_device() function All callers to ib_alloc_device() provide a larger size than struct ib_device and rely on the fact that struct ib_device is embedded in their driver specific structure as the first member. Provide a safer variant of ib_alloc_device() that checks and enforces this approach to make sure the drivers are using it right. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 71ea144ec823..a1a1e710642c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2621,7 +2621,13 @@ struct ib_client { struct list_head list; }; -struct ib_device *ib_alloc_device(size_t size); +struct ib_device *_ib_alloc_device(size_t size); +#define ib_alloc_device(drv_struct, member) \ + container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct drv_struct, member))), \ + struct drv_struct, member) + void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str); -- cgit v1.2.3 From 6780c4fa9d6e091b2f206ac429a40e2e8d2e45f3 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 22 Jan 2019 10:08:22 +0200 Subject: RDMA: Add indication for in kernel API support to IB device Drivers that do not provide kernel verbs support should not be used by ib kernel clients at all. In case a device does not implement all mandatory verbs for kverbs usage mark it as a non kverbs provider and prevent its usage for all clients except for uverbs. The device is marked as a non kverbs provider using the 'kverbs_provider' flag which should only be set by the core code. The clients can choose whether kverbs are requested for its usage using the 'no_kverbs_req' flag which is currently set for uverbs only. This patch allows drivers to remove mandatory verbs stubs and simply set the callbacks to NULL. The IB device will be registered as a non-kverbs provider. Note that verbs that are required for the device registration process must be implemented. Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a1a1e710642c..4183a03b46b5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2565,6 +2565,8 @@ struct ib_device { __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; + /* Indicates kernel verbs support, should not be used in drivers */ + u16 kverbs_provider:1; u8 node_type; u8 phys_port_cnt; struct ib_device_attr attrs; @@ -2619,6 +2621,9 @@ struct ib_client { const struct sockaddr *addr, void *client_data); struct list_head list; + + /* kverbs are not required by the client */ + u8 no_kverbs_req:1; }; struct ib_device *_ib_alloc_device(size_t size); -- cgit v1.2.3 From 0ad699c0edc97a864177679dd67f2ccd73b07cb7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 30 Jan 2019 12:48:58 +0200 Subject: RDMA/core: Simplify restrack interface In the current implementation, we have one restrack root per-device and all users are simply providing it directly. Let's simplify the interface and have callers provide the ib_device and internally access the restrack_root. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/restrack.h | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 8f179be9d9a9..f756fc48eee5 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -49,6 +49,7 @@ enum rdma_restrack_type { }; #define RDMA_RESTRACK_HASH_BITS 8 +struct ib_device; struct rdma_restrack_entry; /** @@ -122,25 +123,9 @@ struct rdma_restrack_entry { bool user; }; -/** - * rdma_restrack_init() - initialize resource tracking - * @res: resource tracking root - */ -void rdma_restrack_init(struct rdma_restrack_root *res); - -/** - * rdma_restrack_clean() - clean resource tracking - * @res: resource tracking root - */ -void rdma_restrack_clean(struct rdma_restrack_root *res); - -/** - * rdma_restrack_count() - the current usage of specific object - * @res: resource entry - * @type: actual type of object to operate - * @ns: PID namespace - */ -int rdma_restrack_count(struct rdma_restrack_root *res, +void rdma_restrack_init(struct ib_device *dev); +void rdma_restrack_clean(struct ib_device *dev); +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns); -- cgit v1.2.3 From 02da37509705d3ba6a58fe4799a0caf6b4baecb0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 30 Jan 2019 12:49:02 +0200 Subject: RDMA/core: Use the ops infrastructure to keep all callbacks in one place As preparation to hide rdma_restrack_root, refactor the code to use the ops structure instead of a special callback which is hidden in rdma_restrack_root. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 5 +++++ include/rdma/restrack.h | 7 ------- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4183a03b46b5..5fc3be884444 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2510,6 +2510,11 @@ struct ib_device_ops { */ int (*init_port)(struct ib_device *device, u8 port_num, struct kobject *port_sysfs); + /** + * Allows rdma drivers to add their own restrack attributes. + */ + int (*fill_res_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); }; struct ib_device { diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index f756fc48eee5..cc66cc7a11d3 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -65,13 +65,6 @@ struct rdma_restrack_root { * @hash: global database for all resources per-device */ DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS); - /** - * @fill_res_entry: driver-specific fill function - * - * Allows rdma drivers to add their own restrack attributes. - */ - int (*fill_res_entry)(struct sk_buff *msg, - struct rdma_restrack_entry *entry); }; /** -- cgit v1.2.3 From ddf922c31fedd19c5b89a269c35e5c8b68c64327 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:21:01 -0800 Subject: IB/hfi1, IB/rdmavt: Allow for extending of QP's s_ack_queue The OPFN protocol uses the COMPARE_SWAP request to exchange data between the requester and the responder and therefore needs to be stored in the QP's s_ack_queue when the request is received on the responder side. However, because the user does not know anything about the OPFN protocol, this extra entry in the queue cannot be advertised to the user. This patch adds an extra entry in a QP's s_ack_queue. Reviewed-by: Mike Marciniszyn Signed-off-by: Mitko Haralanov Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index acb3bc96dfa7..168e40be183c 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -182,6 +182,7 @@ struct rvt_driver_params { u32 max_mad_size; u8 qos_shift; u8 max_rdma_atomic; + u8 extra_rdma_atomic; u8 reserved_operations; }; @@ -519,7 +520,14 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) */ static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) { - return rdi->dparms.max_rdma_atomic + 1; + return rdi->dparms.max_rdma_atomic + + rdi->dparms.extra_rdma_atomic + 1; +} + +static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi) +{ + return rdi->dparms.max_rdma_atomic + + rdi->dparms.extra_rdma_atomic; } /* -- cgit v1.2.3 From da82334219bc386ef7ea5b4b185a339a973dd513 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:41 +0200 Subject: IB/core: Allocate a bit for SRQ ODP support The ODP support matrix is per operation and per transport. The support for each transport (RC, UD, etc.) is described with a bit field. ODP for SRQ WQEs is considered a different kind of support from ODP for RQ WQs and therefore needs a different capability bit. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5fc3be884444..5eefdea62831 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -268,6 +268,7 @@ enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_WRITE = 1 << 2, IB_ODP_SUPPORT_READ = 1 << 3, IB_ODP_SUPPORT_ATOMIC = 1 << 4, + IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, }; struct ib_odp_caps { -- cgit v1.2.3 From 52a72e2a395fa3c5ab5df41058a8511e87215730 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:42 +0200 Subject: IB/uverbs: Expose XRC ODP device capabilities Expose XRC ODP capabilities as part of the extended device capabilities. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 1 + include/uapi/rdma/ib_user_verbs.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5eefdea62831..8219c07340a9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -277,6 +277,7 @@ struct ib_odp_caps { uint32_t rc_odp_caps; uint32_t uc_odp_caps; uint32_t ud_odp_caps; + uint32_t xrc_odp_caps; } per_transport_caps; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 480d9a60b68e..0474c7400268 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -270,6 +270,8 @@ struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_tm_caps tm_caps; struct ib_uverbs_cq_moderation_caps cq_moderation_caps; __aligned_u64 max_dm_size; + __u32 xrc_odp_caps; + __u32 reserved; }; struct ib_uverbs_query_port { -- cgit v1.2.3 From a163afc88556e099271a7b423295bc5176fcecce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 31 Jan 2019 08:30:34 -0800 Subject: IB/core: Remove ib_sg_dma_address() and ib_sg_dma_len() Keeping single line wrapper functions is not useful. Hence remove the ib_sg_dma_address() and ib_sg_dma_len() functions. This patch does not change any functionality. Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8219c07340a9..f7e8709e48cd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3705,33 +3705,6 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, { dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } -/** - * ib_sg_dma_address - Return the DMA address from a scatter/gather entry - * @dev: The device for which the DMA addresses were created - * @sg: The scatter/gather entry - * - * Note: this function is obsolete. To do: change all occurrences of - * ib_sg_dma_address() into sg_dma_address(). - */ -static inline u64 ib_sg_dma_address(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg_dma_address(sg); -} - -/** - * ib_sg_dma_len - Return the DMA length from a scatter/gather entry - * @dev: The device for which the DMA addresses were created - * @sg: The scatter/gather entry - * - * Note: this function is obsolete. To do: change all occurrences of - * ib_sg_dma_len() into sg_dma_len(). - */ -static inline unsigned int ib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg_dma_len(sg); -} /** * ib_dma_max_seg_size - Return the size limit of a single DMA transfer -- cgit v1.2.3 From 668aa15b5bf87f156ec805cb7348c785c56b82ab Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 29 Jan 2019 12:08:50 +0200 Subject: RDMA/rxe: Improve loopback marking Currently a packet is marked for loopback only if the source and destination addresses equals. This is not enough when multiple gids are present in rxe device's gid table and the traffic is from one gid to another. Fix it by marking the packet for loopback if the destination MAC address is equal to the source MAC address. Signed-off-by: Kamal Heib Reviewed-by: Yuval Shaia Tested-by: Yuval Shaia Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_rxe.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index 44ef6a3b7afc..aae2e696bb38 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -58,8 +58,7 @@ struct rxe_global_route { struct rxe_av { __u8 port_num; __u8 network_type; - __u16 reserved1; - __u32 reserved2; + __u8 dmac[6]; struct rxe_global_route grh; union { struct sockaddr_in _sockaddr_in; -- cgit v1.2.3 From f76903d574b26bc596951a5c5e757eb02c67abbd Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 29 Jan 2019 13:33:11 -0800 Subject: RDMA/IWPM: refactor the IWPM message attribute names In order to add new IWPM_NL attributes, the enums for the IWPM commands attributes are refactored such that a new attribute can be added without breaking ABI version 3. Instead of sharing nl attribute enums for both request and response messages, we create separate enums for each IWPM message request and reply. This allows us to extend any given IWPM message by adding new attributes for just that message. These new enums are created, though, in a way to avoid breaking ABI version 3. Signed-off-by: Steve Wise Reviewed-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_netlink.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 2e18b77a817f..42d53e182d5f 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -83,13 +83,20 @@ enum { IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0, IWPM_NLA_MANAGE_MAPPING_SEQ, IWPM_NLA_MANAGE_ADDR, - IWPM_NLA_MANAGE_MAPPED_LOC_ADDR, + IWPM_NLA_MANAGE_MAPPING_MAX +}; + +enum { + IWPM_NLA_RMANAGE_MAPPING_UNSPEC = 0, + IWPM_NLA_RMANAGE_MAPPING_SEQ, + IWPM_NLA_RMANAGE_ADDR, + IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR, + /* The following maintains bisectability of rdma-core */ + IWPM_NLA_MANAGE_MAPPED_LOC_ADDR = IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR, IWPM_NLA_RMANAGE_MAPPING_ERR, IWPM_NLA_RMANAGE_MAPPING_MAX }; -#define IWPM_NLA_MANAGE_MAPPING_MAX 3 -#define IWPM_NLA_QUERY_MAPPING_MAX 4 #define IWPM_NLA_MAPINFO_SEND_MAX 3 enum { @@ -97,6 +104,14 @@ enum { IWPM_NLA_QUERY_MAPPING_SEQ, IWPM_NLA_QUERY_LOCAL_ADDR, IWPM_NLA_QUERY_REMOTE_ADDR, + IWPM_NLA_QUERY_MAPPING_MAX, +}; + +enum { + IWPM_NLA_RQUERY_MAPPING_UNSPEC = 0, + IWPM_NLA_RQUERY_MAPPING_SEQ, + IWPM_NLA_RQUERY_LOCAL_ADDR, + IWPM_NLA_RQUERY_REMOTE_ADDR, IWPM_NLA_RQUERY_MAPPED_LOC_ADDR, IWPM_NLA_RQUERY_MAPPED_REM_ADDR, IWPM_NLA_RQUERY_MAPPING_ERR, -- cgit v1.2.3 From b0bad9ad514fc1dd8890f1749f5d2425a73270e3 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 29 Jan 2019 13:33:16 -0800 Subject: RDMA/IWPM: Support no port mapping requirements A soft iwarp driver that uses the host TCP stack via a kernel mode socket does not need port mapping. In fact, if the port map daemon, iwpmd, is running, then iwpmd must not try and create/bind a socket to the actual port for a soft iwarp connection, since the driver already has that socket bound. Yet if the soft iwarp driver wants to interoperate with hard iwarp devices that -are- using port mapping, then the soft iwarp driver's mappings still need to be maintained and advertised by the iwpm protocol. This patch enhances the rdma driver<->iwcm interface to allow an iwarp driver to specify that it does not want port mapping. The iwpm kernel<->iwpmd interface is also enhanced to pass up this information on map requests. Care is taken to interoperate with the current iwpmd version (ABI version 3) and only use the new NL attributes if iwpmd supports ABI version 4. The ABI version define has also been created in rdma_netlink.h so both kernel and user code can share it. The iwcm and iwpmd negotiate the ABI version to use with a new HELLO netlink message. Signed-off-by: Steve Wise Reviewed-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- include/rdma/iw_cm.h | 13 +++++++++++++ include/rdma/iw_portmap.h | 15 ++++++++++++++- include/uapi/rdma/rdma_netlink.h | 24 ++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 5cd7701db148..48512abd3162 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -105,6 +105,18 @@ struct iw_cm_conn_param { u32 qpn; }; +enum iw_flags { + + /* + * This flag allows the iwcm and iwpmd to still advertise + * mappings but the real and mapped port numbers are the + * same. Further, iwpmd will not bind any user socket to + * reserve the port. This is required for soft iwarp + * to play in the port mapped iwarp space. + */ + IW_F_NO_PORT_MAP = (1 << 0), +}; + struct iw_cm_verbs { void (*add_ref)(struct ib_qp *qp); @@ -127,6 +139,7 @@ struct iw_cm_verbs { int (*destroy_listen)(struct iw_cm_id *cm_id); char ifname[IFNAMSIZ]; + enum iw_flags driver_flags; }; /** diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index fda31673a562..84fac196ef80 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -58,6 +58,7 @@ struct iwpm_sa_data { struct sockaddr_storage mapped_loc_addr; struct sockaddr_storage rem_addr; struct sockaddr_storage mapped_rem_addr; + u32 flags; }; /** @@ -205,9 +206,11 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, * @local_addr: Local ip/tcp address * @mapped_addr: Mapped local ip/tcp address * @nl_client: The index of the netlink client + * @map_flags: IWPM mapping flags */ int iwpm_create_mapinfo(struct sockaddr_storage *local_addr, - struct sockaddr_storage *mapped_addr, u8 nl_client); + struct sockaddr_storage *mapped_addr, u8 nl_client, + u32 map_flags); /** * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address @@ -221,4 +224,14 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_addr, int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr, struct sockaddr_storage *mapped_addr); +/** + * iwpm_hello_cb - Process a hello message from iwpmd + * + * @skb: + * @cb: Contains the received message (payload and netlink header) + * + * Using the received port mapper pid, send the kernel's abi_version + * after adjusting it to support the iwpmd version. + */ +int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb); #endif /* _IW_PORTMAP_H */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 42d53e182d5f..0f5263767fb4 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -35,6 +35,19 @@ enum { RDMA_NL_RDMA_CM_NUM_ATTR, }; +/* The minimum version that the iwpm kernel supports */ +#define IWPM_UABI_VERSION_MIN 3 + +/* The latest version that the iwpm kernel supports */ +#define IWPM_UABI_VERSION 4 + +/* iwarp port mapper message flags */ +enum { + + /* Do not map the port for this IWPM request */ + IWPM_FLAGS_NO_PORT_MAP = (1 << 0), +}; + /* iwarp port mapper op-codes */ enum { RDMA_NL_IWPM_REG_PID = 0, @@ -45,6 +58,7 @@ enum { RDMA_NL_IWPM_HANDLE_ERR, RDMA_NL_IWPM_MAPINFO, RDMA_NL_IWPM_MAPINFO_NUM, + RDMA_NL_IWPM_HELLO, RDMA_NL_IWPM_NUM_OPS }; @@ -83,6 +97,7 @@ enum { IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0, IWPM_NLA_MANAGE_MAPPING_SEQ, IWPM_NLA_MANAGE_ADDR, + IWPM_NLA_MANAGE_FLAGS, IWPM_NLA_MANAGE_MAPPING_MAX }; @@ -98,12 +113,14 @@ enum { }; #define IWPM_NLA_MAPINFO_SEND_MAX 3 +#define IWPM_NLA_REMOVE_MAPPING_MAX 3 enum { IWPM_NLA_QUERY_MAPPING_UNSPEC = 0, IWPM_NLA_QUERY_MAPPING_SEQ, IWPM_NLA_QUERY_LOCAL_ADDR, IWPM_NLA_QUERY_REMOTE_ADDR, + IWPM_NLA_QUERY_FLAGS, IWPM_NLA_QUERY_MAPPING_MAX, }; @@ -129,6 +146,7 @@ enum { IWPM_NLA_MAPINFO_UNSPEC = 0, IWPM_NLA_MAPINFO_LOCAL_ADDR, IWPM_NLA_MAPINFO_MAPPED_ADDR, + IWPM_NLA_MAPINFO_FLAGS, IWPM_NLA_MAPINFO_MAX }; @@ -147,6 +165,12 @@ enum { IWPM_NLA_ERR_MAX }; +enum { + IWPM_NLA_HELLO_UNSPEC = 0, + IWPM_NLA_HELLO_ABI_VERSION, + IWPM_NLA_HELLO_MAX +}; + /* * Local service operations: * RESOLVE - The client requests the local service to resolve a path. -- cgit v1.2.3 From a78e8723a50530d15faa25cc0b6f009bcd251c20 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 16 Jan 2019 09:55:41 +0200 Subject: RDMA/cma: Remove CM_ID statistics provided by rdma-cm module Netlink statistics exported by rdma-cm never had any working user space component published to the mailing list or to any open source project. Canvassing various proprietary users, and the original requester, we find that there are no real users of this interface. This patch simply removes all occurrences of RDMA CM netlink in favour of modern nldev implementation, which provides the same information and accompanied by widely used user space component. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_netlink.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 0f5263767fb4..3a9e681e4257 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -5,8 +5,7 @@ #include enum { - RDMA_NL_RDMA_CM = 1, - RDMA_NL_IWCM, + RDMA_NL_IWCM = 2, RDMA_NL_RSVD, RDMA_NL_LS, /* RDMA Local Services */ RDMA_NL_NLDEV, /* RDMA device interface */ @@ -14,8 +13,7 @@ enum { }; enum { - RDMA_NL_GROUP_CM = 1, - RDMA_NL_GROUP_IWPM, + RDMA_NL_GROUP_IWPM = 2, RDMA_NL_GROUP_LS, RDMA_NL_NUM_GROUPS }; @@ -24,17 +22,6 @@ enum { #define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1)) #define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op) -enum { - RDMA_NL_RDMA_CM_ID_STATS = 0, - RDMA_NL_RDMA_CM_NUM_OPS -}; - -enum { - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1, - RDMA_NL_RDMA_CM_ATTR_DST_ADDR, - RDMA_NL_RDMA_CM_NUM_ATTR, -}; - /* The minimum version that the iwpm kernel supports */ #define IWPM_UABI_VERSION_MIN 3 -- cgit v1.2.3 From a2bfd708b17adb6e597e70d4eca824667f2d4e3c Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 5 Feb 2019 11:33:22 -0800 Subject: RDMA/iwpm: move kdoc comments to functions Move the iwpm kdoc comments from the prototype declarations to above the function bodies. There are no functional changes in this patch. Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- include/rdma/iw_portmap.h | 149 ---------------------------------------------- 1 file changed, 149 deletions(-) (limited to 'include') diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index 84fac196ef80..b9fee7feeeb5 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -61,177 +61,28 @@ struct iwpm_sa_data { u32 flags; }; -/** - * iwpm_init - Allocate resources for the iwarp port mapper - * - * Should be called when network interface goes up. - */ int iwpm_init(u8); - -/** - * iwpm_exit - Deallocate resources for the iwarp port mapper - * - * Should be called when network interface goes down. - */ int iwpm_exit(u8); - -/** - * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid - * - * Returns true if the pid is greater than zero, otherwise returns false - */ int iwpm_valid_pid(void); - -/** - * iwpm_register_pid - Send a netlink query to userspace - * to get the iwarp port mapper pid - * @pm_msg: Contains driver info to send to the userspace port mapper - * @nl_client: The index of the netlink client - */ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client); - -/** - * iwpm_add_mapping - Send a netlink add mapping request to - * the userspace port mapper - * @pm_msg: Contains the local ip/tcp address info to send - * @nl_client: The index of the netlink client - * - * If the request is successful, the pm_msg stores - * the port mapper response (mapped address info) - */ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client); - -/** - * iwpm_add_and_query_mapping - Send a netlink add and query mapping request - * to the userspace port mapper - * @pm_msg: Contains the local and remote ip/tcp address info to send - * @nl_client: The index of the netlink client - * - * If the request is successful, the pm_msg stores the - * port mapper response (mapped local and remote address info) - */ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client); - -/** - * iwpm_remove_mapping - Send a netlink remove mapping request - * to the userspace port mapper - * - * @local_addr: Local ip/tcp address to remove - * @nl_client: The index of the netlink client - */ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client); - -/** - * iwpm_register_pid_cb - Process the port mapper response to - * iwpm_register_pid query - * @skb: - * @cb: Contains the received message (payload and netlink header) - * - * If successful, the function receives the userspace port mapper pid - * which is used in future communication with the port mapper - */ int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *); - -/** - * iwpm_add_mapping_cb - Process the port mapper response to - * iwpm_add_mapping request - * @skb: - * @cb: Contains the received message (payload and netlink header) - */ int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *); - -/** - * iwpm_add_and_query_mapping_cb - Process the port mapper response to - * iwpm_add_and_query_mapping request - * @skb: - * @cb: Contains the received message (payload and netlink header) - */ int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *); - -/** - * iwpm_remote_info_cb - Process remote connecting peer address info, which - * the port mapper has received from the connecting peer - * - * @cb: Contains the received message (payload and netlink header) - * - * Stores the IPv4/IPv6 address info in a hash table - */ int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *); - -/** - * iwpm_mapping_error_cb - Process port mapper notification for error - * - * @skb: - * @cb: Contains the received message (payload and netlink header) - */ int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *); - -/** - * iwpm_mapping_info_cb - Process a notification that the userspace - * port mapper daemon is started - * @skb: - * @cb: Contains the received message (payload and netlink header) - * - * Using the received port mapper pid, send all the local mapping - * info records to the userspace port mapper - */ int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *); - -/** - * iwpm_ack_mapping_info_cb - Process the port mapper ack for - * the provided local mapping info records - * @skb: - * @cb: Contains the received message (payload and netlink header) - */ int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *); - -/** - * iwpm_get_remote_info - Get the remote connecting peer address info - * - * @mapped_loc_addr: Mapped local address of the listening peer - * @mapped_rem_addr: Mapped remote address of the connecting peer - * @remote_addr: To store the remote address of the connecting peer - * @nl_client: The index of the netlink client - * - * The remote address info is retrieved and provided to the client in - * the remote_addr. After that it is removed from the hash table - */ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, struct sockaddr_storage *mapped_rem_addr, struct sockaddr_storage *remote_addr, u8 nl_client); - -/** - * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address - * info in a hash table - * @local_addr: Local ip/tcp address - * @mapped_addr: Mapped local ip/tcp address - * @nl_client: The index of the netlink client - * @map_flags: IWPM mapping flags - */ int iwpm_create_mapinfo(struct sockaddr_storage *local_addr, struct sockaddr_storage *mapped_addr, u8 nl_client, u32 map_flags); - -/** - * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address - * info from the hash table - * @local_addr: Local ip/tcp address - * @mapped_addr: Mapped local ip/tcp address - * - * Returns err code if mapping info is not found in the hash table, - * otherwise returns 0 - */ int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr, struct sockaddr_storage *mapped_addr); -/** - * iwpm_hello_cb - Process a hello message from iwpmd - * - * @skb: - * @cb: Contains the received message (payload and netlink header) - * - * Using the received port mapper pid, send the kernel's abi_version - * after adjusting it to support the iwpmd version. - */ int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb); #endif /* _IW_PORTMAP_H */ -- cgit v1.2.3 From 385156c5f2a61834666f079ee66338f177c65c28 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:29:44 -0800 Subject: IB/hfi: Move RC functions into a header file This patch moves some RC helper functions into a header file so that they can be called from both RC and TID RDMA functions. In addition, a common function for rewinding a request is created in rdmavt so that it can be shared between qib and hfi1 driver. Reviewed-by: Mike Marciniszyn Signed-off-by: Mitko Haralanov Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index cbafb1878669..56a9221378d9 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -628,6 +628,16 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp); */ void rvt_get_credit(struct rvt_qp *qp, u32 aeth); +/** + * rvt_restart_sge - rewind the sge state for a wqe + * @ss: the sge state pointer + * @wqe: the wqe to rewind + * @len: the data length from the start of the wqe in bytes + * + * Returns the remaining data length. + */ +u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len); + /** * @qp - the qp pair * @len - the length -- cgit v1.2.3 From 838b6fd2d9ca29998869e4d1ecf4566efe807666 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:30:07 -0800 Subject: IB/hfi1: TID RDMA RcvArray programming and TID allocation TID entries are used by hfi1 hardware to receive data payload from incoming packets directly into a user buffer and thus avoid data copying by software. This patch implements the functions for TID allocation, freeing, and programming TID RcvArray entries in hardware for kernel clients. TID entries are managed via lists of TID groups similar to PSM. Furthermore, to track TID resource allocation for each request, software flows are also allocated and freed as needed. Since software flows consume large amount of memory for tracking TID allocation and freeing, it is generally desirable to allocate them dynamically in the send queue and only for TID RDMA requests, but pre-allocate them for receive queue because the send queue could have thousands of entries while the receive queue has only a limited number of entries. Signed-off-by: Mitko Haralanov Signed-off-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 56a9221378d9..9095a0b71250 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -174,6 +174,7 @@ struct rvt_swqe { u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ u32 length; /* total length of data in sg_list */ + void *priv; /* driver dependent field */ struct rvt_sge sg_list[0]; }; @@ -235,6 +236,7 @@ struct rvt_ack_entry { u32 lpsn; u8 opcode; u8 sent; + void *priv; }; #define RC_QP_SCALING_INTERVAL 5 -- cgit v1.2.3 From 742a3826cf82395e304df99f6494d04b0dd03a84 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:30:40 -0800 Subject: IB/hfi1: Add functions to build TID RDMA READ request This patch adds the helper functions to build the TID RDMA READ request on the requester side. The key is to allocate TID resources (TID flow and TID entries) and send the resource information to the responder side along with the read request. Since the TID resources are limited, each TID RDMA READ request has to be split into segments with a default segment size of 256K. A software flow is allocated to track the data transaction for each segment. The work request opcode, packet opcode, and packet formats for TID RDMA READ protocol are also defined in this patch. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_hdrs.h | 9 +++++++- include/rdma/tid_rdma_defs.h | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 include/rdma/tid_rdma_defs.h (limited to 'include') diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 6e35416170a3..58a0a0f99e7f 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -100,6 +100,8 @@ struct ib_atomic_eth { __be64 compare_data; /* potentially unaligned */ } __packed; +#include + union ib_ehdrs { struct { __be32 deth[2]; @@ -117,6 +119,11 @@ union ib_ehdrs { __be32 aeth; __be32 ieth; struct ib_atomic_eth atomic_eth; + /* TID RDMA headers */ + union { + struct tid_rdma_read_req r_req; + struct tid_rdma_read_resp r_rsp; + } tid_rdma; } __packed; struct ib_other_headers { diff --git a/include/rdma/tid_rdma_defs.h b/include/rdma/tid_rdma_defs.h new file mode 100644 index 000000000000..1c431ea32b52 --- /dev/null +++ b/include/rdma/tid_rdma_defs.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2018 Intel Corporation. + * + */ + +#ifndef TID_RDMA_DEFS_H +#define TID_RDMA_DEFS_H + +#include + +struct tid_rdma_read_req { + __le32 kdeth0; + __le32 kdeth1; + struct ib_reth reth; + __be32 tid_flow_psn; + __be32 tid_flow_qp; + __be32 verbs_qp; +}; + +struct tid_rdma_read_resp { + __le32 kdeth0; + __le32 kdeth1; + __be32 aeth; + __be32 reserved[4]; + __be32 verbs_psn; + __be32 verbs_qp; +}; + +/* + * TID RDMA Opcodes + */ +#define IB_OPCODE_TID_RDMA 0xe0 +enum { + IB_OPCODE_READ_REQ = 0x4, + IB_OPCODE_READ_RESP = 0x5, + + IB_OPCODE(TID_RDMA, READ_REQ), + IB_OPCODE(TID_RDMA, READ_RESP), +}; + +#define TID_OP(x) IB_OPCODE_TID_RDMA_##x + +/* + * Define TID RDMA specific WR opcodes. The ib_wr_opcode + * enum already provides some reserved values for use by + * low level drivers. Two of those are used but renamed + * to be more descriptive. + */ +#define IB_WR_TID_RDMA_READ IB_WR_RESERVED2 + +#endif /* TID_RDMA_DEFS_H */ -- cgit v1.2.3 From 039cd3daf19b9acbf080054d765cbceac842b6a0 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 19:31:57 -0800 Subject: IB/hfi1: Increment the retry timeout value for TID RDMA READ request The RC retry timeout value is based on the estimated time for the response packet to come back. However, for TID RDMA READ request, due to the use of header suppression, the driver is normally not notified for each incoming response packet until the last TID RDMA READ response packet. Consequently, the retry timeout value should be extended to cover the transaction time for the entire length of a segment (default 256K) instead of that for a single packet. This patch addresses the issue by introducing new retry timer functions to account for multiple packets and wrapper functions for backward compatibility. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 12 +++++++++--- include/rdma/rdmavt_qp.h | 6 +++++- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 168e40be183c..87d66c9630d7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -574,9 +574,10 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, /** * rvt_mod_retry_timer - mod a retry timer * @qp - the QP + * @shift - timeout shift to wait for multiple packets * Modify a potentially already running retry timer */ -static inline void rvt_mod_retry_timer(struct rvt_qp *qp) +static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) { struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); @@ -584,8 +585,13 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp) lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ - mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + - rdi->busy_jiffies); + mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + + (qp->timeout_jiffies << shift)); +} + +static inline void rvt_mod_retry_timer(struct rvt_qp *qp) +{ + return rvt_mod_retry_timer_ext(qp, 0); } struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 9095a0b71250..d8d88d023092 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -688,7 +688,11 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t); void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth); void rvt_del_timers_sync(struct rvt_qp *qp); void rvt_stop_rc_timers(struct rvt_qp *qp); -void rvt_add_retry_timer(struct rvt_qp *qp); +void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift); +static inline void rvt_add_retry_timer(struct rvt_qp *qp) +{ + rvt_add_retry_timer_ext(qp, 0); +} void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, void *data, u32 length, -- cgit v1.2.3 From c098bbb00cd1986cbb58ed1712643f80ed00fcc3 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 23 Jan 2019 21:48:28 -0800 Subject: IB/hfi1: Build TID RDMA WRITE request This patch adds the functions to build TID RDMA WRITE request. The work request opcode, packet opcode, and packet formats for TID RDMA WRITE protocol are also defined in this patch. Signed-off-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Ashutosh Dixit Signed-off-by: Kaike Wan