summaryrefslogtreecommitdiffstats
path: root/include/rdma/ib_verbs.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r--include/rdma/ib_verbs.h257
1 files changed, 163 insertions, 94 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6c003995347a..e950c2a68f06 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -94,6 +94,7 @@ enum ib_gid_type {
struct ib_gid_attr {
struct net_device *ndev;
struct ib_device *device;
+ union ib_gid gid;
enum ib_gid_type gid_type;
u16 index;
u8 port_num;
@@ -148,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
return IB_GID_TYPE_IB;
}
-static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
- union ib_gid *gid)
+static inline enum rdma_network_type
+rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
{
- if (gid_type == IB_GID_TYPE_IB)
+ if (attr->gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
- if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
return RDMA_NETWORK_IPV4;
else
return RDMA_NETWORK_IPV6;
@@ -344,7 +345,8 @@ struct ib_device_attr {
int max_qp;
int max_qp_wr;
u64 device_cap_flags;
- int max_sge;
+ int max_send_sge;
+ int max_recv_sge;
int max_sge_rd;
int max_cq;
int max_cqe;
@@ -430,33 +432,6 @@ enum ib_port_state {
IB_PORT_ACTIVE_DEFER = 5
};
-enum ib_port_cap_flags {
- IB_PORT_SM = 1 << 1,
- IB_PORT_NOTICE_SUP = 1 << 2,
- IB_PORT_TRAP_SUP = 1 << 3,
- IB_PORT_OPT_IPD_SUP = 1 << 4,
- IB_PORT_AUTO_MIGR_SUP = 1 << 5,
- IB_PORT_SL_MAP_SUP = 1 << 6,
- IB_PORT_MKEY_NVRAM = 1 << 7,
- IB_PORT_PKEY_NVRAM = 1 << 8,
- IB_PORT_LED_INFO_SUP = 1 << 9,
- IB_PORT_SM_DISABLED = 1 << 10,
- IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
- IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
- IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
- IB_PORT_CM_SUP = 1 << 16,
- IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
- IB_PORT_REINIT_SUP = 1 << 18,
- IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
- IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
- IB_PORT_DR_NOTICE_SUP = 1 << 21,
- IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
- IB_PORT_BOOT_MGMT_SUP = 1 << 23,
- IB_PORT_LINK_LATENCY_SUP = 1 << 24,
- IB_PORT_CLIENT_REG_SUP = 1 << 25,
- IB_PORT_IP_BASED_GIDS = 1 << 26,
-};
-
enum ib_port_width {
IB_WIDTH_1X = 1,
IB_WIDTH_4X = 2,
@@ -554,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_CAP_AF_IB 0x00001000
#define RDMA_CORE_CAP_ETH_AH 0x00002000
#define RDMA_CORE_CAP_OPA_AH 0x00004000
+#define RDMA_CORE_CAP_IB_GRH_REQUIRED 0x00008000
/* Protocol 0xFFF00000 */
#define RDMA_CORE_CAP_PROT_IB 0x00100000
@@ -563,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
+#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
+ | RDMA_CORE_CAP_PROT_ROCE \
+ | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
+
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
| RDMA_CORE_CAP_IB_SMI \
@@ -595,6 +575,8 @@ struct ib_port_attr {
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
int gid_tbl_len;
+ unsigned int ip_gids:1;
+ /* This is the value from PortInfo CapabilityMask, defined by IBA */
u32 port_cap_flags;
u32 max_msg_sz;
u32 bad_pkey_cntr;
@@ -610,7 +592,6 @@ struct ib_port_attr {
u8 active_width;
u8 active_speed;
u8 phys_state;
- bool grh_required;
};
enum ib_device_modify_flags {
@@ -689,6 +670,7 @@ struct ib_event_handler {
} while (0)
struct ib_global_route {
+ const struct ib_gid_attr *sgid_attr;
union ib_gid dgid;
u32 flow_label;
u8 sgid_index;
@@ -1370,7 +1352,7 @@ struct ib_rdma_wr {
u32 rkey;
};
-static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
+static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
{
return container_of(wr, struct ib_rdma_wr, wr);
}
@@ -1385,7 +1367,7 @@ struct ib_atomic_wr {
u32 rkey;
};
-static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
+static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
{
return container_of(wr, struct ib_atomic_wr, wr);
}
@@ -1402,7 +1384,7 @@ struct ib_ud_wr {
u8 port_num; /* valid for DR SMPs on switch only */
};
-static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
+static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
{
return container_of(wr, struct ib_ud_wr, wr);
}
@@ -1414,7 +1396,7 @@ struct ib_reg_wr {
int access;
};
-static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
+static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
{
return container_of(wr, struct ib_reg_wr, wr);
}
@@ -1427,7 +1409,8 @@ struct ib_sig_handover_wr {
struct ib_sge *prot;
};
-static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
+static inline const struct ib_sig_handover_wr *
+sig_handover_wr(const struct ib_send_wr *wr)
{
return container_of(wr, struct ib_sig_handover_wr, wr);
}
@@ -1443,14 +1426,16 @@ struct ib_recv_wr {
};
enum ib_access_flags {
- IB_ACCESS_LOCAL_WRITE = 1,
- IB_ACCESS_REMOTE_WRITE = (1<<1),
- IB_ACCESS_REMOTE_READ = (1<<2),
- IB_ACCESS_REMOTE_ATOMIC = (1<<3),
- IB_ACCESS_MW_BIND = (1<<4),
- IB_ZERO_BASED = (1<<5),
- IB_ACCESS_ON_DEMAND = (1<<6),
- IB_ACCESS_HUGETLB = (1<<7),
+ IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
+ IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
+ IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
+ IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
+ IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
+ IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
+ IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
+ IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+
+ IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
};
/*
@@ -1473,14 +1458,17 @@ struct ib_fmr_attr {
struct ib_umem;
enum rdma_remove_reason {
- /* Userspace requested uobject deletion. Call could fail */
+ /*
+ * Userspace requested uobject deletion or initial try
+ * to remove uobject via cleanup. Call could fail
+ */
RDMA_REMOVE_DESTROY,
/* Context deletion. This call should delete the actual object itself */
RDMA_REMOVE_CLOSE,
/* Driver is being hot-unplugged. This call should delete the actual object itself */
RDMA_REMOVE_DRIVER_REMOVE,
- /* Context is being cleaned-up, but commit was just completed */
- RDMA_REMOVE_DURING_CLEANUP,
+ /* uobj is being cleaned-up before being committed */
+ RDMA_REMOVE_ABORT,
};
struct ib_rdmacg_object {
@@ -1492,14 +1480,14 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
+ /*
+ * 'closing' can be read by the driver only during a destroy callback,
+ * it is set when we are closing the file descriptor and indicates
+ * that mm_sem may be locked.
+ */
int closing;
- /* locking the uobjects_list */
- struct mutex uobjects_lock;
- struct list_head uobjects;
- /* protects cleanup process from other actions */
- struct rw_semaphore cleanup_rwsem;
- enum rdma_remove_reason cleanup_reason;
+ bool cleanup_retryable;
struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -1524,6 +1512,9 @@ struct ib_ucontext {
struct ib_uobject {
u64 user_handle; /* handle given to us by userspace */
+ /* ufile & ucontext owning this object */
+ struct ib_uverbs_file *ufile;
+ /* FIXME, save memory: ufile->context == context */
struct ib_ucontext *context; /* associated user context */
void *object; /* containing object */
struct list_head list; /* link to context's list */
@@ -1533,13 +1524,7 @@ struct ib_uobject {
atomic_t usecnt; /* protects exclusive access */
struct rcu_head rcu; /* kfree_rcu() overhead */
- const struct uverbs_obj_type *type;
-};
-
-struct ib_uobject_file {
- struct ib_uobject uobj;
- /* ufile contains the lock between context release and file close */
- struct ib_uverbs_file *ufile;
+ const struct uverbs_api_object *uapi_object;
};
struct ib_udata {
@@ -1578,6 +1563,7 @@ struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
struct ib_uobject *uobject;
+ const struct ib_gid_attr *sgid_attr;
enum rdma_ah_attr_type type;
};
@@ -1776,6 +1762,9 @@ struct ib_qp {
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
+ /* sgid_attrs associated with the AV's */
+ const struct ib_gid_attr *av_sgid_attr;
+ const struct ib_gid_attr *alt_path_sgid_attr;
u32 qp_num;
u32 max_write_sge;
u32 max_read_sge;
@@ -2098,6 +2087,7 @@ struct ib_flow_attr {
struct ib_flow {
struct ib_qp *qp;
+ struct ib_device *device;
struct ib_uobject *uobject;
};
@@ -2213,7 +2203,11 @@ struct rdma_netdev {
struct ib_device *hca;
u8 port_num;
- /* cleanup function must be specified */
+ /*
+ * cleanup function must be specified.
+ * FIXME: This is only used for OPA_VNIC and that usage should be
+ * removed too.
+ */
void (*free_rdma_netdev)(struct net_device *netdev);
/* control functions */
@@ -2242,11 +2236,6 @@ struct ib_counters {
atomic_t usecnt;
};
-enum ib_read_counters_flags {
- /* prefer read values from driver cache */
- IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
-};
-
struct ib_counters_read_attr {
u64 *counters_buff;
u32 ncounters;
@@ -2341,8 +2330,7 @@ struct ib_device {
* concurrently for different ports. This function is only called when
* roce_gid_table is used.
*/
- int (*add_gid)(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
+ int (*add_gid)(const struct ib_gid_attr *attr,
void **context);
/* When calling del_gid, the HW vendor's driver should delete the
* gid of device @device at gid index gid_index of port port_num
@@ -2389,8 +2377,8 @@ struct ib_device {
struct ib_srq_attr *srq_attr);
int (*destroy_srq)(struct ib_srq *srq);
int (*post_srq_recv)(struct ib_srq *srq,
- struct ib_recv_wr *recv_wr,
- struct ib_recv_wr **bad_recv_wr);
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
struct ib_qp * (*create_qp)(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
@@ -2404,11 +2392,11 @@ struct ib_device {
struct ib_qp_init_attr *qp_init_attr);
int (*destroy_qp)(struct ib_qp *qp);
int (*post_send)(struct ib_qp *qp,
- struct ib_send_wr *send_wr,
- struct ib_send_wr **bad_send_wr);
+ const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
int (*post_recv)(struct ib_qp *qp,
- struct ib_recv_wr *recv_wr,
- struct ib_recv_wr **bad_recv_wr);
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
struct ib_cq * (*create_cq)(struct ib_device *device,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -2592,7 +2580,7 @@ struct ib_device {
const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
int comp_vector);
- struct uverbs_root_spec *specs_root;
+ const struct uverbs_object_tree_def *const *driver_specs;
enum rdma_driver_id driver_id;
};
@@ -2679,6 +2667,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
}
/**
+ * ib_is_destroy_retryable - Check whether the uobject destruction
+ * is retryable.
+ * @ret: The initial destruction return code
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * Helper that the IB layer and low-level drivers can use to decide whether
+ * the destruction of the given uobject may be retried.
+ *
+ * Return: true only when @ret is non-zero (the first attempt did not succeed)
+ * and either @why is RDMA_REMOVE_DESTROY or the owning ucontext has
+ * cleanup_retryable set; false otherwise.
+ * Must be called with the object locked for destroy.
+ */
+static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
+ struct ib_uobject *uobj)
+{
+ return ret && (why == RDMA_REMOVE_DESTROY ||
+ uobj->context->cleanup_retryable);
+}
+
+/**
+ * ib_destroy_usecnt - Called during destruction to check the usecnt
+ * @usecnt: The usecnt atomic
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * Return: -EBUSY when @usecnt is non-zero and ib_is_destroy_retryable() reports
+ * the destruction as retryable; 0 otherwise, so destruction is not blocked.
+ */
+static inline int ib_destroy_usecnt(atomic_t *usecnt,
+ enum rdma_remove_reason why,
+ struct ib_uobject *uobj)
+{
+ if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
+ return -EBUSY;
+ return 0;
+}
+
+/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -2755,6 +2783,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
port <= rdma_end_port(device));
}
+static inline bool rdma_is_grh_required(const struct ib_device *device,
+ u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags &
+ RDMA_CORE_PORT_IB_GRH_REQUIRED; /* composite mask: true if any of its bits is set */
+}
+
static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
{
return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
@@ -3046,10 +3081,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
return rdma_protocol_iwarp(dev, port_num);
}
-int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid,
- struct ib_gid_attr *attr);
-
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state);
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@ -3148,6 +3179,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
* ignored unless the work completion indicates that the GRH is valid.
* @ah_attr: Returned attributes that can be used when creating an address
* handle for replying to the message.
+ * When ib_init_ah_attr_from_wc() returns success,
+ * (a) for IB link layer it contains a reference to the SGID attribute
+ * only when a GRH is present.
+ * (b) for RoCE link layer it contains a reference to the SGID attribute.
+ * The caller must invoke rdma_destroy_ah_attr() to release the reference to
+ * SGID attributes which are initialized using ib_init_ah_attr_from_wc().
+ *
*/
int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
@@ -3247,10 +3285,12 @@ int ib_destroy_srq(struct ib_srq *srq);
* the work request that failed to be posted on the QP.
*/
static inline int ib_post_srq_recv(struct ib_srq *srq,
- struct ib_recv_wr *recv_wr,
- struct ib_recv_wr **bad_recv_wr)
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr)
{
- return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
+ const struct ib_recv_wr *dummy;
+
+ return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
}
/**
@@ -3348,10 +3388,12 @@ int ib_close_qp(struct ib_qp *qp);
* earlier work requests in the list.
*/
static inline int ib_post_send(struct ib_qp *qp,
- struct ib_send_wr *send_wr,
- struct ib_send_wr **bad_send_wr)
+ const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr)
{
- return qp->device->post_send(qp, send_wr, bad_send_wr);
+ const struct ib_send_wr *dummy;
+
+ return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
}
/**
@@ -3363,10 +3405,12 @@ static inline int ib_post_send(struct ib_qp *qp,
* the work request that failed to be posted on the QP.
*/
static inline int ib_post_recv(struct ib_qp *qp,
- struct ib_recv_wr *recv_wr,
- struct ib_recv_wr **bad_recv_wr)
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr)
{
- return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
+ const struct ib_recv_wr *dummy;
+
+ return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
@@ -3801,10 +3845,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
*/
int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr, int domain);
-int ib_destroy_flow(struct ib_flow *flow_id);
-
static inline int ib_check_mr_access(int flags)
{
/*
@@ -4033,8 +4073,19 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
grh->sgid_index = sgid_index;
grh->hop_limit = hop_limit;
grh->traffic_class = traffic_class;
+ grh->sgid_attr = NULL;
}
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+ u32 flow_label, u8 hop_limit, u8 traffic_class,
+ const struct ib_gid_attr *sgid_attr);
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+ const struct rdma_ah_attr *src);
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+ const struct rdma_ah_attr *new);
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
+
/**
* rdma_ah_find_type - Return address handle type.
*
@@ -4102,6 +4153,20 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
}
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+ struct ib_qp *qp, struct ib_device *device)
+{
+ uobj->object = ibflow; /* cross-link the uobject and the flow */
+ ibflow->uobject = uobj;
+
+ if (qp) { /* qp may be NULL: ibflow->qp is then left untouched */
+ atomic_inc(&qp->usecnt); /* account for the flow's use of the QP */
+ ibflow->qp = qp;
+ }
+
+ ibflow->device = device;
+}
+
/**
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
@@ -4110,4 +4175,8 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+
+int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs);
#endif /* IB_VERBS_H */