From ecdc428e4c5d821a07baf4f8b1718faf67b9026f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 12 Nov 2009 11:14:13 -0800 Subject: IB/mlx4: Remove unneeded code There is no such flag DE - the field is reserved and should be zero. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 219b10397b4d..518d561970aa 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -897,7 +897,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(ibqp->qp_type) << 16)); - context->flags |= cpu_to_be32(1 << 8); /* DE? */ if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); -- cgit v1.2.3 From 417608c20a4c8397bc5307d949ec01ea0a0dd8e5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 12 Nov 2009 11:19:44 -0800 Subject: IB/mlx4: Remove limitation on LSO header size Current code has a limitation: an LSO header is not allowed to cross a 64 byte boundary. This patch removes this limitation by setting the WQE RR for large headers thus allowing LSO headers of any size. The extra buffer reserved for MLX4_IB_QP_LSO QPs has been doubled, from 64 to 128 bytes, assuming this is reasonable upper limit for header length. Also, this patch will cause IB_DEVICE_UD_TSO to be set only for HCA FW versions that set MLX4_DEV_CAP_FLAG_BLH; e.g. FW version 2.6.000 and higher. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/qp.c | 24 ++++++++++++------------ drivers/net/mlx4/fw.c | 1 + include/linux/mlx4/device.h | 1 + 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3cb3f47a10b8..e596537ff353 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; - if (dev->dev->caps.max_gso_sz) + if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) props->device_cap_flags |= IB_DEVICE_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 518d561970aa..847030c89a8d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -54,7 +54,8 @@ enum { /* * Largest possible UD header: send with GRH and immediate data. */ - MLX4_IB_UD_HEADER_SIZE = 72 + MLX4_IB_UD_HEADER_SIZE = 72, + MLX4_IB_LSO_HEADER_SPARE = 128, }; struct mlx4_ib_sqp { @@ -67,7 +68,8 @@ struct mlx4_ib_sqp { }; enum { - MLX4_IB_MIN_SQ_STRIDE = 6 + MLX4_IB_MIN_SQ_STRIDE = 6, + MLX4_IB_CACHE_LINE_SIZE = 64, }; static const __be32 mlx4_ib_opcode[] = { @@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) case IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + - ((flags & MLX4_IB_QP_LSO) ? 64 : 0); + ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); case IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); @@ -1466,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len, - __be32 *lso_hdr_sz) + __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); - /* - * This is a temporary limitation and will be removed in - * a forthcoming FW release: - */ - if (unlikely(halign > 64)) - return -EINVAL; + if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) + *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4))) @@ -1521,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 dummy; __be32 *lso_wqe; __be32 uninitialized_var(lso_hdr_sz); + __be32 blh; int i; spin_lock_irqsave(&qp->sq.lock, flags); @@ -1529,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, for (nreq = 0; wr; ++nreq, wr = wr->next) { lso_wqe = &dummy; + blh = 0; if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { err = -ENOMEM; @@ -1615,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { - err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz); + err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -1686,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | - (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 3c16602172fc..7194be3a2894 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags) [ 9] = "Q_Key violation counter", [10] = "VMM", [12] = "DPDP", + [15] = "Big LSO headers", [16] = "MW support", [17] = "APM support", [18] = "Atomic ops support", diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ce7cc6c7bcbb..e92d1bfdb330 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -61,6 +61,7 @@ enum { MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9, MLX4_DEV_CAP_FLAG_DPDP = 1 << 12, + MLX4_DEV_CAP_FLAG_BLH = 1 << 15, MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16, MLX4_DEV_CAP_FLAG_APM = 1 << 17, MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, -- cgit v1.2.3 From c1ccaf2478f84c2665cf57f981db143aa582d646 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 12 Nov 2009 11:32:27 -0800 Subject: IB/iser: Rewrite SG handling for RDMA logic After dma-mapping an SG list provided by the SCSI midlayer, iser has to make sure the mapped SG is "aligned for RDMA" in the sense that its possible to produce one mapping in the HCA IOMMU which represents the whole SG. Next, the mapped SG is formatted for registration with the HCA. This patch re-writes the logic that does the above, to make it clearer and simpler. It also fixes a bug in the being aligned for RDMA checks, where a "start" check wasn't done but rather only "end" check. Signed-off-by: Alexander Nezhinsky Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 122 ++++++++++++++---------------- 1 file changed, 56 insertions(+), 66 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index b9453d068e9d..274c883ef3ea 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -209,6 +209,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, mem_copy->copy_buf = NULL; } +#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) + /** * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses * and returns the length of resulting physical address array (may be less than @@ -221,62 +223,52 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, * where --few fragments of the same page-- are present in the SG as * consecutive elements. Also, it handles one entry SG. */ + static int iser_sg_to_page_vec(struct iser_data_buf *data, struct iser_page_vec *page_vec, struct ib_device *ibdev) { - struct scatterlist *sgl = (struct scatterlist *)data->buf; - struct scatterlist *sg; - u64 first_addr, last_addr, page; - int end_aligned; - unsigned int cur_page = 0; + struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf; + u64 start_addr, end_addr, page, chunk_start = 0; unsigned long total_sz = 0; - int i; + unsigned int dma_len; + int i, new_chunk, cur_page, last_ent = data->dma_nents - 1; /* compute the offset of first element */ page_vec->offset = (u64) sgl[0].offset & ~MASK_4K; + new_chunk = 1; + cur_page = 0; for_each_sg(sgl, sg, data->dma_nents, i) { - unsigned int dma_len = ib_sg_dma_len(ibdev, sg); - + start_addr = ib_sg_dma_address(ibdev, sg); + if (new_chunk) + chunk_start = start_addr; + dma_len = ib_sg_dma_len(ibdev, sg); + end_addr = start_addr + dma_len; total_sz += dma_len; - first_addr = ib_sg_dma_address(ibdev, sg); - last_addr = first_addr + dma_len; - - end_aligned = !(last_addr & ~MASK_4K); - - /* continue to collect page fragments till aligned or SG ends */ - while (!end_aligned && (i + 1 < data->dma_nents)) { - sg = sg_next(sg); - i++; - dma_len = ib_sg_dma_len(ibdev, sg); - total_sz += dma_len; - last_addr = ib_sg_dma_address(ibdev, sg) + dma_len; - end_aligned = !(last_addr & ~MASK_4K); + /* collect page fragments until aligned or end of SG list */ + if (!IS_4K_ALIGNED(end_addr) && i < last_ent) { + new_chunk = 0; + continue; } - - /* handle the 1st page in the 1st DMA element */ - if (cur_page == 0) { - page = first_addr & MASK_4K; - page_vec->pages[cur_page] = page; - cur_page++; + new_chunk = 1; + + /* address of the first page in the contiguous chunk; + masking relevant for the very first SG entry, + which might be unaligned */ + page = chunk_start & MASK_4K; + do { + page_vec->pages[cur_page++] = page; page += SIZE_4K; - } else - page = first_addr; - - for (; page < last_addr; page += SIZE_4K) { - page_vec->pages[cur_page] = page; - cur_page++; - } - + } while (page < end_addr); } + page_vec->data_size = total_sz; iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page); return cur_page; } -#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) /** * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned @@ -284,42 +276,40 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, * the number of entries which are aligned correctly. Supports the case where * consecutive SG elements are actually fragments of the same physcial page. */ -static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, - struct ib_device *ibdev) +static int iser_data_buf_aligned_len(struct iser_data_buf *data, + struct ib_device *ibdev) { - struct scatterlist *sgl, *sg; - u64 end_addr, next_addr; - int i, cnt; - unsigned int ret_len = 0; + struct scatterlist *sgl, *sg, *next_sg = NULL; + u64 start_addr, end_addr; + int i, ret_len, start_check = 0; + + if (data->dma_nents == 1) + return 1; sgl = (struct scatterlist *)data->buf; + start_addr = ib_sg_dma_address(ibdev, sgl); - cnt = 0; for_each_sg(sgl, sg, data->dma_nents, i) { - /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX " - "offset: %ld sz: %ld\n", i, - (unsigned long)sg_phys(sg), - (unsigned long)sg->offset, - (unsigned long)sg->length); */ - end_addr = ib_sg_dma_address(ibdev, sg) + - ib_sg_dma_len(ibdev, sg); - /* iser_dbg("Checking sg iobuf end address " - "0x%08lX\n", end_addr); */ - if (i + 1 < data->dma_nents) { - next_addr = ib_sg_dma_address(ibdev, sg_next(sg)); - /* are i, i+1 fragments of the same page? */ - if (end_addr == next_addr) { - cnt++; - continue; - } else if (!IS_4K_ALIGNED(end_addr)) { - ret_len = cnt + 1; - break; - } - } - cnt++; + if (start_check && !IS_4K_ALIGNED(start_addr)) + break; + + next_sg = sg_next(sg); + if (!next_sg) + break; + + end_addr = start_addr + ib_sg_dma_len(ibdev, sg); + start_addr = ib_sg_dma_address(ibdev, next_sg); + + if (end_addr == start_addr) { + start_check = 0; + continue; + } else + start_check = 1; + + if (!IS_4K_ALIGNED(end_addr)) + break; } - if (i == data->dma_nents) - ret_len = cnt; /* loop ended */ + ret_len = (next_sg) ? i : i+1; iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n", ret_len, data->dma_nents, data); return ret_len; -- cgit v1.2.3 From be504b0b9fbe9ba447c93ef0f5789f377102d555 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 12 Nov 2009 15:51:16 -0800 Subject: mlx4_core: Fix parsing of reserved EQ cap Value returned by firmware is the actual value, not a log. Signed-off-by: Liran Liss Signed-off-by: Roland Dreier --- drivers/net/mlx4/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 7194be3a2894..04f42ae1eda0 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -236,7 +236,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); dev_cap->max_mpts = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); - dev_cap->reserved_eqs = 1 << (field & 0xf); + dev_cap->reserved_eqs = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); -- cgit v1.2.3 From a7ca1f00ed2921b804d7ebda0f6fca8c9078fa42 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 16 Nov 2009 09:30:33 -0800 Subject: RDMA/ucma: Add option to manually set IB path Export rdma_set_ib_paths to user space to allow applications to manually set the IB path used for connections. This allows alternative ways for a user space application or library to obtain path record information, including retrieving path information from cached data, avoiding direct interaction with the IB SA. The IB SA is a single, centralized entity that can limit scaling on large clusters running MPI applications. Future changes to the rdma cm can expand on this framework to support the full range of features allowed by the IB CM, such as separate forward and reverse paths and APM. Signed-off-by: Sean Hefty Reviewed-By: Jason Gunthorpe Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 6 +++++ drivers/infiniband/core/ucma.c | 49 ++++++++++++++++++++++++++++++++++++++ include/rdma/ib_sa.h | 6 +++++ include/rdma/ib_user_sa.h | 16 +++++++++++++ include/rdma/rdma_user_cm.h | 6 +++-- 5 files changed, 81 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 82543716d59e..7e1ffd8ccd5c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -604,6 +604,12 @@ retry: return ret ? ret : id; } +void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) +{ + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); +} +EXPORT_SYMBOL(ib_sa_unpack_path); + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index bb96d3c4b0f4..f1cbd26a9de0 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -43,6 +43,7 @@ #include #include #include +#include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, return ret; } +static int ucma_set_ib_path(struct ucma_context *ctx, + struct ib_path_rec_data *path_data, size_t optlen) +{ + struct ib_sa_path_rec sa_path; + struct rdma_cm_event event; + int ret; + + if (optlen % sizeof(*path_data)) + return -EINVAL; + + for (; optlen; optlen -= sizeof(*path_data), path_data++) { + if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | + IB_PATH_BIDIRECTIONAL)) + break; + } + + if (!optlen) + return -EINVAL; + + ib_sa_unpack_path(path_data->path_rec, &sa_path); + ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); + if (ret) + return ret; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + return ucma_event_handler(ctx->cm_id, &event); +} + +static int ucma_set_option_ib(struct ucma_context *ctx, int optname, + void *optval, size_t optlen) +{ + int ret; + + switch (optname) { + case RDMA_OPTION_IB_PATH: + ret = ucma_set_ib_path(ctx, optval, optlen); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + static int ucma_set_option_level(struct ucma_context *ctx, int level, int optname, void *optval, size_t optlen) { @@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level, case RDMA_OPTION_ID: ret = ucma_set_option_id(ctx, optname, optval, optlen); break; + case RDMA_OPTION_IB: + ret = ucma_set_option_ib(ctx, optname, optval, optlen); + break; default: ret = -ENOSYS; } diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 3841c1aff692..1082afaed158 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr); +/** + * ib_sa_unpack_path - Convert a path record from MAD format to struct + * ib_sa_path_rec. + */ +void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); + #endif /* IB_SA_H */ diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h index 659120157e14..cfc7c9ba781e 100644 --- a/include/rdma/ib_user_sa.h +++ b/include/rdma/ib_user_sa.h @@ -35,6 +35,22 @@ #include +enum { + IB_PATH_GMP = 1, + IB_PATH_PRIMARY = (1<<1), + IB_PATH_ALTERNATE = (1<<2), + IB_PATH_OUTBOUND = (1<<3), + IB_PATH_INBOUND = (1<<4), + IB_PATH_INBOUND_REVERSE = (1<<5), + IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE +}; + +struct ib_path_rec_data { + __u32 flags; + __u32 reserved; + __u32 path_rec[16]; +}; + struct ib_user_path_rec { __u8 dgid[16]; __u8 sgid[16]; diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index c55705460b87..1d165022c02d 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -215,12 +215,14 @@ struct rdma_ucm_event_resp { /* Option levels */ enum { - RDMA_OPTION_ID = 0 + RDMA_OPTION_ID = 0, + RDMA_OPTION_IB = 1 }; /* Option details */ enum { - RDMA_OPTION_ID_TOS = 0 + RDMA_OPTION_ID_TOS = 0, + RDMA_OPTION_IB_PATH = 1 }; struct rdma_ucm_set_option { -- cgit v1.2.3 From 0f9ea5d2ab5cef732d5abbe62b9e9af3007bae81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2009 14:24:34 -0800 Subject: RDMA/addr: Use appropriate locking with for_each_netdev() for_each_netdev() should be used with RTNL or dev_base_lock held, or else we risk a crash. Signed-off-by: Eric Dumazet Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index bd07803e9183..373f1118d57b 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -131,6 +131,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: + read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) { if (ipv6_chk_addr(&init_net, &((struct sockaddr_in6 *) addr)->sin6_addr, @@ -139,6 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) break; } } + read_unlock(&dev_base_lock); break; #endif } @@ -391,14 +393,17 @@ static int addr_resolve_local(struct sockaddr *src_in, { struct in6_addr *a; + read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) if (ipv6_chk_addr(&init_net, &((struct sockaddr_in6 *) dst_in)->sin6_addr, dev, 1)) break; - if (!dev) + if (!dev) { + read_unlock(&dev_base_lock); return -EADDRNOTAVAIL; + } a = &((struct sockaddr_in6 *) src_in)->sin6_addr; @@ -416,6 +421,7 @@ static int addr_resolve_local(struct sockaddr *src_in, if (!ret) memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); } + read_unlock(&dev_base_lock); break; } #endif -- cgit v1.2.3 From 1c9b281997b5876c0c8ed62506b56db89d262b57 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 19 Nov 2009 12:55:21 -0800 Subject: RDMA/cma: Correct detection of SA Created MGID RDMA CM treats AF_INET6 addresses that are either 0 or prefixed with FF1x:A01B::/32 as MGIDs, but the detection for the prefix was buggy; fix it up. Signed-off-by: Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 075317884b53..8bb2cf4031ae 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2687,7 +2687,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if ((addr->sa_family == AF_INET6) && - ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) == + ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); -- cgit v1.2.3 From e2e626972e652d18520f84d69fc06cfa307d11ff Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 19 Nov 2009 12:55:22 -0800 Subject: RDMA/cma: Fix AF_INET6 support in multicast joining If joining to an AF_INET6 address, we need to map the address to a MGID in the same way as the IP stack. The old code would just fall through to the IPv4 case and generate garbage. Signed-off-by: Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8bb2cf4031ae..052b4c01745d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2691,6 +2691,11 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else if ((addr->sa_family == AF_INET6)) { + ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); + if (id_priv->id.ps == RDMA_PS_UDP) + mc_map[7] = 0x01; /* Use RDMA CM signature */ + *mgid = *(union ib_gid *) (mc_map + 4); } else { ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) -- cgit v1.2.3 From 6266ed6e4164466177238b11ecb825a3a108a3e4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:55:22 -0800 Subject: RDMA/cma: Replace net_device pointer with index Provide the device interface when resolving route information to ensure that the correct outbound device is used. This will also simplify processing of sin6_scope_id for IPv6 support. Based on work from: David Wilder Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 14 +++++++++++++- drivers/infiniband/core/cma.c | 2 +- include/rdma/ib_addr.h | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 373f1118d57b..788a02ef01dd 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -107,7 +107,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); - dev_addr->src_dev = dev; + dev_addr->bound_dev_if = dev->ifindex; return 0; } EXPORT_SYMBOL(rdma_copy_addr); @@ -117,6 +117,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) struct net_device *dev; int ret = -EADDRNOTAVAIL; + if (dev_addr->bound_dev_if) { + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!dev) + return -ENODEV; + ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_put(dev); + return ret; + } + switch (addr->sa_family) { case AF_INET: dev = ip_dev_find(&init_net, @@ -231,6 +240,8 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in, memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; + fl.oif = addr->bound_dev_if; + ret = ip_route_output_key(&init_net, &rt, &fl); if (ret) goto out; @@ -279,6 +290,7 @@ static int addr6_resolve_remote(struct sockaddr_in6 *src_in, memset(&fl, 0, sizeof fl); fl.nl_u.ip6_u.daddr = dst_in->sin6_addr; fl.nl_u.ip6_u.saddr = src_in->sin6_addr; + fl.oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl); if (!dst) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 052b4c01745d..699ad12b3a2f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2820,7 +2820,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; - if ((dev_addr->src_dev == ndev) && + if ((dev_addr->bound_dev_if == ndev->ifindex) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->name, &id_priv->id); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 483057b2f4b4..27f17cc2c919 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -61,7 +61,7 @@ struct rdma_dev_addr { unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; enum rdma_node_type dev_type; - struct net_device *src_dev; + int bound_dev_if; }; /** -- cgit v1.2.3 From d2e0886245aa9eebc1a4710c861d263b09eac493 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:55:22 -0800 Subject: IB/addr: Verify source and destination address families match If a source address is provided, verify that the address family matches that of the destination address. If the source is not specified, use the same address family as the destination. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 788a02ef01dd..b59ba7ccef0e 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -461,18 +461,27 @@ int rdma_resolve_ip(struct rdma_addr_client *client, if (!req) return -ENOMEM; - if (src_addr) - memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); - memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; + + if (src_addr) { + if (src_addr->sa_family != dst_addr->sa_family) { + ret = -EINVAL; + goto err; + } + + memcpy(src_in, src_addr, ip_addr_size(src_addr)); + } else { + src_in->sa_family = dst_addr->sa_family; + } + + memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; req->client = client; atomic_inc(&client->refcount); - src_in = (struct sockaddr *) &req->src_addr; - dst_in = (struct sockaddr *) &req->dst_addr; - req->status = addr_resolve_local(src_in, dst_in, addr); if (req->status == -EADDRNOTAVAIL) req->status = addr_resolve_remote(src_in, dst_in, addr); @@ -490,10 +499,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client, default: ret = req->status; atomic_dec(&client->refcount); - kfree(req); - break; + goto err; } return ret; +err: + kfree(req); + return ret; } EXPORT_SYMBOL(rdma_resolve_ip); -- cgit v1.2.3 From c4315d85f9b76834289fd503796c01b8311c4b84 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:57:18 -0800 Subject: IB/addr: Store net_device type instead of translating to RDMA transport The struct rdma_dev_addr stores net_device address information: the source device address, destination hardware address, and broadcast address. For consistency, store the net_device type rather than converting it to the rdma_node_type. The type indicates the format of the various hardware addresses, which is what we're concerned with, and not the RDMA node type that the address may map to. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 13 +------------ drivers/infiniband/core/cma.c | 6 +++--- include/rdma/ib_addr.h | 3 ++- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index b59ba7ccef0e..de5fe161a1b9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -92,17 +91,7 @@ EXPORT_SYMBOL(rdma_addr_unregister_client); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { - switch (dev->type) { - case ARPHRD_INFINIBAND: - dev_addr->dev_type = RDMA_NODE_IB_CA; - break; - case ARPHRD_ETHER: - dev_addr->dev_type = RDMA_NODE_RNIC; - break; - default: - return -EADDRNOTAVAIL; - } - + dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 699ad12b3a2f..b305b5c17f8d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -330,11 +330,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) union ib_gid gid; int ret = -ENODEV; - switch (rdma_node_get_transport(dev_addr->dev_type)) { - case RDMA_TRANSPORT_IB: + switch (dev_addr->dev_type) { + case ARPHRD_INFINIBAND: ib_addr_get_sgid(dev_addr, &gid); break; - case RDMA_TRANSPORT_IWARP: + case ARPHRD_ETHER: iw_addr_get_sgid(dev_addr, &gid); break; default: diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 27f17cc2c919..3a39c55d2b9a 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -60,7 +61,7 @@ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; - enum rdma_node_type dev_type; + unsigned short dev_type; int bound_dev_if; }; -- cgit v1.2.3 From 6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:06 -0800 Subject: RDMA/cm: fix loopback address support The RDMA CM is intended to support the use of a loopback address when establishing a connection; however, the behavior of the CM when loopback addresses are used is confusing and does not always work, depending on whether loopback was specified by the server, the client, or both. The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non- zero address. (ie they weren't just trying to reserve a port) Currently, if the loopback address is passed to rdam_bind_addr, no device is associated with the rdma_cm_id. Fix this. If a loopback address is specified by the client as the destination address for a connection, it will fail to establish a connection. This is true even if the server is listing across all addresses or on the loopback address itself. The issue is that the server tries to translate the IP address carried in the REQ message to a local net_device address, which fails. The translation is not needed in this case, since the REQ carries the actual HW address that should be used. Finally, cleanup loopback support to be more transport neutral. Replace separate calls to get/set the sgid and dgid from the device address to a single call that behaves correctly depending on the format of the device address. And support both IPv4 and IPv6 address formats. Signed-off-by: Sean Hefty [ Fixed RDS build by s/ib_addr_get/rdma_addr_get/ - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 77 +++++++++++++++++++++++------------------- drivers/infiniband/core/ucma.c | 8 ++--- include/rdma/ib_addr.h | 31 ++++++----------- net/rds/ib.c | 4 +-- net/rds/iw.c | 4 +-- 5 files changed, 61 insertions(+), 63 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b305b5c17f8d..38867a46d39e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) union ib_gid gid; int ret = -ENODEV; - switch (dev_addr->dev_type) { - case ARPHRD_INFINIBAND: - ib_addr_get_sgid(dev_addr, &gid); - break; - case ARPHRD_ETHER: - iw_addr_get_sgid(dev_addr, &gid); - break; - default: - return -ENODEV; - } - + rdma_addr_get_sgid(dev_addr, &gid); list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); @@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto destroy_id; + if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { + rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey); + } else { + ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, + &rt->addr.dev_addr); + if (ret) + goto destroy_id; + } + rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto err; + if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { + ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto err; + } id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1565,8 +1563,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct sockaddr_in6 *sin6; memset(&path_rec, 0, sizeof path_rec); - ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); - ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); + rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); + rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; @@ -1781,7 +1779,11 @@ port_found: if (ret) goto out; - ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); + id_priv->id.route.addr.dev_addr.dev_type = + (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ? + ARPHRD_INFINIBAND : ARPHRD_ETHER; + + rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); @@ -1839,7 +1841,7 @@ out: static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src, *dst; union ib_gid gid; int ret; @@ -1853,14 +1855,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) goto err; } - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); - ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) { - src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; - dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; + src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; + if (cma_zero_addr(src)) { + dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; + if ((src->sa_family = dst->sa_family) == AF_INET) { + ((struct sockaddr_in *) src)->sin_addr.s_addr = + ((struct sockaddr_in *) dst)->sin_addr.s_addr; + } else { + ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr, + &((struct sockaddr_in6 *) dst)->sin6_addr); + } } work->id = id_priv; @@ -2089,7 +2096,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) return -EINVAL; - if (!cma_any_addr(addr)) { + if (cma_loopback_addr(addr)) { + ret = cma_bind_loopback(id_priv); + } else if (!cma_zero_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); if (ret) goto err1; @@ -2108,7 +2117,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 0; err2: - if (!cma_any_addr(addr)) { + if (id_priv->cma_dev) { mutex_lock(&lock); cma_detach_from_dev(id_priv); mutex_unlock(&lock); @@ -2721,7 +2730,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - ib_addr_get_sgid(dev_addr, &rec.port_gid); + rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index f1cbd26a9de0..b2e16c332d5b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -563,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; - ib_addr_get_dgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].dgid); - ib_addr_get_sgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].sgid); + rdma_addr_get_dgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].dgid); + rdma_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); break; case 2: diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 3a39c55d2b9a..fa0d52b8e622 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -122,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } -static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { - memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); + return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } -static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); + memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } -static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) -{ - memcpy(gid, dev_addr->src_dev_addr, sizeof *gid); -} - -static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); + memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } #endif /* IB_ADDR_H */ diff --git a/net/rds/ib.c b/net/rds/ib.c index 536ebe5d3f6b..3b8992361042 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; diff --git a/net/rds/iw.c b/net/rds/iw.c index db224f7c2937..b28fa8525b24 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; -- cgit v1.2.3 From 923c100ef019bf15fb89b6fa3d3ad0485d25d59b Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:51 -0800 Subject: IB/addr: Simplify resolving IPv4 addresses Merge resolve local/remote address resolution into a single data flow to ensure consistent access and use of the local routing tables. Based on work from: David Wilder Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 81 ++++++++++++------------------------------ 1 file changed, 23 insertions(+), 58 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index de5fe161a1b9..38a7184ea745 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -184,17 +184,6 @@ static void addr_send_arp(struct sockaddr *dst_in) memset(&fl, 0, sizeof fl); switch (dst_in->sa_family) { - case AF_INET: - fl.nl_u.ip4_u.daddr = - ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; - - if (ip_route_output_key(&init_net, &rt, &fl)) - return; - - neigh_event_send(rt->u.dst.neighbour, NULL); - ip_rt_put(rt); - break; - #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: { @@ -215,9 +204,9 @@ static void addr_send_arp(struct sockaddr *dst_in) } } -static int addr4_resolve_remote(struct sockaddr_in *src_in, - struct sockaddr_in *dst_in, - struct rdma_dev_addr *addr) +static int addr4_resolve(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) { __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; @@ -235,6 +224,16 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in, if (ret) goto out; + src_in->sin_family = AF_INET; + src_in->sin_addr.s_addr = rt->rt_src; + + if (rt->idev->dev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); + goto put; + } + /* If the device does ARP internally, return 'done' */ if (rt->idev->dev->flags & IFF_NOARP) { rdma_copy_addr(addr, rt->idev->dev, NULL); @@ -242,21 +241,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in, } neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); - if (!neigh) { + if (!neigh || !(neigh->nud_state & NUD_VALID)) { + neigh_event_send(rt->u.dst.neighbour, NULL); ret = -ENODATA; + if (neigh) + goto release; goto put; } - if (!(neigh->nud_state & NUD_VALID)) { - ret = -ENODATA; - goto release; - } - - if (!src_ip) { - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = rt->rt_src; - } - ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); release: neigh_release(neigh); @@ -305,12 +297,12 @@ static int addr6_resolve_remote(struct sockaddr_in6 *src_in, } #endif -static int addr_resolve_remote(struct sockaddr *src_in, - struct sockaddr *dst_in, - struct rdma_dev_addr *addr) +static int addr_resolve(struct sockaddr *src_in, + struct sockaddr *dst_in, + struct rdma_dev_addr *addr) { if (src_in->sa_family == AF_INET) { - return addr4_resolve_remote((struct sockaddr_in *) src_in, + return addr4_resolve((struct sockaddr_in *) src_in, (struct sockaddr_in *) dst_in, addr); } else return addr6_resolve_remote((struct sockaddr_in6 *) src_in, @@ -330,8 +322,7 @@ static void process_req(struct work_struct *work) if (req->status == -ENODATA) { src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; - req->status = addr_resolve_remote(src_in, dst_in, - req->addr); + req->status = addr_resolve(src_in, dst_in, req->addr); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) @@ -363,32 +354,6 @@ static int addr_resolve_local(struct sockaddr *src_in, int ret; switch (dst_in->sa_family) { - case AF_INET: - { - __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr; - __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; - - dev = ip_dev_find(&init_net, dst_ip); - if (!dev) - return -EADDRNOTAVAIL; - - if (ipv4_is_zeronet(src_ip)) { - src_in->sa_family = dst_in->sa_family; - ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip; - ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (ipv4_is_loopback(src_ip)) { - ret = rdma_translate_ip(dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } else { - ret = rdma_translate_ip(src_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } - dev_put(dev); - break; - } - #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: { @@ -473,7 +438,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->status = addr_resolve_local(src_in, dst_in, addr); if (req->status == -EADDRNOTAVAIL) - req->status = addr_resolve_remote(src_in, dst_in, addr); + req->status = addr_resolve(src_in, dst_in, addr); switch (req->status) { case 0: -- cgit v1.2.3 -- cgit v1.2.3 From d14714df61681cfecf945a58436edf197327e87f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 16:46:25 -0800 Subject: IB/addr: Fix IPv6 routing lookup Include link scope as part of address resolution. Combine local and remote address resolution into a single, simpler code path. Fix error checking in the IPv6 routing lookups. Based on work from: David Wilder Jason Gunthorpe Signed-off-by: Sean Hefty [ Fix up cma_check_linklocal() for !IPV6 case. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 144 ++++++++++++----------------------------- drivers/infiniband/core/cma.c | 47 ++++++++++---- 2 files changed, 74 insertions(+), 117 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 38a7184ea745..abbb06996f9e 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -176,34 +176,6 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static void addr_send_arp(struct sockaddr *dst_in) -{ - struct rtable *rt; - struct flowi fl; - - memset(&fl, 0, sizeof fl); - - switch (dst_in->sa_family) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - { - struct dst_entry *dst; - - fl.nl_u.ip6_u.daddr = - ((struct sockaddr_in6 *) dst_in)->sin6_addr; - - dst = ip6_route_output(&init_net, NULL, &fl); - if (!dst) - return; - - neigh_event_send(dst->neighbour, NULL); - dst_release(dst); - break; - } -#endif - } -} - static int addr4_resolve(struct sockaddr_in *src_in, struct sockaddr_in *dst_in, struct rdma_dev_addr *addr) @@ -259,39 +231,63 @@ out: } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static int addr6_resolve_remote(struct sockaddr_in6 *src_in, - struct sockaddr_in6 *dst_in, - struct rdma_dev_addr *addr) +static int addr6_resolve(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) { struct flowi fl; struct neighbour *neigh; struct dst_entry *dst; - int ret = -ENODATA; + int ret; memset(&fl, 0, sizeof fl); - fl.nl_u.ip6_u.daddr = dst_in->sin6_addr; - fl.nl_u.ip6_u.saddr = src_in->sin6_addr; + ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); + ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); fl.oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl); - if (!dst) - return ret; + if ((ret = dst->error)) + goto put; + + if (ipv6_addr_any(&fl.fl6_src)) { + ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, + &fl.fl6_dst, 0, &fl.fl6_src); + if (ret) + goto put; + src_in->sin6_family = AF_INET6; + ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); + } + + if (dst->dev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); + goto put; + } + + /* If the device does ARP internally, return 'done' */ if (dst->dev->flags & IFF_NOARP) { ret = rdma_copy_addr(addr, dst->dev, NULL); - } else { - neigh = dst->neighbour; - if (neigh && (neigh->nud_state & NUD_VALID)) - ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); + goto put; } + neigh = dst->neighbour; + if (!neigh || !(neigh->nud_state & NUD_VALID)) { + neigh_event_send(dst->neighbour, NULL); + ret = -ENODATA; + goto put; + } + + ret = rdma_copy_addr(addr, dst->dev, neigh->ha); +put: dst_release(dst); return ret; } #else -static int addr6_resolve_remote(struct sockaddr_in6 *src_in, - struct sockaddr_in6 *dst_in, - struct rdma_dev_addr *addr) +static int addr6_resolve(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) { return -EADDRNOTAVAIL; } @@ -305,7 +301,7 @@ static int addr_resolve(struct sockaddr *src_in, return addr4_resolve((struct sockaddr_in *) src_in, (struct sockaddr_in *) dst_in, addr); } else - return addr6_resolve_remote((struct sockaddr_in6 *) src_in, + return addr6_resolve((struct sockaddr_in6 *) src_in, (struct sockaddr_in6 *) dst_in, addr); } @@ -346,60 +342,6 @@ static void process_req(struct work_struct *work) } } -static int addr_resolve_local(struct sockaddr *src_in, - struct sockaddr *dst_in, - struct rdma_dev_addr *addr) -{ - struct net_device *dev; - int ret; - - switch (dst_in->sa_family) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - { - struct in6_addr *a; - - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) - if (ipv6_chk_addr(&init_net, - &((struct sockaddr_in6 *) dst_in)->sin6_addr, - dev, 1)) - break; - - if (!dev) { - read_unlock(&dev_base_lock); - return -EADDRNOTAVAIL; - } - - a = &((struct sockaddr_in6 *) src_in)->sin6_addr; - - if (ipv6_addr_any(a)) { - src_in->sa_family = dst_in->sa_family; - ((struct sockaddr_in6 *) src_in)->sin6_addr = - ((struct sockaddr_in6 *) dst_in)->sin6_addr; - ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (ipv6_addr_loopback(a)) { - ret = rdma_translate_ip(dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } else { - ret = rdma_translate_ip(src_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } - read_unlock(&dev_base_lock); - break; - } -#endif - - default: - ret = -EADDRNOTAVAIL; - break; - } - - return ret; -} - int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, @@ -436,10 +378,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->client = client; atomic_inc(&client->refcount); - req->status = addr_resolve_local(src_in, dst_in, addr); - if (req->status == -EADDRNOTAVAIL) - req->status = addr_resolve(src_in, dst_in, addr); - + req->status = addr_resolve(src_in, dst_in, addr); switch (req->status) { case 0: req->timeout = jiffies; @@ -448,7 +387,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client, case -ENODATA: req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; queue_req(req); - addr_send_arp(dst_in); break; default: ret = req->status; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 38867a46d39e..fbdd73106000 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1472,15 +1472,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af) -{ - struct sockaddr_storage addr_in; - - memset(&addr_in, 0, sizeof addr_in); - addr_in.ss_family = af; - return rdma_bind_addr(id, (struct sockaddr *) &addr_in); -} - int rdma_listen(struct rdma_cm_id *id, int backlog) { struct rdma_id_private *id_priv; @@ -1488,7 +1479,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == CMA_IDLE) { - ret = cma_bind_any(id, AF_INET); + ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; + ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); if (ret) return ret; } @@ -1885,10 +1877,14 @@ err: static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr) { - if (src_addr && src_addr->sa_family) - return rdma_bind_addr(id, src_addr); - else - return cma_bind_any(id, dst_addr->sa_family); + if (!src_addr || !src_addr->sa_family) { + src_addr = (struct sockaddr *) &id->route.addr.src_addr; + if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) { + ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = + ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; + } + } + return rdma_bind_addr(id, src_addr); } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, @@ -2084,6 +2080,25 @@ static int cma_get_port(struct rdma_id_private *id_priv) return ret; } +static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, + struct sockaddr *addr) +{ +#if defined(CONFIG_IPv6) || defined(CONFIG_IPV6_MODULE) + struct sockaddr_in6 *sin6; + + if (addr->sa_family != AF_INET6) + return 0; + + sin6 = (struct sockaddr_in6 *) addr; + if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !sin6->sin6_scope_id) + return -EINVAL; + + dev_addr->bound_dev_if = sin6->sin6_scope_id; +#endif + return 0; +} + int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; @@ -2096,6 +2111,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) return -EINVAL; + ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + if (ret) + goto err1; + if (cma_loopback_addr(addr)) { ret = cma_bind_loopback(id_priv); } else if (!cma_zero_addr(addr)) { -- cgit v1.2.3 From 0cd4d0fd9b0a4e10c091fc6316d1bf92885dcd9c Mon Sep 17 00:00:00 2001 From: "David J. Wilder" Date: Wed, 9 Dec 2009 10:03:00 -0800 Subject: IPoIB: Clear ipoib_neigh.dgid in ipoib_neigh_alloc() IPoIB can miss a change in destination GID under some conditions. The problem is caused when ipoib_neigh->dgid contains a stale address. The fix is to set ipoib_neigh->dgid to zero in ipoib_neigh_alloc(). This can happen when a system using bonding on its IPoIB interfaces has switched its active interface from interface A to B and back to A. The system that fails over will not correctly processes the 2nd address change, as described below. When an address has changed neighbor->ha is updated with the new address. Each neighbor has an associated ipoib_neigh. ipoib_neigh->dgid also holds a copy of the remote node's hardware address. When an address changes neighbor->ha is updated by the network layer (arp code) with the new address. IPoIB detects this change in ipoib_start_xmit() by comparing neighbor->ha with ipoib_neigh->dgid. The bug is that ipoib_neigh->dgid may already contain the new address (A) thus the change from B to A is missed by ipoib. Here is the sequence of events: ipoib_neigh->dgid = A and neighbor->ha = A The address is switched to B (the first switch) neighbor->ha = B The change is seen in ipoib_start_xmit() -- neighbor->ha != ipoib_neigh->dgid so ipoib_neigh is released, and a new one is allocated. The allocator may return the same chunk of memory that was just released, therefore ipoib_neigh->dgid still contains A at this point. ipoib_neigh->dgid should be updated in neigh_add_path(), but if the following conditions are true dgid is not updated: 1) __path_find() returns a path 2) path->ah is NULL The remote system now switches from address B to A, neighbor->ha is updated to A. Now we have again : ipoib_neigh->dgid = A and neighbor->ha = A Since the addresses are the same ipoib won't process the change in address. Fix this by zeroing out the dgid field when allocating a new struct ipoib_neigh. Signed-off-by: David Wilder Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2bf5116deec4..df3eb8c9fd96 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -884,6 +884,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, neigh->neighbour = neighbour; neigh->dev = dev; + memset(&neigh->dgid.raw, 0, sizeof (union ib_gid)); *to_ipoib_neigh(neighbour) = neigh; skb_queue_head_init(&neigh->queue); ipoib_cm_set(neigh, NULL); -- cgit v1.2.3 From 598cb6f327c99ceaf81c45c32504669b2028712b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 9 Dec 2009 10:05:28 -0800 Subject: IB/ipath: Use bitmap_weight() Use bitmap_weight() instead of finding all set bits in bitmap by hand. Signed-off-by: Akinobu Mita Cc: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 013d1380e77c..d2787fe80304 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -1697,7 +1698,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, unsigned len, int avail) { unsigned long flags; - unsigned end, cnt = 0, next; + unsigned end, cnt = 0; /* There are two bits per send buffer (busy and generation) */ start *= 2; @@ -1748,12 +1749,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, if (dd->ipath_pioupd_thresh) { end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); - next = find_first_bit(dd->ipath_pioavailkernel, end); - while (next < end) { - cnt++; - next = find_next_bit(dd->ipath_pioavailkernel, end, - next + 1); - } + cnt = bitmap_weight(dd->ipath_pioavailkernel, end); } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); -- cgit v1.2.3 From 9420269428b3dc80c98e52beac60a3976fbef7d2 Mon Sep 17 00:00:00 2001 From: Alexander Schmidt Date: Wed, 9 Dec 2009 10:11:04 -0800 Subject: IB/ehca: Rework destroy_eq() The ibmebus_free_irq() function, which might sleep, was called with interrupts disabled. To fix this, make sure that no interrupts are running by killing the interrupt tasklet. Also lock the shca_list_lock to protect against the poll_eqs_timer running concurrently. Signed-off-by: Alexander Schmidt Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 1 + drivers/infiniband/hw/ehca/ehca_eq.c | 9 ++++++--- drivers/infiniband/hw/ehca/ehca_main.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index c825142a2fb7..0136abd50dd4 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock; extern rwlock_t ehca_cq_idr_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; extern int ehca_static_rate; extern int ehca_port_act_time; diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 523e733c630e..3b87589b8ea0 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq