// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2018 Oracle. All rights reserved.
*
* Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
*/
#include <rdma/rw.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
/* Each R/W context contains state for one chain of RDMA Read or
* Write Work Requests.
*
* Each WR chain handles a single contiguous server-side buffer,
* because scatterlist entries after the first have to start on
* page alignment. xdr_buf iovecs cannot guarantee alignment.
*
* Each WR chain handles only one R_key. Each RPC-over-RDMA segment
* from a client may contain a unique R_key, so each WR chain moves
* up to one segment at a time.
*
* The scatterlist makes this data structure over 4KB in size. To
* make it less likely to fail, and to handle the allocation for
* smaller I/O requests without disabling bottom-halves, these
* contexts are created on demand, but cached and reused until the
* controlling svcxprt_rdma is destroyed.
*/
struct svc_rdma_rw_ctxt {
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[];
};
static inline struct svc_rdma_rw_ctxt *
svc_rdma_next_ctxt(struct list_head *list)
{
return list_first_entry_or_null(list, struct svc_rdma_rw_ctxt,
rw_list);
}
static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
struct svc_rdma_rw_ctxt *ctxt;
spin_lock(&rdma->sc_rw_ctxt_lock);
ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
if (ctxt) {
list_del(&ctxt->rw_list);
spin_unlock(&rdma->sc_rw_ctxt_lock);
} else {
spin_unlock(&rdma->sc_rw_ctxt_lock);
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
}
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
ctxt->rw_sg_table.sgl,
SG_CHUNK_SIZE))
goto out_free;
return ctxt;
out_free:
kfree(ctxt);
out_noctx:
trace_svcrdma_no_rwctx_err(rdma, sges);
return NULL;
}
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt)
{
sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
spin_lock(&rdma->sc_rw_ctxt_lock);
list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
spin_unlock(&rdma->sc_rw_ctxt_lock);
}
/**
* svc_rdma_destroy_rw_ctxts - Free accumulated R/W contexts
* @rdma: transport about to be destroyed
*
*/
void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
{
struct svc_rdma_rw_ctxt *ctxt;
while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
list_del(&ctxt->rw_list);
kfree(ctxt);
}
}
/**
* svc_rdma_rw_ctx_init - Prepare a R/W context for I/O
* @rdma: controlling transport instance
* @ctxt: R/W context to prepare
* @offset: RDMA offset
* @handle: RDMA tag/handle
* @direction: I/O direction
*
* Returns on success, the number of WQEs that will be needed
* on the workqueue, or a negative errno.
*/
static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt,
u64 offset, u32 handle,
enum dma_data_direction direction)
{
int ret;
ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num,