summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 11:32:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 11:32:23 -0700
commit8688d9540cc6e17df4cba71615e27f04e0378fe6 (patch)
tree45ab333822188966217f6a3ec7e8289ca7eced72 /net
parent320cd413faefe2d30f4ee9651efddec5141bc95b (diff)
parentb4839ebe21fc5d543b933d83644981ea73e9ba36 (diff)
Merge tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable patches: - Fix a crash in the NFSv4 file locking code. - Fix an fsync() regression, where we were failing to retry I/O in some circumstances. - Fix an infinite loop in NFSv4.0 OPEN stateid recovery - Fix a memory leak when an attempted pnfs fails. - Fix a memory leak in the backchannel code - Large hostnames were not supported correctly in NFSv4.1 - Fix a pNFS/flexfiles bug that was impeding error reporting on I/O. - Fix a couple of credential issues in pNFS/flexfiles Bugfixes + cleanups: - Open flag sanity checks in the NFSv4 atomic open codepath - More NFSv4 delegation related bugfixes - Various NFSv4.1 backchannel bugfixes and cleanups - Fix the NFS swap socket code - Various cleanups of the NFSv4 SETCLIENTID and EXCHANGE_ID code - Fix a UDP transport deadlock issue Features: - More RDMA client transport improvements - NFSv4.2 LAYOUTSTATS functionality for pnfs flexfiles" * tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (87 commits) nfs: Remove invalid tk_pid from debug message nfs: Remove invalid NFS_ATTR_FATTR_V4_REFERRAL checking in nfs4_get_rootfh nfs: Drop bad comment in nfs41_walk_client_list() nfs: Remove unneeded micro checking of CONFIG_PROC_FS nfs: Don't setting FILE_CREATED flags always nfs: Use remove_proc_subtree() instead remove_proc_entry() nfs: Remove unused argument in nfs_server_set_fsinfo() nfs: Fix a memory leak when meeting an unsupported state protect nfs: take extra reference to fl->fl_file when running a LOCKU operation NFSv4: When returning a delegation, don't reclaim an incompatible open mode. NFSv4.2: LAYOUTSTATS is optional to implement NFSv4.2: Fix up a decoding error in layoutstats pNFS/flexfiles: Fix the reset of struct pgio_header when resending pNFS/flexfiles: Turn off layoutcommit for servers that don't need it pnfs/flexfiles: protect ktime manipulation with mirror lock nfs: provide pnfs_report_layoutstat when NFS42 is disabled nfs: verify open flags before allowing open nfs: always update creds in mirror, even when we have an already connected ds nfs: fix potential credential leak in ff_layout_update_mirror_cred pnfs/flexfiles: report layoutstat regularly ...
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/Makefile2
-rw-r--r--net/sunrpc/backchannel_rqst.c134
-rw-r--r--net/sunrpc/bc_svc.c63
-rw-r--r--net/sunrpc/clnt.c109
-rw-r--r--net/sunrpc/debugfs.c78
-rw-r--r--net/sunrpc/svc.c36
-rw-r--r--net/sunrpc/xprt.c7
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c120
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c227
-rw-r--r--net/sunrpc/xprtrdma/physical_ops.c14
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c8
-rw-r--r--net/sunrpc/xprtrdma/transport.c43
-rw-r--r--net/sunrpc/xprtrdma/verbs.c253
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h38
-rw-r--r--net/sunrpc/xprtsock.c151
15 files changed, 751 insertions, 532 deletions
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 936ad0a15371..b512fbd9d79a 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -14,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
-sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
+sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 9dd0ea8db463..9825ff0f91d6 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
- return xprt->bc_alloc_count > 0;
+ return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
}
static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
+ atomic_add(n, &xprt->bc_free_slots);
xprt->bc_alloc_count += n;
}
static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
+ atomic_sub(n, &xprt->bc_free_slots);
return xprt->bc_alloc_count -= n;
}
@@ -60,13 +62,62 @@ static void xprt_free_allocation(struct rpc_rqst *req)
dprintk("RPC: free allocations for req= %p\n", req);
WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
- xbufp = &req->rq_private_buf;
+ xbufp = &req->rq_rcv_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
xbufp = &req->rq_snd_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
kfree(req);
}
+static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
+{
+ struct page *page;
+ /* Preallocate one XDR receive buffer */
+ page = alloc_page(gfp_flags);
+ if (page == NULL)
+ return -ENOMEM;
+ buf->head[0].iov_base = page_address(page);
+ buf->head[0].iov_len = PAGE_SIZE;
+ buf->tail[0].iov_base = NULL;
+ buf->tail[0].iov_len = 0;
+ buf->page_len = 0;
+ buf->len = 0;
+ buf->buflen = PAGE_SIZE;
+ return 0;
+}
+
+static
+struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
+{
+ struct rpc_rqst *req;
+
+ /* Pre-allocate one backchannel rpc_rqst */
+ req = kzalloc(sizeof(*req), gfp_flags);
+ if (req == NULL)
+ return NULL;
+
+ req->rq_xprt = xprt;
+ INIT_LIST_HEAD(&req->rq_list);
+ INIT_LIST_HEAD(&req->rq_bc_list);
+
+ /* Preallocate one XDR receive buffer */
+ if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
+ printk(KERN_ERR "Failed to create bc receive xbuf\n");
+ goto out_free;
+ }
+ req->rq_rcv_buf.len = PAGE_SIZE;
+
+ /* Preallocate one XDR send buffer */
+ if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) {
+ printk(KERN_ERR "Failed to create bc snd xbuf\n");
+ goto out_free;
+ }
+ return req;
+out_free:
+ xprt_free_allocation(req);
+ return NULL;
+}
+
/*
* Preallocate up to min_reqs structures and related buffers for use
* by the backchannel. This function can be called multiple times
@@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req)
*/
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
- struct page *page_rcv = NULL, *page_snd = NULL;
- struct xdr_buf *xbufp = NULL;
- struct rpc_rqst *req, *tmp;
+ struct rpc_rqst *req;
struct list_head tmp_list;
int i;
@@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
INIT_LIST_HEAD(&tmp_list);
for (i = 0; i < min_reqs; i++) {
/* Pre-allocate one backchannel rpc_rqst */
- req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+ req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
if (req == NULL) {
printk(KERN_ERR "Failed to create bc rpc_rqst\n");
goto out_free;
@@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
/* Add the allocated buffer to the tmp list */
dprintk("RPC: adding req= %p\n", req);
list_add(&req->rq_bc_pa_list, &tmp_list);
-
- req->rq_xprt = xprt;
- INIT_LIST_HEAD(&req->rq_list);
- INIT_LIST_HEAD(&req->rq_bc_list);
-
- /* Preallocate one XDR receive buffer */
- page_rcv = alloc_page(GFP_KERNEL);
- if (page_rcv == NULL) {
- printk(KERN_ERR "Failed to create bc receive xbuf\n");
- goto out_free;
- }
- xbufp = &req->rq_rcv_buf;
- xbufp->head[0].iov_base = page_address(page_rcv);
- xbufp->head[0].iov_len = PAGE_SIZE;
- xbufp->tail[0].iov_base = NULL;
- xbufp->tail[0].iov_len = 0;
- xbufp->page_len = 0;
- xbufp->len = PAGE_SIZE;
- xbufp->buflen = PAGE_SIZE;
-
- /* Preallocate one XDR send buffer */
- page_snd = alloc_page(GFP_KERNEL);
- if (page_snd == NULL) {
- printk(KERN_ERR "Failed to create bc snd xbuf\n");
- goto out_free;
- }
-
- xbufp = &req->rq_snd_buf;
- xbufp->head[0].iov_base = page_address(page_snd);
- xbufp->head[0].iov_len = 0;
- xbufp->tail[0].iov_base = NULL;
- xbufp->tail[0].iov_len = 0;
- xbufp->page_len = 0;
- xbufp->len = 0;
- xbufp->buflen = PAGE_SIZE;
}
/*
@@ -167,7 +181,10 @@ out_free:
/*
* Memory allocation failed, free the temporary list
*/
- list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
+ while (!list_empty(&tmp_list)) {
+ req = list_first_entry(&tmp_list,
+ struct rpc_rqst,
+ rq_bc_pa_list);
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
}
@@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
- if (list_empty(&xprt->bc_pa_list))
+ if (atomic_read(&xprt->bc_free_slots) <= 0)
goto not_found;
-
+ if (list_empty(&xprt->bc_pa_list)) {
+ req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
+ if (!req)
+ goto not_found;
+ /* Note: this 'free' request adds it to xprt->bc_pa_list */
+ xprt_free_bc_request(req);
+ }
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
req->rq_reply_bytes_recvd = 0;
@@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req)
req->rq_connect_cookie = xprt->connect_cookie - 1;
smp_mb__before_atomic();
- WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
smp_mb__after_atomic();
- if (!xprt_need_to_requeue(xprt)) {
+ /*
+ * Return it to the list of preallocations so that it
+ * may be reused by a new callback request.
+ */
+ spin_lock_bh(&xprt->bc_pa_lock);
+ if (xprt_need_to_requeue(xprt)) {
+ list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+ xprt->bc_alloc_count++;
+ req = NULL;
+ }
+ spin_unlock_bh(&xprt->bc_pa_lock);
+ if (req != NULL) {
/*
* The last remaining session was destroyed while this
* entry was in use. Free the entry and don't attempt
@@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req)
xprt_free_allocation(req);
return;
}
-
- /*
- * Return it to the list of preallocations so that it
- * may be reused by a new callback request.
- */
- spin_lock_bh(&xprt->bc_pa_lock);
- list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
}
/*
@@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
spin_lock(&xprt->bc_pa_lock);
list_del(&req->rq_bc_pa_list);
+ xprt->bc_alloc_count--;
spin_unlock(&xprt->bc_pa_lock);
req->rq_private_buf.len = copied;
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
deleted file mode 100644
index 15c7a8a1c24f..000000000000
--- a/net/sunrpc/bc_svc.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/******************************************************************************
-
-(c) 2007 Network Appliance, Inc. All Rights Reserved.
-(c) 2009 NetApp. All Rights Reserved.
-
-NetApp provides this source code under the GPL v2 License.
-The GPL v2 license is available at
-http://opensource.org/licenses/gpl-license.php.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-******************************************************************************/
-
-/*
- * The NFSv4.1 callback service helper routines.
- * They implement the transport level processing required to send the
- * reply over an existing open connection previously established by the client.
- */
-
-#include <linux/module.h>
-
-#include <linux/sunrpc/xprt.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/sunrpc/bc_xprt.h>
-
-#define RPCDBG_FACILITY RPCDBG_SVCDSP
-
-/* Empty callback ops */
-static const struct rpc_call_ops nfs41_callback_ops = {
-};
-
-
-/*
- * Send the callback reply
- */
-int bc_send(struct rpc_rqst *req)
-{
- struct rpc_task *task;
- int ret;
-
- dprintk("RPC: bc_send req= %p\n", req);
- task = rpc_run_bc_task(req, &nfs41_callback_ops);
- if (IS_ERR(task))
- ret = PTR_ERR(task);
- else {
- WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
- ret = task->tk_status;
- rpc_put_task(task);
- }
- dprintk("RPC: bc_send ret= %d\n", ret);
- return ret;
-}
-
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e6ce1517367f..cbc6af923dd1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
- if (sk_memalloc_socks()) {
- struct rpc_xprt *xprt;
-
- rcu_read_lock();
- xprt = rcu_dereference(clnt->cl_xprt);
- if (xprt->swapper)
- task->tk_flags |= RPC_TASK_SWAPPER;
- rcu_read_unlock();
- }
+ if (atomic_read(&clnt->cl_swapper))
+ task->tk_flags |= RPC_TASK_SWAPPER;
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
- * @tk_ops: RPC call ops
*/
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
- const struct rpc_call_ops *tk_ops)
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
{
struct rpc_task *task;
struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_task_setup task_setup_data = {
- .callback_ops = tk_ops,
+ .callback_ops = &rpc_default_ops,
+ .flags = RPC_TASK_SOFTCONN,
};
dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task)
req->rq_callsize + req->rq_rcvsize);
if (req->rq_buffer != NULL)
return;
+ xprt_inject_disconnect(xprt);
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- if (!xprt_prepare_transmit(task)) {
- /*
- * Could not reserve the transport. Try again after the
- * transport is released.
- */
- task->tk_status = 0;
- task->tk_action = call_bc_transmit;
- return;
- }
+ if (!xprt_prepare_transmit(task))
+ goto out_retry;
- task->tk_action = rpc_exit_task;
if (task->tk_status < 0) {
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
- return;
+ goto out_done;
}
+ if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
+ req->rq_bytes_sent = 0;
xprt_transmit(task);
+
+ if (task->tk_status == -EAGAIN)
+ goto out_nospace;
+
xprt_end_transmit(task);
dprint_status(task);
switch (task->tk_status) {
case 0:
/* Success */
- break;
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
+ case -ECONNRESET:
+ case -ECONNREFUSED:
+ case -EADDRINUSE:
+ case -ENOTCONN:
+ case -EPIPE:
+ break;
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
@@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task)
break;
}
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
+out_done:
+ task->tk_action = rpc_exit_task;
+ return;
+out_nospace:
+ req->rq_connect_cookie = req->rq_xprt->connect_cookie;
+out_retry:
+ task->tk_status = 0;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -2476,3 +2479,59 @@ void rpc_show_tasks(struct net *net)
spin_unlock(&sn->rpc_client_lock);
}
#endif
+
+#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
+int
+rpc_clnt_swap_activate(struct rpc_clnt *clnt)
+{
+ int ret = 0;
+ struct rpc_xprt *xprt;
+
+ if (atomic_inc_return(&clnt->cl_swapper) == 1) {
+retry:
+ rcu_read_lock();
+ xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ rcu_read_unlock();
+ if (!xprt) {
+ /*
+ * If we didn't get a reference, then we likely are
+ * racing with a migration event. Wait for a grace
+ * period and try again.
+ */
+ synchronize_rcu();
+ goto retry;
+ }
+
+ ret = xprt_enable_swap(xprt);
+ xprt_put(xprt);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
+
+void
+rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
+{
+ struct rpc_xprt *xprt;
+
+ if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) {
+retry:
+ rcu_read_lock();
+ xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ rcu_read_unlock();
+ if (!xprt) {
+ /*
+ * If we didn't get a reference, then we likely are
+ * racing with a migration event. Wait for a grace
+ * period and try again.
+ */
+ synchronize_rcu();
+ goto retry;
+ }
+
+ xprt_disable_swap(xprt);
+ xprt_put(xprt);
+ }
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
+#endif /* CONFIG_SUNRPC_SWAP */
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 82962f7e6e88..e7b4d93566df 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -10,9 +10,12 @@
#include "netns.h"
static struct dentry *topdir;
+static struct dentry *rpc_fault_dir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
+unsigned int rpc_inject_disconnect;
+
struct rpc_clnt_iter {
struct rpc_clnt *clnt;
loff_t pos;
@@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
debugfs_remove_recursive(xprt->debugfs);
xprt->debugfs = NULL;
}
+
+ atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
void
@@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
xprt->debugfs = NULL;
}
+static int
+fault_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = kmalloc(128, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+fault_release(struct inode *inode, struct file *filp)
+{
+ kfree(filp->private_data);
+ return 0;
+}
+
+static ssize_t
+fault_disconnect_read(struct file *filp, char __user *user_buf,
+ size_t len, loff_t *offset)
+{
+ char *buffer = (char *)filp->private_data;
+ size_t size;
+
+ size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
+ return simple_read_from_buffer(user_buf, len, offset, buffer, size);
+}
+
+static ssize_t
+fault_disconnect_write(struct file *filp, const char __user *user_buf,
+ size_t len, loff_t *offset)
+{
+ char buffer[16];
+
+ if (len >= sizeof(buffer))
+ len = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, user_buf, len))
+ return -EFAULT;
+ buffer[len] = '\0';
+ if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
+ return -EINVAL;
+ return len;
+}
+
+static const struct file_operations fault_disconnect_fops = {
+ .owner = THIS_MODULE,
+ .open = fault_open,
+ .read = fault_disconnect_read,
+ .write = fault_disconnect_write,
+ .release = fault_release,
+};
+
+static struct dentry *
+inject_fault_dir(struct dentry *topdir)
+{
+ struct dentry *faultdir;
+
+ faultdir = debugfs_create_dir("inject_fault", topdir);
+ if (!faultdir)
+ return NULL;
+
+ if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
+ NULL, &fault_disconnect_fops))
+ return NULL;
+
+ return faultdir;
+}
+
void __exit
sunrpc_debugfs_exit(void)
{
debugfs_remove_recursive(topdir);
topdir = NULL;
+ rpc_fault_dir = NULL;
rpc_clnt_dir = NULL;
rpc_xprt_dir = NULL;
}
@@ -282,6 +355,10 @@ sunrpc_debugfs_init(void)
if (!topdir)
return;
+ rpc_fault_dir = inject_fault_dir(topdir);
+ if (!rpc_fault_dir)
+ goto out_remove;
+
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
if (!rpc_clnt_dir)
goto out_remove;
@@ -294,5 +371,6 @@ sunrpc_debugfs_init(void)
out_remove:
debugfs_remove_recursive(topdir);
topdir = NULL;
+ rpc_fault_dir = NULL;
rpc_clnt_dir = NULL;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 852ae606b02a..5a16d8d8c831 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
+ struct rpc_task *task;
+ int proc_error;
+ int error;
+
+ dprintk("svc: %s(%p)\n", __func__, req);
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xprt = serv->sv_bc_xprt;
@@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
/*
* Skip the next two words because they've already been
- * processed in the trasport
+ * processed in the transport
*/
svc_getu32(argv); /* XID */
svc_getnl(argv); /* CALLDIR */
- /* Returns 1 for send, 0 for drop */
- if (svc_process_common(rqstp, argv, resv)) {
- memcpy(&req->rq_snd_buf, &rqstp->rq_res,
- sizeof(req->rq_snd_buf));
- return bc_send(req);
- } else {
- /* drop request */
+ /* Parse and execute the bc call */
+ proc_error = svc_process_common(rqstp, argv, resv);
+
+ atomic_inc(&req->rq_xprt->bc_free_slots);
+ if (!proc_error) {
+ /* Processing error: drop the request */
xprt_free_bc_request(req);
return 0;
}
+
+ /* Finally, send the reply synchronously */
+ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
+ task = rpc_run_bc_task(req);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+ goto out;
+ }
+
+ WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
+ error = task->tk_status;
+ rpc_put_task(task);
+
+out:
+ dprintk("svc: %s(), error=%d\n", __func__, error);
+ return error;
}
EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1d4fe24af06a..ab5dd621ae0c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
@@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
}
xprt_clear_locked(xprt);
out_sleep:
+ if (req)
+ __xprt_put_cong(xprt, req);
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
@@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work)
struct rpc_xprt *xprt =
container_of(work, struct rpc_xprt, task_cleanup);
- xprt->ops->close(xprt);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
}
@@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task)
task->tk_status = status;
return;
}
+ xprt_inject_disconnect(xprt);
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
@@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(req->rq_buffer);
+ xprt_inject_disconnect(xprt);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 302d4ebf6fbf..f1e8dafbd507 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -11,6 +11,21 @@
* can take tens of usecs to complete.
*/
+/* Normal operation
+ *
+ * A Memory Region is prepared for RDMA READ or WRITE using the
+ * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
+ * finished, the Memory Region is unmapped using the ib_unmap_fmr
+ * verb (fmr_op_unmap).
+ */
+
+/* Transport recovery
+ *
+ * After a transport reconnect, fmr_op_map re-uses the MR already
+ * allocated for the RPC, but generates a fresh rkey then maps the
+ * MR again. This process is synchronous.
+ */
+
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_mw *r;
int i, rc;
+ spin_lock_init(&buf->rb_mwlock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
- i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
- dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);
+ i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
+ i += 2; /* head + tail */
+ i *= buf->rb_max_requests; /* one set for each RPC slot */
+ dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
+ rc = -ENOMEM;
while (i--) {
r = kzalloc(sizeof(*r), GFP_KERNEL);
if (!r)
- return -ENOMEM;
+ goto out;
- r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
- if (IS_ERR(r->r.fmr))
+ r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
+ sizeof(u64), GFP_KERNEL);
+ if (!r->r.fmr.physaddrs)
+ goto out_free;
+
+ r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+ if (IS_ERR(r->r.fmr.fmr))
goto out_fmr_err;
list_add(&r->mw_list, &buf->rb_mws);
@@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
return 0;
out_fmr_err:
- rc = PTR_ERR(r->r.fmr);
+ rc = PTR_ERR(r->r.fmr.fmr);
dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
+ kfree(r->r.fmr.physaddrs);
+out_free:
kfree(r);
+out:
return rc;
}
+static int
+__fmr_unmap(struct rpcrdma_mw *r)
+{
+ LIST_HEAD(l);
+
+ list_add(&r->r.fmr.fmr->list, &l);
+ return ib_unmap_fmr(&l);
+}
+
/* Use the ib_map_phys_fmr() verb to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
*/
@@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct ib_device *device = ia->ri_id->device;
+ struct ib_device *device = ia->ri_device;
enum dma_data_direction direction = rpcrdma_data_dir(writing);
struct rpcrdma_mr_seg *seg1 = seg;
- struct rpcrdma_mw *mw = seg1->rl_mw;
- u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
int len, pageoff, i, rc;
+ struct rpcrdma_mw *mw;
+
+ mw = seg1->rl_mw;
+ seg1->rl_mw = NULL;
+ if (!mw) {
+ mw = rpcrdma_get_mw(r_xprt);
+ if (!mw)
+ return -ENOMEM;
+ } else {
+ /* this is a retransmit; generate a fresh rkey */
+ rc = __fmr_unmap(mw);
+ if (rc)
+ return rc;
+ }
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
@@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
rpcrdma_map_one(device, seg, direction);
- physaddrs[i] = seg->mr_dma;
+ mw->r.fmr.physaddrs[i] = seg->mr_dma;
len += seg->mr_len;
++seg;
++i;
@@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
break;
}
- rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
+ rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
+ i, seg1->mr_dma);
if (rc)
goto out_maperr;
- seg1->mr_rkey = mw->r.fmr->rkey;
+ seg1->rl_mw = mw;
+ seg1->mr_rkey = mw->r.fmr.fmr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
seg1->mr_len = len;
@@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mr_seg *seg1 = seg;
- struct ib_device *device;
+ struct rpcrdma_mw *mw = seg1->rl_mw;
int rc, nsegs = seg->mr_nsegs;
- LIST_HEAD(l);
- list_add(&seg1->rl_mw->r.fmr->list, &l);
- rc = ib_unmap_fmr(&l);
- read_lock(&ia->ri_qplock);
- device = ia->ri_id->device;
+ dprintk("RPC: %s: FMR %p\n", __func__, mw);
+
+ seg1->rl_mw = NULL;
while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(device, seg++);
- read_unlock(&ia->ri_qplock);
+ rpcrdma_unmap_one(ia->ri_device, seg++);
+ rc = __fmr_unmap(mw);
if (rc)
goto out_err;
+ rpcrdma_put_mw(r_xprt, mw);
return nsegs;
out_err:
+ /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
+ * will attempt to release it when the transport is destroyed.
+ */
dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
return nsegs;
}
-/* After a disconnect, unmap all FMRs.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_fmr_external().
- */
-static void
-fmr_op_reset(struct rpcrdma_xprt *r_xprt)
-{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_mw *r;
- LIST_HEAD(list);
- int rc;
-
- list_for_each_entry(r, &buf->rb_all, mw_all)
- list_add(&r->r.fmr->list, &list);
-
- rc = ib_unmap_fmr(&list);
- if (rc)
- dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
- __func__, rc);
-}
-
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
@@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
while (!list_empty(&buf->rb_all)) {
r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
list_del(&r->mw_all);
- rc = ib_dealloc_fmr(r->r.fmr);
+ kfree(r->r.fmr.physaddrs);
+
+ rc = ib_dealloc_fmr(r->r.fmr.fmr);
if (rc)
dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
__func__, rc);
+
kfree(r);
}
}
@@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
.ro_init = fmr_op_init,
- .ro_reset = fmr_op_reset,
.ro_destroy = fmr_op_destroy,
.ro_displayname = "fmr",
};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index d234521320a4..04ea914201b2 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -11,12 +11,136 @@
* but most complex memory registration mode.
*/