From 5b833825fd0012aec00f8fa5769d5400c18d59d8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 11 Mar 2015 17:56:23 +0100 Subject: NFSv4.1: don't export static symbol The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ type T; identifier f; @@ static T f (...) { ... } @@ identifier r.f; declarer name EXPORT_SYMBOL_GPL; @@ -EXPORT_SYMBOL_GPL(f); // Signed-off-by: Julia Lawall Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4f802b02fbb9..0b2bca9ae281 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1902,7 +1902,6 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) pnfs_put_lseg(hdr->lseg); nfs_pgio_header_free(hdr); } -EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) @@ -2032,7 +2031,6 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) pnfs_put_lseg(hdr->lseg); nfs_pgio_header_free(hdr); } -EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) -- cgit v1.2.3 From df137bc125b6155e54e6725faf0462b791a34c31 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Mar 2015 14:37:25 +0100 Subject: nfs: do not export discarded symbols The function nfs4_pnfs_v3_ds_connect_unload is exported so it can be used by other modules, but is also marked '__exit' and will be discarded when built into the kernel, as pointed out by this linker error: `nfs4_pnfs_v3_ds_connect_unload' referenced in section `___ksymtab_gpl+nfs4_pnfs_v3_ds_connect_unload' of fs/built-in.o: defined in discarded section `.exit.text' of fs/built-in.o This removes the __exit annotation to make it safe to call this function. Signed-off-by: Arnd Bergmann Fixes: 5f01d9539496 ("nfs41: create NFSv3 DS connection if specified") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 54e36b38fb5f..8582dae5ae99 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -561,7 +561,7 @@ static bool load_v3_ds_connect(void) return(get_v3_ds_connect != NULL); } -void __exit nfs4_pnfs_v3_ds_connect_unload(void) +void nfs4_pnfs_v3_ds_connect_unload(void) { if (get_v3_ds_connect) { symbol_put(nfs3_set_ds_client); -- cgit v1.2.3 From 09a330f4b9324e40947cc4fff13606719382c580 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 5 Dec 2014 21:52:49 -0500 Subject: NFS: remount with security change should return EINVAL A remount that alters security flavors can appear to succeed when it should instead return -EINVAL. Check to see if the current security flavor exists within the flavors specified in the remount options, and if not fail the remount. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 322b2de02988..54a079a465e1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2193,7 +2193,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, data->version != nfss->nfs_client->rpc_ops->version || data->minorversion != nfss->nfs_client->cl_minorversion || data->retrans != nfss->client->cl_timeout->to_retries || - data->selected_flavor != nfss->client->cl_auth->au_flavor || + !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) || data->acregmin != nfss->acregmin / HZ || data->acregmax != nfss->acregmax / HZ || data->acdirmin != nfss->acdirmin / HZ || @@ -2241,7 +2241,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->wsize = nfss->wsize; data->retrans = nfss->client->cl_timeout->to_retries; data->selected_flavor = nfss->client->cl_auth->au_flavor; - data->auth_info = nfss->auth_info; data->acregmin = nfss->acregmin / HZ; data->acregmax = nfss->acregmax / HZ; data->acdirmin = nfss->acdirmin / HZ; -- cgit v1.2.3 From f0eede10fd401a1c5193e020cd372d7b7014d9f3 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Tue, 3 Mar 2015 04:24:25 -0500 Subject: SUNRPC: use jiffies_to_msecs for converting jiffies Use jiffies_to_msecs for converting jiffies as it handles all of the corner cases reliably and also helps readability. Signed-off-by: Nicholas Mc Guire Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index b91fd9c597b4..c234e7ff2bf0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -90,7 +90,7 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) return; dprintk("RPC: %5u setting alarm for %lu ms\n", - task->tk_pid, task->tk_timeout * 1000 / HZ); + task->tk_pid, jiffies_to_msecs(task->tk_timeout)); task->u.tk_wait.expires = jiffies + task->tk_timeout; if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) -- cgit v1.2.3 From 38942ba204639030b3f3cfe5748a6aff3c9dcc81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Mar 2015 15:59:05 -0500 Subject: NFSv4: Append delegations to the per-client list instead of prepending Do so on the assumption that for most use cases, that list will turn into a more or less LRU-ordered list, and so the list traversals in nfs_client_return_marked_delegations() are likely to be shorter before hitting a candidate to return. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a6ad68865880..540ee260318c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -378,7 +378,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct if (freeme == NULL) goto out; } - list_add_rcu(&delegation->super_list, &server->delegations); + list_add_tail_rcu(&delegation->super_list, &server->delegations); rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; -- cgit v1.2.3 From 55cc1d7806cd4827d38a18ddf3bcf04706758218 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 13 Mar 2015 04:25:34 -0400 Subject: SUNRPC: fix build-warning due to format missmatch fix build-warning introduced by commit: f0eede10fd4 ("SUNRPC: use jiffies_to_msecs for converting jiffies") which did not fixup the format properly (my bad). Signed-off-by: Nicholas Mc Guire Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c234e7ff2bf0..337ca851a350 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -89,8 +89,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) if (!task->tk_timeout) return; - dprintk("RPC: %5u setting alarm for %lu ms\n", - task->tk_pid, jiffies_to_msecs(task->tk_timeout)); + dprintk("RPC: %5u setting alarm for %u ms\n", + task->tk_pid, jiffies_to_msecs(task->tk_timeout)); task->u.tk_wait.expires = jiffies + task->tk_timeout; if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) -- cgit v1.2.3 From 2854475f6c612d59901d51c358abd05643278b53 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 21 Jan 2015 00:20:11 +0800 Subject: nfs: clean up nfs_direct_IO This follows up "nfs: fix dio deadlock when O_DIRECT flag is flipped" and removes the unnecessary CONFIG_NFS_SWAP switch. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e907c8cf732e..51f617e45fd1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -259,18 +259,11 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t if (!IS_SWAPFILE(inode)) return 0; -#ifndef CONFIG_NFS_SWAP - dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", - iocb->ki_filp, (long long) pos, iter->nr_segs); - - return -EINVAL; -#else VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); if (rw == READ) return nfs_file_direct_read(iocb, iter, pos); return nfs_file_direct_write(iocb, iter, pos); -#endif /* CONFIG_NFS_SWAP */ } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) -- cgit v1.2.3 From 0695314ef0920b745423510f1e96bf60415a404a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 23 Mar 2015 16:10:00 -0400 Subject: SUNRPC: Fix a regression when reconnecting If the task needs to give up the socket lock in order to allow a reconnect to occur, then it must also clear the 'rq_bytes_sent' field so that when it retransmits, it knows to start from the beginning. Fixes: 718ba5b87343 ("SUNRPC: Add helpers to prevent socket create from racing") Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e3015aede0d9..ed4aa786c240 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -326,6 +326,15 @@ out_unlock: xprt_clear_locked(xprt); } +static void xprt_task_clear_bytes_sent(struct rpc_task *task) +{ + if (task != NULL) { + struct rpc_rqst *req = task->tk_rqstp; + if (req != NULL) + req->rq_bytes_sent = 0; + } +} + /** * xprt_release_xprt - allow other requests to use a transport * @xprt: transport with other tasks potentially waiting @@ -336,11 +345,7 @@ out_unlock: void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - if (task != NULL) { - struct rpc_rqst *req = task->tk_rqstp; - if (req != NULL) - req->rq_bytes_sent = 0; - } + xprt_task_clear_bytes_sent(task); xprt_clear_locked(xprt); __xprt_lock_write_next(xprt); } @@ -358,11 +363,7 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - if (task != NULL) { - struct rpc_rqst *req = task->tk_rqstp; - if (req != NULL) - req->rq_bytes_sent = 0; - } + xprt_task_clear_bytes_sent(task); xprt_clear_locked(xprt); __xprt_lock_write_next_cong(xprt); } @@ -700,6 +701,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt, goto out; if (xprt->snd_task != task) goto out; + xprt_task_clear_bytes_sent(task); xprt->snd_task = cookie; ret = true; out: -- cgit v1.2.3 From 5fcdfacc01f3a3415b75acb172cd75c9fe1cbda5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 13:19:42 -0400 Subject: NFSv4: Return delegations synchronously in evict_inode Kinglong Mee reports that asynchronous delegations are being killed by the call to rpc_shutdown_client() when unmounting. This can lead to state leakage on the server until the client lease expires. Reported-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a6ad68865880..08c624448750 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -514,7 +514,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) delegation = nfs_inode_detach_delegation(inode); if (delegation != NULL) - nfs_do_return_delegation(inode, delegation, 0); + nfs_do_return_delegation(inode, delegation, 1); } /** -- cgit v1.2.3 From 84a80f62f71beac20a426709c04b49f2bd352291 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 15:23:35 -0400 Subject: NFSv4.1: Convert pNFS deviceid to use kfree_rcu() Use of synchronize_rcu() when unmounting and potentially freeing a lot of deviceids is problematic. There really is no reason why we can't just use kfree_rcu() here. Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/dev.c | 2 +- fs/nfs/filelayout/filelayoutdev.c | 2 +- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- fs/nfs/objlayout/objio_osd.c | 2 +- fs/nfs/pnfs.h | 1 + fs/nfs/pnfs_dev.c | 6 ++---- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 5aed4f98df41..e535599a0719 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -33,7 +33,7 @@ bl_free_deviceid_node(struct nfs4_deviceid_node *d) container_of(d, struct pnfs_block_dev, node); bl_free_device(dev); - kfree(dev); + kfree_rcu(dev, node.rcu); } static int diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4f372e224603..4946ef40ba87 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -55,7 +55,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) nfs4_pnfs_ds_put(ds); } kfree(dsaddr->stripe_indices); - kfree(dsaddr); + kfree_rcu(dsaddr, id_node.rcu); } /* Decode opaque device data and return the result */ diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e2c01f204a95..77a2d026aa12 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -30,7 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { nfs4_print_deviceid(&mirror_ds->id_node.deviceid); nfs4_pnfs_ds_put(mirror_ds->ds); - kfree(mirror_ds); + kfree_rcu(mirror_ds, id_node.rcu); } /* Decode opaque device data and construct new_ds using it */ diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 24e1d7403c0b..8b5e0e687d5e 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -57,7 +57,7 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d) dprintk("%s: free od=%p\n", __func__, de->od.od); osduld_put_device(de->od.od); - kfree(de); + kfree_rcu(d, rcu); } struct objio_segment { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 635f0865671c..a1fc16c971a7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -302,6 +302,7 @@ struct nfs4_deviceid_node { unsigned long flags; unsigned long timestamp_unavailable; struct nfs4_deviceid deviceid; + struct rcu_head rcu; atomic_t ref; }; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index aa2ec0015183..bf23ac97d57d 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -175,8 +175,8 @@ __nfs4_find_get_deviceid(struct nfs_server *server, rcu_read_lock(); d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, hash); - if (d != NULL) - atomic_inc(&d->ref); + if (d != NULL && !atomic_inc_not_zero(&d->ref)) + d = NULL; rcu_read_unlock(); return d; } @@ -236,7 +236,6 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, } hlist_del_init_rcu(&d->node); spin_unlock(&nfs4_deviceid_lock); - synchronize_rcu(); /* balance the initial ref set in pnfs_insert_deviceid */ if (atomic_dec_and_test(&d->ref)) @@ -321,7 +320,6 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) if (hlist_empty(&tmp)) return; - synchronize_rcu(); while (!hlist_empty(&tmp)) { d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); hlist_del(&d->tmpnode); -- cgit v1.2.3 From fb1458f4578c8dc78dc124de2a58950a6d4e3492 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 15:37:00 -0400 Subject: NFSv4.1: Cleanup - don't opencode nfs4_put_deviceid_node() There really is no reason to do so. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index bf23ac97d57d..7e07f4ba4822 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -238,8 +238,7 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, spin_unlock(&nfs4_deviceid_lock); /* balance the initial ref set in pnfs_insert_deviceid */ - if (atomic_dec_and_test(&d->ref)) - d->ld->free_deviceid_node(d); + nfs4_put_deviceid_node(d); } EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); @@ -323,8 +322,7 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) while (!hlist_empty(&tmp)) { d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); hlist_del(&d->tmpnode); - if (atomic_dec_and_test(&d->ref)) - d->ld->free_deviceid_node(d); + nfs4_put_deviceid_node(d); } } -- cgit v1.2.3 From 4e59080397faadee59d39ffa2116dc8607adc9c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 14:01:25 -0400 Subject: NFSv4.1: Allow getdeviceinfo to return notification info back to caller We are only allowed to cache deviceinfo if the server supports notifications and actually promises to call us back when changes occur. Right now, we request those notifications, but then we don't check the server's reply. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++++ fs/nfs/nfs4xdr.c | 14 +++++--------- include/linux/nfs_xdr.h | 2 ++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 627f37c44456..ba8b2b5e98a1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7944,6 +7944,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, { struct nfs4_getdeviceinfo_args args = { .pdev = pdev, + .notify_types = NOTIFY_DEVICEID4_CHANGE | + NOTIFY_DEVICEID4_DELETE, }; struct nfs4_getdeviceinfo_res res = { .pdev = pdev, @@ -7958,6 +7960,9 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, dprintk("--> %s\n", __func__); status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + if (res.notification & ~args.notify_types) + dprintk("%s: unsupported notification\n", __func__); + dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5c399ec41079..6b28a605c697 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1920,7 +1920,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr, p = reserve_space(xdr, 4 + 4); *p++ = cpu_to_be32(1); /* bitmap length */ - *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE); + *p++ = cpu_to_be32(args->notify_types); } static void @@ -5753,8 +5753,9 @@ out_overflow: #if defined(CONFIG_NFS_V4_1) static int decode_getdeviceinfo(struct xdr_stream *xdr, - struct pnfs_device *pdev) + struct nfs4_getdeviceinfo_res *res) { + struct pnfs_device *pdev = res->pdev; __be32 *p; uint32_t len, type; int status; @@ -5802,12 +5803,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, if (unlikely(!p)) goto out_overflow; - if (be32_to_cpup(p++) & - ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) { - dprintk("%s: unsupported notification\n", - __func__); - } - + res->notification = be32_to_cpup(p++); for (i = 1; i < len; i++) { if (be32_to_cpup(p++)) { dprintk("%s: unsupported notification\n", @@ -7061,7 +7057,7 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, status = decode_sequence(xdr, &res->seq_res, rqstp); if (status != 0) goto out; - status = decode_getdeviceinfo(xdr, res->pdev); + status = decode_getdeviceinfo(xdr, res); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4cb3eaa89cf7..3d88908fd140 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -255,11 +255,13 @@ struct nfs4_layoutget { struct nfs4_getdeviceinfo_args { struct nfs4_sequence_args seq_args; struct pnfs_device *pdev; + __u32 notify_types; }; struct nfs4_getdeviceinfo_res { struct nfs4_sequence_res seq_res; struct pnfs_device *pdev; + __u32 notification; }; struct nfs4_layoutcommit_args { -- cgit v1.2.3 From df52699e4fcefe30ebe4f1db48bd161254a0b102 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 14:48:32 -0400 Subject: NFSv4.1: Don't cache deviceids that have no notifications The spec says that once all layouts that reference a given deviceid have been returned, then we are only allowed to continue to cache the deviceid if the metadata server supports notifications. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/pnfs.h | 2 ++ fs/nfs/pnfs_dev.c | 9 +++++++++ 3 files changed, 13 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ba8b2b5e98a1..9ff8c63c9cac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7962,6 +7962,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (res.notification & ~args.notify_types) dprintk("%s: unsupported notification\n", __func__); + if (res.notification != args.notify_types) + pdev->nocache = 1; dprintk("<-- %s status=%d\n", __func__, status); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a1fc16c971a7..b5654e8da936 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -203,6 +203,7 @@ struct pnfs_device { struct page **pages; unsigned int pgbase; unsigned int pglen; /* reply buffer length */ + unsigned char nocache : 1;/* May not be cached */ }; #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 @@ -291,6 +292,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ + NFS_DEVICEID_NOCACHE, /* device may not be cached */ }; /* pnfs_dev.c */ diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 7e07f4ba4822..2961fcd7a2df 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -149,6 +149,8 @@ nfs4_get_device_info(struct nfs_server *server, */ d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, gfp_flags); + if (d && pdev->nocache) + set_bit(NFS_DEVICEID_NOCACHE, &d->flags); out_free_pages: for (i = 0; i < max_pages; i++) @@ -235,6 +237,7 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, return; } hlist_del_init_rcu(&d->node); + clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); spin_unlock(&nfs4_deviceid_lock); /* balance the initial ref set in pnfs_insert_deviceid */ @@ -269,6 +272,11 @@ EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) { + if (test_bit(NFS_DEVICEID_NOCACHE, &d->flags)) { + if (atomic_add_unless(&d->ref, -1, 2)) + return false; + nfs4_delete_deviceid(d->ld, d->nfs_client, &d->deviceid); + } if (!atomic_dec_and_test(&d->ref)) return false; d->ld->free_deviceid_node(d); @@ -312,6 +320,7 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); hlist_add_head(&d->tmpnode, &tmp); + clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); } rcu_read_unlock(); spin_unlock(&nfs4_deviceid_lock); -- cgit v1.2.3 From fc87701b918c05c2d78f8191f5bc3c6c178bdb3d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 17:25:14 -0400 Subject: NFS: Fix free_deveiceid -> free_deviceid Make it easier to grep for these functions by name. Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 4 ++-- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 91e88a7ecef0..5639a2ef671a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1086,7 +1086,7 @@ filelayout_alloc_deviceid_node(struct nfs_server *server, } static void -filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) +filelayout_free_deviceid_node(struct nfs4_deviceid_node *d) { nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); } @@ -1137,7 +1137,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, .alloc_deviceid_node = filelayout_alloc_deviceid_node, - .free_deviceid_node = filelayout_free_deveiceid_node, + .free_deviceid_node = filelayout_free_deviceid_node, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 315cc68945b9..edfb27ea4ce2 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1414,7 +1414,7 @@ ff_layout_get_ds_info(struct inode *inode) } static void -ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d) +ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d) { nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds, id_node)); @@ -1498,7 +1498,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .pg_read_ops = &ff_layout_pg_read_ops, .pg_write_ops = &ff_layout_pg_write_ops, .get_ds_info = ff_layout_get_ds_info, - .free_deviceid_node = ff_layout_free_deveiceid_node, + .free_deviceid_node = ff_layout_free_deviceid_node, .mark_request_commit = pnfs_layout_mark_request_commit, .clear_request_commit = pnfs_generic_clear_request_commit, .scan_commit_lists = pnfs_generic_scan_commit_lists, -- cgit v1.2.3 From 81b79afb504870a31ee1f764e81a202685cde795 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 19:09:08 -0400 Subject: NFSv4: Allow tracing of NFSv4 fsync calls I appear to have missed this when adding the ftrace probes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 5 +++++ fs/nfs/nfstrace.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8b46389c4c5b..0de62f7cfebf 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -10,6 +10,8 @@ #include "fscache.h" #include "pnfs.h" +#include "nfstrace.h" + #ifdef CONFIG_NFS_V4_2 #include "nfs42.h" #endif @@ -100,6 +102,8 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct inode *inode = file_inode(file); + trace_nfs_fsync_enter(inode); + do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) @@ -118,6 +122,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) end = LLONG_MAX; } while (ret == -EAGAIN); + trace_nfs_fsync_exit(inode, ret); return ret; } diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c index 4eb0aead69b6..c74f7af23d77 100644 --- a/fs/nfs/nfstrace.c +++ b/fs/nfs/nfstrace.c @@ -7,3 +7,6 @@ #define CREATE_TRACE_POINTS #include "nfstrace.h" + +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter); +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit); -- cgit v1.2.3 From 415320fc14eca40b564d1797e68895d15b51ac49 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 13:26:18 -0400 Subject: NFSv4: Return the delegation before returning the layout in evict_inode() Minor optimisation for the case where the layout has return-on-close enabled. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 75090feeafad..d91898193b2f 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -91,10 +91,11 @@ static void nfs4_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); /* If we are holding a delegation, return it! */ nfs_inode_return_delegation_noreclaim(inode); + /* Note that above delegreturn would trigger pnfs return-on-close */ + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); } -- cgit v1.2.3 From 29559b11aef072f893cd32280dcec9d7380ca011 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 20:10:20 -0400 Subject: NFSv4.1/pnfs: Fix setting of layoutcommit last write byte If the NFS_INO_LAYOUTCOMMIT flag was unset, then we _must_ ensure that we also reset the last write byte (lwb) for that layout. The current code depends on us clearing the lwb when we clear NFS_INO_LAYOUTCOMMIT, which is not the case when we call pnfs_clear_layoutcommit(). Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4f802b02fbb9..b96736df98e8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2108,16 +2108,16 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) spin_lock(&inode->i_lock); if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + nfsi->layout->plh_lwb = end_pos; mark_as_dirty = true; dprintk("%s: Set layoutcommit for inode %lu ", __func__, inode->i_ino); - } + } else if (end_pos > nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = end_pos; if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ pnfs_get_lseg(hdr->lseg); } - if (end_pos > nfsi->layout->plh_lwb) - nfsi->layout->plh_lwb = end_pos; spin_unlock(&inode->i_lock); dprintk("%s: lseg %p end_pos %llu\n", __func__, hdr->lseg, nfsi->layout->plh_lwb); @@ -2137,16 +2137,16 @@ void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) spin_lock(&inode->i_lock); if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + nfsi->layout->plh_lwb = data->lwb; mark_as_dirty = true; dprintk("%s: Set layoutcommit for inode %lu ", __func__, inode->i_ino); - } + } else if (data->lwb > nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = data->lwb; if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ pnfs_get_lseg(data->lseg); } - if (data->lwb > nfsi->layout->plh_lwb) - nfsi->layout->plh_lwb = data->lwb; spin_unlock(&inode->i_lock); dprintk("%s: lseg %p end_pos %llu\n", __func__, data->lseg, nfsi->layout->plh_lwb); @@ -2216,7 +2216,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) pnfs_list_write_lseg(inode, &data->lseg_list); end_pos = nfsi->layout->plh_lwb; - nfsi->layout->plh_lwb = 0; nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); spin_unlock(&inode->i_lock); @@ -2233,11 +2232,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) status = ld->prepare_layoutcommit(&data->args); if (status) { spin_lock(&inode->i_lock); - if (end_pos < nfsi->layout->plh_lwb) + set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); + if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; spin_unlock(&inode->i_lock); put_rpccred(data->cred); - set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); goto clear_layoutcommitting; } } -- cgit v1.2.3 From 67af7611ec57dbcbc96f9d9daa4d3c7b0999aa73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 20:40:38 -0400 Subject: NFSv4.1/pnfs: Refactor pnfs_set_layoutcommit() pnfs_set_layoutcommit() and pnfs_commit_set_layoutcommit() are 100% identical except for the function arguments. Refactor to eliminate the difference. Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 5 ++-- fs/nfs/flexfilelayout/flexfilelayout.c | 5 ++-- fs/nfs/pnfs.c | 43 ++++++---------------------------- fs/nfs/pnfs.h | 3 +-- 4 files changed, 14 insertions(+), 42 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 5639a2ef671a..a317b007e436 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -258,7 +258,8 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) hdr->res.verf->committed != NFS_DATA_SYNC) return; - pnfs_set_layoutcommit(hdr); + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, + hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -373,7 +374,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, } if (data->verf.committed == NFS_UNSTABLE) - pnfs_commit_set_layoutcommit(data); + pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index edfb27ea4ce2..92d2943e54f8 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -891,7 +891,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, static void ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) { - pnfs_set_layoutcommit(hdr); + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, + hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -1074,7 +1075,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, } if (data->verf.committed == NFS_UNSTABLE) - pnfs_commit_set_layoutcommit(data); + pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b96736df98e8..ea83f3c03c65 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1841,7 +1841,8 @@ void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); if (!hdr->pnfs_error) { - pnfs_set_layoutcommit(hdr); + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, + hdr->mds_offset + hdr->res.count); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else pnfs_ld_handle_write_error(hdr); @@ -2099,11 +2100,10 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) +pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg, + loff_t end_pos) { - struct inode *inode = hdr->inode; struct nfs_inode *nfsi = NFS_I(inode); - loff_t end_pos = hdr->mds_offset + hdr->res.count; bool mark_as_dirty = false; spin_lock(&inode->i_lock); @@ -2114,13 +2114,13 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) __func__, inode->i_ino); } else if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ - pnfs_get_lseg(hdr->lseg); + pnfs_get_lseg(lseg); } spin_unlock(&inode->i_lock); dprintk("%s: lseg %p end_pos %llu\n", - __func__, hdr->lseg, nfsi->layout->plh_lwb); + __func__, lseg, nfsi->layout->plh_lwb); /* if pnfs_layoutcommit_inode() runs between inode locks, the next one * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ @@ -2129,35 +2129,6 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); -void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) -{ - struct inode *inode = data->inode; - struct nfs_inode *nfsi = NFS_I(inode); - bool mark_as_dirty = false; - - spin_lock(&inode->i_lock); - if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { - nfsi->layout->plh_lwb = data->lwb; - mark_as_dirty = true; - dprintk("%s: Set layoutcommit for inode %lu ", - __func__, inode->i_ino); - } else if (data->lwb > nfsi->layout->plh_lwb) - nfsi->layout->plh_lwb = data->lwb; - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { - /* references matched in nfs4_layoutcommit_release */ - pnfs_get_lseg(data->lseg); - } - spin_unlock(&inode->i_lock); - dprintk("%s: lseg %p end_pos %llu\n", - __func__, data->lseg, nfsi->layout->plh_lwb); - - /* if pnfs_layoutcommit_inode() runs between inode locks, the next one - * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ - if (mark_as_dirty) - mark_inode_dirty_sync(inode); -} -EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); - void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) { struct nfs_server *nfss = NFS_SERVER(data->args.inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b5654e8da936..66bf5e1cf93d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -264,8 +264,7 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_pgio_header *); -void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); +void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); -- cgit v1.2.3 From 4d346bea8f0bf6d6351037899586a85435c15b9b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 16:38:33 -0400 Subject: NFS: Add a helper to sync both O_DIRECT and buffered writes Then apply it to nfs_setattr() and nfs_getattr(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d42dff6d5e98..60f907666697 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -133,6 +133,12 @@ void nfs_evict_inode(struct inode *inode) nfs_clear_inode(inode); } +static int nfs_sync_inode(struct inode *inode) +{ + nfs_inode_dio_wait(inode); + return nfs_wb_all(inode); +} + /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk */ @@ -525,10 +531,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) trace_nfs_setattr_enter(inode); /* Write all dirty data */ - if (S_ISREG(inode->i_mode)) { - nfs_inode_dio_wait(inode); - nfs_wb_all(inode); - } + if (S_ISREG(inode->i_mode)) + nfs_sync_inode(inode); fattr = nfs_alloc_fattr(); if (fattr == NULL) @@ -644,8 +648,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { - nfs_inode_dio_wait(inode); - err = filemap_write_and_wait(inode->i_mapping); + err = nfs_sync_inode(inode); if (err) goto out; } -- cgit v1.2.3 From d9dabc1a01656868d53a1d08309ca24d922b1376 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Mar 2015 15:55:49 -0400 Subject: NFS: File unlock needs to be a metadata synchronisation point File unlock needs to update both data and metadata on the NFS server in order to act as a synchronisation point for other clients. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e679d24c39d3..6959cb76744b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -780,7 +780,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * Flush all pending writes before doing anything * with locks.. */ - nfs_sync_mapping(filp->f_mapping); + vfs_fsync(filp, 0); l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); if (!IS_ERR(l_ctx)) { -- cgit v1.2.3 From 9e1681c2e74bdd84bad6f74b47a4d88ad2f433df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 17:23:31 -0400 Subject: NFSv4: Truncating file opens should also sync O_DIRECT writes We don't just want to sync out buffered writes, but also O_DIRECT ones. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 ++- fs/nfs/nfs4file.c | 2 +- include/linux/nfs_fs.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 60f907666697..f1fc0e4c1c02 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -133,11 +133,12 @@ void nfs_evict_inode(struct inode *inode) nfs_clear_inode(inode); } -static int nfs_sync_inode(struct inode *inode) +int nfs_sync_inode(struct inode *inode) { nfs_inode_dio_wait(inode); return nfs_wb_all(inode); } +EXPORT_SYMBOL_GPL(nfs_sync_inode); /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 0de62f7cfebf..8da5409e6f1a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -59,7 +59,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (openflags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; - nfs_wb_all(inode); + nfs_sync_inode(inode); } inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b01ccf371fdc..b638eb6727c6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -512,6 +512,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned * Try to write back everything synchronously (but check the * return value!) */ +extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -- cgit v1.2.3 From a0815d556d1cfb686b46995f86fb081f623fa720 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 18:58:53 -0400 Subject: NFSv4.1/pnfs: Ensure that writes respect the O_SYNC flag when doing O_DIRECT If the caller does not specify the O_SYNC flag, then it is legitimate to return from O_DIRECT without doing a pNFS layoutcommit operation. However if the file is opened O_DIRECT|O_SYNC then we'd better get it right. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 1 + fs/nfs/file.c | 1 + fs/nfs/nfs4file.c | 1 + 3 files changed, 3 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 51f617e45fd1..018a06a062bd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -1034,6 +1034,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, if (i_size_read(inode) < iocb->ki_pos) i_size_write(inode, iocb->ki_pos); spin_unlock(&inode->i_lock); + generic_write_sync(file, pos, result); } } nfs_direct_req_release(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6959cb76744b..28228f381266 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -281,6 +281,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) trace_nfs_fsync_enter(inode); + nfs_inode_dio_wait(inode); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8da5409e6f1a..befe7a2c6284 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -104,6 +104,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) trace_nfs_fsync_enter(inode); + nfs_inode_dio_wait(inode); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) -- cgit v1.2.3 From 7140171ea9be4143736c35acf6f31b6feb195ca0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 12:36:13 -0400 Subject: NFSv4.1/pnfs: Ensure we send layoutcommit before return-on-close We must not send a close or delegreturn that would result in a return-on-close of the layout without ensuring that we've also sent the necessary layoutcommit. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ea83f3c03c65..c2ce2db771b7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1090,6 +1090,7 @@ bool pnfs_roc(struct inode *ino) pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); + pnfs_layoutcommit_inode(ino, true); return true; out_noroc: @@ -1104,8 +1105,10 @@ out_noroc: } } spin_unlock(&ino->i_lock); - if (layoutreturn) + if (layoutreturn) { + pnfs_layoutcommit_inode(ino, true); pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); + } return false; } -- cgit v1.2.3 From 5bb89b4702e22981445ae01af733a57d1cae2018 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 14:14:42 -0400 Subject: NFSv4.1/pnfs: Separate out metadata and data consistency for pNFS The LAYOUTCOMMIT operation means different things to different layout types. For blocks and objects, it is both a data and metadata consistency operation. For files and flexfiles, it is only a metadata consistency operation. This patch separates out the 2 cases, allowing the files/flexfiles layout drivers to optimise away the data consistency calls to layoutcommit. Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 1 + fs/nfs/filelayout/filelayout.c | 1 + fs/nfs/flexfilelayout/flexfilelayout.c | 1 + fs/nfs/nfs4file.c | 2 +- fs/nfs/objlayout/objio_osd.c | 2 ++ fs/nfs/pnfs.c | 7 +++++++ fs/nfs/pnfs.h | 18 ++++++++++++++++++ fs/nfs/pnfs_nfs.c | 10 ++++++++++ fs/nfs/write.c | 13 ++++++------- 9 files changed, 47 insertions(+), 8 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 1cac3c175d18..d2554fe140a3 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -890,6 +890,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = { .free_deviceid_node = bl_free_deviceid_node, .pg_read_ops = &bl_pg_read_ops, .pg_write_ops = &bl_pg_write_ops, + .sync = pnfs_generic_sync, }; static int __init nfs4blocklayout_init(void) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a317b007e436..a46bf6de9ce4 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1139,6 +1139,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .write_pagelist = filelayout_write_pagelist, .alloc_deviceid_node = filelayout_alloc_deviceid_node, .free_deviceid_node = filelayout_free_deviceid_node, + .sync = pnfs_nfs_generic_sync, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 92d2943e54f8..f3ff66e4fb6a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1509,6 +1509,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .write_pagelist = ff_layout_write_pagelist, .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, + .sync = pnfs_nfs_generic_sync, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index befe7a2c6284..866842b048ef 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -112,7 +112,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); if (!ret) - ret = pnfs_layoutcommit_inode(inode, true); + ret = pnfs_sync_inode(inode, !!datasync); mutex_unlock(&inode->i_mutex); /* * If nfs_file_fsync_commit detected a server reboot, then diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8b5e0e687d5e..5aaed363556a 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -637,6 +637,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .pg_read_ops = &objio_pg_read_ops, .pg_write_ops = &objio_pg_write_ops, + .sync = pnfs_generic_sync, + .free_deviceid_node = objio_free_deviceid_node, .encode_layoutcommit = objlayout_encode_layoutcommit, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c2ce2db771b7..3f0affebcf05 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2231,6 +2231,13 @@ clear_layoutcommitting: } EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); +int +pnfs_generic_sync(struct inode *inode, bool datasync) +{ + return pnfs_layoutcommit_inode(inode, true); +} +EXPORT_SYMBOL_GPL(pnfs_generic_sync); + struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) { struct nfs4_threshold *thp; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 66bf5e1cf93d..231eb23c22da 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -155,6 +155,8 @@ struct pnfs_layoutdriver_type { int how, struct nfs_commit_info *cinfo); + int (*sync)(struct inode *inode, bool datasync); + /* * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS @@ -267,6 +269,8 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int pnfs_generic_sync(struct inode *inode, bool datasync); +int pnfs_nfs_generic_sync(struct inode *inode, bool datasync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_pgio_header *); @@ -488,6 +492,14 @@ pnfs_ld_read_whole_page(struct inode *inode) return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; } +static inline int +pnfs_sync_inode(struct inode *inode, bool datasync) +{ + if (!pnfs_enabled_sb(NFS_SERVER(inode))) + return 0; + return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync); +} + static inline bool pnfs_layoutcommit_outstanding(struct inode *inode) { @@ -570,6 +582,12 @@ pnfs_ld_read_whole_page(struct inode *inode) return false; } +static inline int +pnfs_sync_inode(struct inode *inode, bool datasync) +{ + return 0; +} + static inline bool pnfs_roc(struct inode *ino) { diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 54e36b38fb5f..64d2a5932f7b 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -868,3 +868,13 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, nfs_request_add_commit_list(req, list, cinfo); } EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); + +int +pnfs_nfs_generic_sync(struct inode *inode, bool datasync) +{ + if (datasync) + return 0; + return pnfs_layoutcommit_inode(inode, true); +} +EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync); + diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 849ed784d6ac..7f933a39f385 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1840,17 +1840,16 @@ EXPORT_SYMBOL_GPL(nfs_write_inode); */ int nfs_wb_all(struct inode *inode) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = 0, - .range_end = LLONG_MAX, - }; int ret; trace_nfs_writeback_inode_enter(inode); - ret = sync_inode(inode, &wbc); + ret = filemap_write_and_wait(inode->i_mapping); + if (!ret) { + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (!ret) + pnfs_sync_inode(inode, true); + } trace_nfs_writeback_inode_exit(inode, ret); return ret; -- cgit v1.2.3 From 8c18d76bcba874e872410ca63c7e59b10aafa17d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 16:40:07 -0400 Subject: NFS: Block new writes while syncing data in nfs_getattr() Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f1fc0e4c1c02..0c3be2658546 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -649,7 +649,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { + mutex_lock(&inode->i_mutex); err = nfs_sync_inode(inode); + mutex_unlock(&inode->i_mutex); if (err) goto out; } -- cgit v1.2.3 From dbbc14775ae395a50d91f22904670ca4c9297542 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Mar 2015 14:33:33 -0400 Subject: SUNRPC: Introduce missing well-known netids Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/msg_prot.h | 8 +++++++- include/linux/sunrpc/xprtrdma.h | 5 ----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index aadc6a04e1ac..807371357160 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -142,12 +142,18 @@ typedef __be32 rpc_fraghdr; (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) /* - * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. + * Well-known netids. See: + * + * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" +#define RPCBIND_NETID_RDMA "rdma" +#define RPCBIND_NETID_SCTP "sctp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_RDMA6 "rdma6" +#define RPCBIND_NETID_SCTP6 "sctp6" #define RPCBIND_NETID_LOCAL "local" /* diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 64a0a0a97b23..c984c85981ea 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -40,11 +40,6 @@ #ifndef _LINUX_SUNRPC_XPRTRDMA_H #define _LINUX_SUNRPC_XPRTRDMA_H -/* - * rpcbind (v3+) RDMA netid. - */ -#define RPCBIND_NETID_RDMA "rdma" - /* * Constants. Max RPC/NFS header is big enough to account for * additional marshaling buffers passed down by Linux client. -- cgit v1.2.3 From 0dd39cae26f3990789b4a558f9abafe59adc6fc1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Mar 2015 14:33:43 -0400 Subject: xprtrdma: Display IPv6 addresses and port numbers correctly Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Tested-by: Devesh Sharma Tested-by: Meghana Cheripady Tested-by: Veeresh U. Kokatnur Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 47 +++++++++++++++++++++++++++++++++-------- net/sunrpc/xprtrdma/verbs.c | 21 ++++++++---------- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2e192baa59f3..9be7f97205ba 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -156,13 +156,48 @@ static struct ctl_table sunrpc_table[] = { static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ +static void +xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + char buf[20]; + + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); + + xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; +} + +static void +xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + char buf[40]; + + snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); + + xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; +} + static void xprt_rdma_format_addresses(struct rpc_xprt *xprt) { struct sockaddr *sap = (struct sockaddr *) &rpcx_to_rdmad(xprt).addr; - struct sockaddr_in *sin = (struct sockaddr_in *)sap; - char buf[64]; + char buf[128]; + + switch (sap->sa_family) { + case AF_INET: + xprt_rdma_format_addresses4(xprt, sap); + break; + case AF_INET6: + xprt_rdma_format_addresses6(xprt, sap); + break; + default: + pr_err("rpcrdma: Unrecognized address family\n"); + return; + } (void)rpc_ntop(sap, buf, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); @@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - - snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); - snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); - /* netid */ - xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; + xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; } static void diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 124676c13780..1aa55b74b1b7 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include "xprt_rdma.h" @@ -424,7 +425,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ep *ep = &xprt->rx_ep; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; + struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; #endif struct ib_qp_attr *attr = &ia->ri_qp_attr; struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; @@ -480,9 +481,8 @@ connected: wake_up_all(&ep->rep_connect_wait); /*FALLTHROUGH*/ default: - dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", - __func__, &addr->sin_addr.s_addr, - ntohs(addr->sin_port), ep, + dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", + __func__, sap, rpc_get_port(sap), ep, CONNECTION_MSG(event->event)); break; } @@ -491,19 +491,16 @@ connected: if (connstate == 1) { int ird = attr->max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; - printk(KERN_INFO "rpcrdma: connection to %pI4:%u " - "on %s, memreg %d slots %d ird %d%s\n", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), + + pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n", + sap, rpc_get_port(sap), ia->ri_id->device->name, ia->ri_memreg_strategy, xprt->rx_buf.rb_max_requests, ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); } else if (connstate < 0) { - printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), - connstate); + pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n", + sap, rpc_get_port(sap), connstate); } #endif -- cgit v1.2.3 From e23779451ee05e824fedbb68bd17fc5c77e40166 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Mar 2015 14:33:53 -0400 Subject: xprtrdma: Perform a full marshal on retransmit Commit 6ab59945f292 ("xprtrdma: Update rkeys after transport reconnect" added logic in the ->send_request path to update the chunk list when an RPC/RDMA request is retransmitted. Note that rpc_xdr_encode() resets and re-encodes the entire RPC send buffer for each retransmit of an RPC. The RPC send buffer is not preserved from the previous transmission of an RPC. Revert 6ab59945f292, and instead, just force each request to be fully marshaled every time through ->send_request. This should preserve the fix from 6ab59945f292, while also performing pullup during retransmits. Signed-off-by: Chuck Lever Acked-by: Sagi Grimberg Tested-by: Devesh Sharma Tested-by: Meghana Cheripady Tested-by: Veeresh U. Kokatnur Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 71 +++++++++++++++++++---------------------- net/sunrpc/xprtrdma/transport.c | 5 +-- net/sunrpc/xprtrdma/xprt_rdma.h | 10 ------ 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 91ffde82fa0c..41456d9e5a7d 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -53,6 +53,14 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif +enum rpcrdma_chunktype { + rpcrdma_noch = 0, + rpcrdma_readch, + rpcrdma_areadch, + rpcrdma_writech, + rpcrdma_replych +}; + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char transfertypes[][12] = { "pure inline", /* no chunks */ @@ -283,28 +291,6 @@ out: return n; } -/* - * Marshal chunks. This routine returns the header length - * consumed by marshaling. - * - * Returns positive RPC/RDMA header size, or negative errno. - */ - -ssize_t -rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) -{ - struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); - - if (req->rl_rtype != rpcrdma_noch) - result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, - headerp, req->rl_rtype); - else if (req->rl_wtype != rpcrdma_noch) - result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, - headerp, req->rl_wtype); - return result; -} - /* * Copy write data inline. * This function is used for "small" requests. Data which is passed @@ -397,6 +383,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) char *base; size_t rpclen, padlen; ssize_t hdrlen; + enum rpcrdma_chunktype rtype, wtype; struct rpcrdma_msg *headerp; /* @@ -433,13 +420,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * into pages; otherwise use reply chunks. */ if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) - req->rl_wtype = rpcrdma_noch; + wtype = rpcrdma_noch; else if (rqst->rq_rcv_buf.page_len == 0) - req->rl_wtype = rpcrdma_replych; + wtype = rpcrdma_replych; else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) - req->rl_wtype = rpcrdma_writech; + wtype = rpcrdma_writech; else - req->rl_wtype = rpcrdma_replych; + wtype = rpcrdma_replych; /* * Chunks needed for arguments? @@ -456,16 +443,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * TBD check NFSv4 setacl */ if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) - req->rl_rtype = rpcrdma_noch; + rtype = rpcrdma_noch; else if (rqst->rq_snd_buf.page_len == 0) - req->rl_rtype = rpcrdma_areadch; + rtype = rpcrdma_areadch; else - req->rl_rtype = rpcrdma_readch; + rtype = rpcrdma_readch; /* The following simplification is not true forever */ - if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych) - req->rl_wtype = rpcrdma_noch; - if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) { + if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) + wtype = rpcrdma_noch; + if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { dprintk("RPC: %s: cannot marshal multiple chunk lists\n", __func__); return -EIO; @@