summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-30 16:33:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-30 16:33:25 -0700
commit7f155c702677d057d03b192ce652311de5434697 (patch)
treedcee0fbb463ec3e55cb50181180c7d175d5895c3 /fs
parentd761f3ed6e71bcca724a6e9e39efcac65b7b4ac1 (diff)
parent944171cbf499d3445c749f7c13c46de0a564a905 (diff)
Merge tag 'nfs-for-4.8-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable bugfixes: - nfs: don't create zero-length requests - several LAYOUTGET bugfixes Features: - several performance related features - more aggressive caching when we can rely on close-to-open cache consistency - remove serialisation of O_DIRECT reads and writes - optimise several code paths to not flush to disk unnecessarily. However allow for the idiosyncracies of pNFS for those layout types that need to issue a LAYOUTCOMMIT before the metadata can be updated on the server. - SUNRPC updates to the client data receive path - pNFS/SCSI support RH/Fedora dm-mpath device nodes - pNFS files/flexfiles can now use unprivileged ports when the generic NFS mount options allow it. Bugfixes: - Don't use RDMA direct data placement together with data integrity or privacy security flavours - Remove the RDMA ALLPHYSICAL memory registration mode as it has potential security holes. - Several layout recall fixes to improve NFSv4.1 protocol compliance. - Fix an Oops in the pNFS files and flexfiles connection setup to the DS - Allow retry of operations that used a returned delegation stateid - Don't mark the inode as revalidated if a LAYOUTCOMMIT is outstanding - Fix writeback races in nfs4_copy_range() and nfs42_proc_deallocate()" * tag 'nfs-for-4.8-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (104 commits) pNFS: Actively set attributes as invalid if LAYOUTCOMMIT is outstanding NFSv4: Clean up lookup of SECINFO_NO_NAME NFSv4.2: Fix warning "variable ‘stateids’ set but not used" NFSv4: Fix warning "no previous prototype for ‘nfs4_listxattr’" SUNRPC: Fix a compiler warning in fs/nfs/clnt.c pNFS: Remove redundant smp_mb() from pnfs_init_lseg() pNFS: Cleanup - do layout segment initialisation in one place pNFS: Remove redundant stateid invalidation pNFS: Remove redundant pnfs_mark_layout_returned_if_empty() pNFS: Clear the layout metadata if the server changed the layout stateid pNFS: Cleanup - don't open code pnfs_mark_layout_stateid_invalid() NFS: pnfs_mark_matching_lsegs_return() should match the layout sequence id pNFS: Do not set plh_return_seq for non-callback related layoutreturns pNFS: Ensure layoutreturn acts as a completion for layout callbacks pNFS: Fix CB_LAYOUTRECALL stateid verification pNFS: Always update the layout barrier seqid on LAYOUTGET pNFS: Always update the layout stateid if NFS_LAYOUT_INVALID_STID is set pNFS: Clear the layout return tracking on layout reinitialisation pNFS: LAYOUTRETURN should only update the stateid if the layout is valid nfs: don't create zero-length requests ...
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/Makefile2
-rw-r--r--fs/nfs/blocklayout/dev.c110
-rw-r--r--fs/nfs/blocklayout/extent_tree.c27
-rw-r--r--fs/nfs/callback_proc.c64
-rw-r--r--fs/nfs/callback_xdr.c6
-rw-r--r--fs/nfs/client.c22
-rw-r--r--fs/nfs/dir.c52
-rw-r--r--fs/nfs/direct.c93
-rw-r--r--fs/nfs/file.c100
-rw-r--r--fs/nfs/filelayout/filelayout.c18
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c23
-rw-r--r--fs/nfs/inode.c138
-rw-r--r--fs/nfs/internal.h62
-rw-r--r--fs/nfs/io.c147
-rw-r--r--fs/nfs/nfs3client.c14
-rw-r--r--fs/nfs/nfs42proc.c24
-rw-r--r--fs/nfs/nfs42xdr.c12
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c26
-rw-r--r--fs/nfs/nfs4file.c16
-rw-r--r--fs/nfs/nfs4proc.c153
-rw-r--r--fs/nfs/nfs4xdr.c11
-rw-r--r--fs/nfs/nfstrace.h1
-rw-r--r--fs/nfs/pnfs.c191
-rw-r--r--fs/nfs/pnfs.h34
-rw-r--r--fs/nfs/pnfs_nfs.c13
-rw-r--r--fs/nfs/super.c14
-rw-r--r--fs/nfs/write.c44
28 files changed, 866 insertions, 552 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8664417955a2..6abdda209642 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o
CFLAGS_nfstrace.o += -I$(src)
nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
- direct.o pagelist.o read.o symlink.o unlink.o \
+ io.o direct.o pagelist.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o nfstrace.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index e5b89675263e..a69ef4e9c24c 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -65,8 +65,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
if (!p)
return -EIO;
b->simple.nr_sigs = be32_to_cpup(p++);
- if (!b->simple.nr_sigs) {
- dprintk("no signature\n");
+ if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) {
+ dprintk("Bad signature count: %d\n", b->simple.nr_sigs);
return -EIO;
}
@@ -89,7 +89,8 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
memcpy(&b->simple.sigs[i].sig, p,
b->simple.sigs[i].sig_len);
- b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
+ b->simple.len += 8 + 4 + \
+ (XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
}
break;
case PNFS_BLOCK_VOLUME_SLICE:
@@ -104,7 +105,12 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_inline_decode(xdr, 4);
if (!p)
return -EIO;
+
b->concat.volumes_count = be32_to_cpup(p++);
+ if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
+ dprintk("Too many volumes: %d\n", b->concat.volumes_count);
+ return -EIO;
+ }
p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
if (!p)
@@ -116,8 +122,13 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_inline_decode(xdr, 8 + 4);
if (!p)
return -EIO;
+
p = xdr_decode_hyper(p, &b->stripe.chunk_size);
b->stripe.volumes_count = be32_to_cpup(p++);
+ if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
+ dprintk("Too many volumes: %d\n", b->stripe.volumes_count);
+ return -EIO;
+ }
p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
if (!p)
@@ -224,18 +235,20 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
+ struct block_device *bdev;
dev_t dev;
dev = bl_resolve_deviceid(server, v, gfp_mask);
if (!dev)
return -EIO;
- d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
- if (IS_ERR(d->bdev)) {
+ bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
+ if (IS_ERR(bdev)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
- MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
- return PTR_ERR(d->bdev);
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
+ return PTR_ERR(bdev);
}
+ d->bdev = bdev;
d->len = i_size_read(d->bdev->bd_inode);
@@ -287,44 +300,71 @@ bl_validate_designator(struct pnfs_block_volume *v)
}
}
+/*
+ * Try to open the udev path for the WWN. At least on Debian the udev
+ * by-id path will always point to the dm-multipath device if one exists.
+ */
+static struct block_device *
+bl_open_udev_path(struct pnfs_block_volume *v)
+{
+ struct block_device *bdev;
+ const char *devname;
+
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
+ v->scsi.designator_len, v->scsi.designator);
+ if (!devname)
+ return ERR_PTR(-ENOMEM);
+
+ bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
+ if (IS_ERR(bdev)) {
+ pr_warn("pNFS: failed to open device %s (%ld)\n",
+ devname, PTR_ERR(bdev));
+ }
+
+ kfree(devname);
+ return bdev;
+}
+
+/*
+ * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
+ * wwn- links will only point to the first discovered SCSI device there.
+ */
+static struct block_device *
+bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
+{
+ struct block_device *bdev;
+ const char *devname;
+
+ devname = kasprintf(GFP_KERNEL,
+ "/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
+ v->scsi.designator_type,
+ v->scsi.designator_len, v->scsi.designator);
+ if (!devname)
+ return ERR_PTR(-ENOMEM);
+
+ bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
+ kfree(devname);
+ return bdev;
+}
+
static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
+ struct block_device *bdev;
const struct pr_ops *ops;
- const char *devname;
int error;
if (!bl_validate_designator(v))
return -EINVAL;
- switch (v->scsi.designator_len) {
- case 8:
- devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
- v->scsi.designator);
- break;
- case 12:
- devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
- v->scsi.designator);
- break;
- case 16:
- devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
- v->scsi.designator);
- break;
- default:
- return -EINVAL;
- }
-
- d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
- if (IS_ERR(d->bdev)) {
- pr_warn("pNFS: failed to open device %s (%ld)\n",
- devname, PTR_ERR(d->bdev));
- kfree(devname);
- return PTR_ERR(d->bdev);
- }
-
- kfree(devname);
+ bdev = bl_open_dm_mpath_udev_path(v);
+ if (IS_ERR(bdev))
+ bdev = bl_open_udev_path(v);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+ d->bdev = bdev;
d->len = i_size_read(d->bdev->bd_inode);
d->map = bl_map_simple;
@@ -352,7 +392,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
return 0;
out_blkdev_put:
- blkdev_put(d->bdev, FMODE_READ);
+ blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
return error;
}
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 720b3ff55fa9..992bcb19c11e 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)
return be;
}
+static void __ext_put_deviceids(struct list_head *head)
+{
+ struct pnfs_block_extent *be, *tmp;
+
+ list_for_each_entry_safe(be, tmp, head, be_list) {
+ nfs4_put_deviceid_node(be->be_device);
+ kfree(be);
+ }
+}
+
static void
__ext_tree_insert(struct rb_root *root,
struct pnfs_block_extent *new, bool merge_ok)
@@ -163,7 +173,8 @@ free_new:
}
static int
-__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
+__ext_tree_remove(struct rb_root *root,
+ sector_t start, sector_t end, struct list_head *tmp)
{
struct pnfs_block_extent *be;
sector_t len1 = 0, len2 = 0;
@@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
struct pnfs_block_extent *next = ext_tree_next(be);
rb_erase(&be->be_node, root);
- nfs4_put_deviceid_node(be->be_device);
- kfree(be);
+ list_add_tail(&be->be_list, tmp);
be = next;
}
@@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout *bl, bool rw,
sector_t start, sector_t end)
{
int err, err2;
+ LIST_HEAD(tmp);
spin_lock(&bl->bl_ext_lock);
- err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
+ err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
if (rw) {
- err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
+ err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
if (!err)
err = err2;
}
spin_unlock(&bl->bl_ext_lock);
+ __ext_put_deviceids(&tmp);
return err;
}
@@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
sector_t end = start + len;
struct pnfs_block_extent *be;
int err = 0;
+ LIST_HEAD(tmp);
spin_lock(&bl->bl_ext_lock);
/*
* First remove all COW extents or holes from written to range.
*/
- err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
+ err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
if (err)
goto out;
@@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
}
out:
spin_unlock(&bl->bl_ext_lock);
+
+ __ext_put_deviceids(&tmp);
return err;
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index aaa2e8d3df6f..c92a75e066a6 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -119,27 +119,30 @@ out:
* hashed by filehandle.
*/
static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
- struct nfs_fh *fh, nfs4_stateid *stateid)
+ struct nfs_fh *fh)
{
struct nfs_server *server;
+ struct nfs_inode *nfsi;
struct inode *ino;
struct pnfs_layout_hdr *lo;
+restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
- if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid))
+ nfsi = NFS_I(lo->plh_inode);
+ if (nfs_compare_fh(fh, &nfsi->fh))
continue;
- if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
+ if (nfsi->layout != lo)
continue;
ino = igrab(lo->plh_inode);
if (!ino)
break;
spin_lock(&ino->i_lock);
/* Is this layout in the process of being freed? */
- if (NFS_I(ino)->layout != lo) {
+ if (nfsi->layout != lo) {
spin_unlock(&ino->i_lock);
iput(ino);
- break;
+ goto restart;
}
pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
@@ -151,13 +154,13 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
}
static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
- struct nfs_fh *fh, nfs4_stateid *stateid)
+ struct nfs_fh *fh)
{
struct pnfs_layout_hdr *lo;
spin_lock(&clp->cl_lock);
rcu_read_lock();
- lo = get_layout_by_fh_locked(clp, fh, stateid);
+ lo = get_layout_by_fh_locked(clp, fh);
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
@@ -167,17 +170,39 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
/*
* Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
*/
-static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo,
+static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new)
{
u32 oldseq, newseq;
- oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+ /* Is the stateid still not initialised? */
+ if (!pnfs_layout_is_valid(lo))
+ return NFS4ERR_DELAY;
+
+ /* Mismatched stateid? */
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, new))
+ return NFS4ERR_BAD_STATEID;
+
newseq = be32_to_cpu(new->seqid);
+ /* Are we already in a layout recall situation? */
+ if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
+ lo->plh_return_seq != 0) {
+ if (newseq < lo->plh_return_seq)
+ return NFS4ERR_OLD_STATEID;
+ if (newseq > lo->plh_return_seq)
+ return NFS4ERR_DELAY;
+ goto out;
+ }
+ /* Check that the stateid matches what we think it should be. */
+ oldseq = be32_to_cpu(lo->plh_stateid.seqid);
if (newseq > oldseq + 1)
- return false;
- return true;
+ return NFS4ERR_DELAY;
+ /* Crazy server! */
+ if (newseq <= oldseq)
+ return NFS4ERR_OLD_STATEID;
+out:
+ return NFS_OK;
}
static u32 initiate_file_draining(struct nfs_client *clp,
@@ -188,7 +213,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
LIST_HEAD(free_me_list);
- lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
+ lo = get_layout_by_fh(clp, &args->cbl_fh);
if (!lo) {
trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,
&args->cbl_stateid, -rv);
@@ -196,18 +221,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
}
ino = lo->plh_inode;
+ pnfs_layoutcommit_inode(ino, false);
+
spin_lock(&ino->i_lock);
- if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) {
- rv = NFS4ERR_DELAY;
+ rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid);
+ if (rv != NFS_OK)
goto unlock;
- }
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
- spin_unlock(&ino->i_lock);
-
- pnfs_layoutcommit_inode(ino, false);
- spin_lock(&ino->i_lock);
/*
* Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
*/
@@ -223,11 +245,13 @@ static u32 initiate_file_draining(struct nfs_client *clp,
goto unlock;
}
+ /* Embrace your forgetfulness! */
+ rv = NFS4ERR_NOMATCHING_LAYOUT;
+
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
&args->cbl_range);
}
- pnfs_mark_layout_returned_if_empty(lo);
unlock:
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d81f96aacd51..656f68f7fe53 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -925,7 +925,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
if (hdr_arg.minorversion == 0) {
cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
- return rpc_drop_reply;
+ goto out_invalidcred;
}
cps.minorversion = hdr_arg.minorversion;
@@ -953,6 +953,10 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
nfs_put_client(cps.clp);
dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
+
+out_invalidcred:
+ pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n");
+ return rpc_autherr_badcred;
}
/*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 487c5607d52f..003ebce4bbc4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -367,8 +367,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
*/
struct nfs_client *
nfs_get_client(const struct nfs_client_initdata *cl_init,
- const struct rpc_timeout *timeparms,
- const char *ip_addr,
rpc_authflavor_t authflavour)
{
struct nfs_client *clp, *new = NULL;
@@ -399,7 +397,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
new->cl_flags = cl_init->init_flags;
- return rpc_ops->init_client(new, timeparms, ip_addr);
+ return rpc_ops->init_client(new, cl_init);
}
spin_unlock(&nn->nfs_client_lock);
@@ -470,7 +468,7 @@ EXPORT_SYMBOL_GPL(nfs_init_timeout_values);
* Create an RPC client handle
*/
int nfs_create_rpc_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
+ const struct nfs_client_initdata *cl_init,
rpc_authflavor_t flavor)
{
struct rpc_clnt *clnt = NULL;
@@ -479,8 +477,9 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.protocol = clp->cl_proto,
.address = (struct sockaddr *)&clp->cl_addr,
.addrsize = clp->cl_addrlen,
- .timeout = timeparms,
+ .timeout = cl_init->timeparms,
.servername = clp->cl_hostname,
+ .nodename = cl_init->nodename,
.program = &nfs_program,
.version = clp->rpc_ops->version,
.authflavor = flavor,
@@ -591,14 +590,12 @@ EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
* nfs_init_client - Initialise an NFS2 or NFS3 client
*
* @clp: nfs_client to initialise
- * @timeparms: timeout parameters for underlying RPC transport
- * @ip_addr: IP presentation address (not used)
+ * @cl_init: Initialisation parameters
*
* Returns pointer to an NFS client, or an ERR_PTR value.
*/
struct nfs_client *nfs_init_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
- const char *ip_addr)
+ const struct nfs_client_initdata *cl_init)
{
int error;
@@ -612,7 +609,7 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
* Create a client RPC handle for doing FSSTAT with UNIX auth only
* - RFC 2623, sec 2.3.2
*/
- error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
+ error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -633,6 +630,7 @@ static int nfs_init_server(struct nfs_server *server,
const struct nfs_parsed_mount_data *data,
struct nfs_subversion *nfs_mod)
{
+ struct rpc_timeout timeparms;
struct nfs_client_initdata cl_init = {
.hostname = data->nfs_server.hostname,
.addr = (const struct sockaddr *)&data->nfs_server.address,
@@ -640,8 +638,8 @@ static int nfs_init_server(struct nfs_server *server,
.nfs_mod = nfs_mod,
.proto = data->nfs_server.protocol,
.net = data->net,
+ .timeparms = &timeparms,
};
- struct rpc_timeout timeparms;
struct nfs_client *clp;
int error;
@@ -653,7 +651,7 @@ static int nfs_init_server(struct nfs_server *server,
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
+ clp = nfs_get_client(&cl_init, RPC_AUTH_UNIX);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index baaa38859899..177fefb26c18 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2252,21 +2252,37 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st
return NULL;
}
-static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res, bool may_block)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
- int err = -ENOENT;
+ bool retry = true;
+ int err;
spin_lock(&inode->i_lock);
- if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
- goto out_zap;
- cache = nfs_access_search_rbtree(inode, cred);
- if (cache == NULL)
- goto out;
- if (!nfs_have_delegated_attributes(inode) &&
- !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
- goto out_stale;
+ for(;;) {
+ if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+ goto out_zap;
+ cache = nfs_access_search_rbtree(inode, cred);
+ err = -ENOENT;
+ if (cache == NULL)
+ goto out;
+ /* Found an entry, is our attribute cache valid? */
+ if (!nfs_attribute_cache_expired(inode) &&
+ !(nfsi->cache_validity & NFS_INO_INVALID_ATTR))
+ break;
+ err = -ECHILD;
+ if (!may_block)
+ goto out;
+ if (!retry)
+ goto out_zap;
+ spin_unlock(&inode->i_lock);
+ err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (err)
+ return err;
+ spin_lock(&inode->i_lock);
+ retry = false;
+ }
res->jiffies = cache->jiffies;
res->cred = cache->cred;
res->mask = cache->mask;
@@ -2275,12 +2291,6 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
out:
spin_unlock(&inode->i_lock);
return err;
-out_stale:
- rb_erase(&cache->rb_node, &nfsi->access_cache);
- list_del(&cache->lru);
- spin_unlock(&inode->i_lock);
- nfs_access_free_entry(cache);
- return -ENOENT;
out_zap:
spin_unlock(&inode->i_lock);
nfs_access_zap_cache(inode);
@@ -2307,13 +2317,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred,
cache = NULL;
if (cache == NULL)
goto out;
- if (!nfs_have_delegated_attributes(inode) &&
- !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+ err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode);
+ if (err)
goto out;
res->jiffies = cache->jiffies;
res->cred = cache->cred;
res->mask = cache->mask;
- err = 0;
out:
rcu_read_unlock();
return err;
@@ -2402,18 +2411,19 @@ EXPORT_SYMBOL_GPL(nfs_access_set_mask);
static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
{
struct nfs_access_entry cache;
+ bool may_block = (mask & MAY_NOT_BLOCK) == 0;
int status;
trace_nfs_access_enter(inode);
status = nfs_access_get_cached_rcu(inode, cred, &cache);
if (status != 0)
- status = nfs_access_get_cached(inode, cred, &cache);
+ status = nfs_access_get_cached(inode, cred, &cache, may_block);
if (status == 0)
goto out_cached;
status = -ECHILD;
- if (mask & MAY_NOT_BLOCK)
+ if (!may_block)
goto out;
/* Be clever: ask server to check for all possible rights */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e6210ead71d0..72b7d13ee3c6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -196,6 +196,12 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
WARN_ON_ONCE(verfp->committed < 0);
}
+static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1,
+ const struct nfs_writeverf *v2)
+{
+ return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier);
+}
+
/*
* nfs_direct_cmp_hdr_verf - compare verifier for pgio header
* @dreq - direct request possibly spanning multiple servers
@@ -215,7 +221,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
nfs_direct_set_hdr_verf(dreq, hdr);
return 0;
}
- return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+ return nfs_direct_cmp_verf(verfp, &hdr->verf);
}
/*
@@ -238,7 +244,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
if (verfp->committed < 0)
return 1;
- return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+ return nfs_direct_cmp_verf(verfp, &data->verf);
}
/**
@@ -366,22 +372,10 @@ out:
* Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
* the iocb is still valid here if this is a synchronous request.
*/
-static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
struct inode *inode = dreq->inode;
- if (dreq->iocb && write) {
- loff_t pos = dreq->iocb->ki_pos + dreq->count;
-
- spin_lock(&inode->i_lock);
- if (i_size_read(inode) < pos)
- i_size_write(inode, pos);
- spin_unlock(&inode->i_lock);
- }
-
- if (write)
- nfs_zap_mapping(inode, inode->i_mapping);
-
inode_dio_end(inode);
if (dreq->iocb) {
@@ -436,7 +430,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
}
out_put:
if (put_dreq(dreq))
- nfs_direct_complete(dreq, false);
+ nfs_direct_complete(dreq);
hdr->release(hdr);
}
@@ -542,7 +536,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
}
if (put_dreq(dreq))
- nfs_direct_complete(dreq, false);
+ nfs_direct_complete(dreq);
return 0;
}
@@ -583,17 +577,12 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (!count)
goto out;
- inode_lock(inode);
- result = nfs_sync_mapping(mapping);
- if (result)
- goto out_unlock;
-
task_io_account_read(count);
result = -ENOMEM;
dreq = nfs_direct_req_alloc();
if (dreq == NULL)
- goto out_unlock;
+ goto out;
dreq->inode = inode;
dreq->bytes_left = dreq->max_count = count;
@@ -608,10 +597,12 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
+ nfs_start_io_direct(inode);
+
NFS_I(inode)->read_io += count;
result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
- inode_unlock(inode);
+ nfs_end_io_direct(inode);
if (!result) {
result = nfs_direct_wait(dreq);
@@ -619,13 +610,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
iocb->ki_pos += result;
}
- nfs_direct_req_release(dreq);
- return result;
-
out_release:
nfs_direct_req_release(dreq);
-out_unlock:
- inode_unlock(inode);
out:
return result;
}
@@ -657,6 +643,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
dreq->count = 0;
+ dreq->verf.committed = NFS_INVALID_STABLE_HOW;
+ nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
for (i = 0; i < dreq->mirror_count; i++)
dreq->mirrors[i].count = 0;
get_dreq(dreq);
@@ -775,7 +763,8 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
nfs_direct_write_reschedule(dreq);
break;
default:
- nfs_direct_complete(dreq, true);
+ nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
+ nfs_direct_complete(dreq);
}
}
@@ -991,6 +980,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t result = -EINVAL;
+ size_t count;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
@@ -1001,34 +991,24 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
file, iov_iter_count(iter), (long long) iocb->ki_pos);
- nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES,
- iov_iter_count(iter));
+ result = generic_write_checks(iocb, iter);
+ if (result <= 0)
+ return result;
+ count = result;
+ nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
pos = iocb->ki_pos;
end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
- inode_lock(inode);
-
- result = nfs_sync_mapping(mapping);
- if (result)
- goto out_unlock;
-
- if (mapping->nrpages) {
- result = invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT, end);