summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-12 14:50:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-12 14:50:42 -0700
commit1fbf3e48123d701584bc75ccac67ef2fe412ac4c (patch)
treecaca042e8d753d4147219820db8497a766b873bd /fs
parentf88c5942cfaf7d55e46d395136cccaca65b2e3bf (diff)
parent4d6c671ace569d4b0d3f8d92ab3aef18a5d166bc (diff)
Merge tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable fixes: - Fixes for NFS I/O request leakages - Fix error handling paths in the NFS I/O recoalescing code - Reinitialise NFSv4.1 sequence results before retransmitting a request - Fix a soft lockup in the delegation recovery code - Bulk destroy of layouts needs to be safe w.r.t. umount - Prevent thundering herd issues when the SUNRPC socket is not connected - Respect RPC call timeouts when retrying transmission Features: - Convert rpc auth layer to use xdr_streams - Config option to disable insecure RPCSEC_GSS crypto types - Reduce size of RPC receive buffers - Readdirplus optimization by cache mechanism - Convert SUNRPC socket send code to use iov_iter() - SUNRPC micro-optimisations to avoid indirect calls - Add support for the pNFS LAYOUTERROR operation and use it with the pNFS/flexfiles driver - Add trace events to report non-zero NFS status codes - Various removals of unnecessary dprintks Bugfixes and cleanups: - Fix a number of sparse warnings and documentation format warnings - Fix nfs_parse_devname to not modify it's argument - Fix potential corruption of page being written through pNFS/blocks - fix xfstest generic/099 failures on nfsv3 - Avoid NFSv4.1 "false retries" when RPC calls are interrupted - Abort I/O early if the pNFS/flexfiles layout segment was invalidated - Avoid unnecessary pNFS/flexfiles layout invalidations" * tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (90 commits) SUNRPC: Take the transport send lock before binding+connecting SUNRPC: Micro-optimise when the task is known not to be sleeping SUNRPC: Check whether the task was transmitted before rebind/reconnect SUNRPC: Remove redundant calls to RPC_IS_QUEUED() SUNRPC: Clean up SUNRPC: Respect RPC call timeouts when retrying transmission SUNRPC: Fix up RPC back channel transmission SUNRPC: Prevent thundering herd when the socket is not connected SUNRPC: Allow dynamic allocation of back channel slots NFSv4.1: Bump the default callback session slot count to 16 SUNRPC: Convert remaining GFP_NOIO, and GFP_NOWAIT sites in sunrpc NFS/flexfiles: Clean up mirror DS initialisation NFS/flexfiles: Remove dead code in ff_layout_mirror_valid() NFS/flexfile: Simplify nfs4_ff_layout_select_ds_stateid() NFS/flexfile: Simplify nfs4_ff_layout_ds_version() NFS/flexfiles: Simplify ff_layout_get_ds_cred() NFS/flexfiles: Simplify nfs4_ff_find_or_create_ds_client() NFS/flexfiles: Simplify nfs4_ff_layout_select_ds_fh() NFS/flexfiles: Speed up read failover when DSes are down NFS/flexfiles: Don't invalidate DS deviceids for being unresponsive ...
Diffstat (limited to 'fs')
-rw-r--r--fs/lockd/clnt4xdr.c14
-rw-r--r--fs/lockd/clntxdr.c14
-rw-r--r--fs/nfs/callback_xdr.c64
-rw-r--r--fs/nfs/delegation.c22
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c98
-rw-r--r--fs/nfs/direct.c7
-rw-r--r--fs/nfs/file.c44
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c225
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h75
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c161
-rw-r--r--fs/nfs/inode.c33
-rw-r--r--fs/nfs/internal.h5
-rw-r--r--fs/nfs/io.c12
-rw-r--r--fs/nfs/namespace.c8
-rw-r--r--fs/nfs/nfs2xdr.c124
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3xdr.c209
-rw-r--r--fs/nfs/nfs42.h3
-rw-r--r--fs/nfs/nfs42proc.c164
-rw-r--r--fs/nfs/nfs42xdr.c130
-rw-r--r--fs/nfs/nfs4client.c33
-rw-r--r--fs/nfs/nfs4namespace.c5
-rw-r--r--fs/nfs/nfs4proc.c138
-rw-r--r--fs/nfs/nfs4session.c7
-rw-r--r--fs/nfs/nfs4session.h7
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/nfs4trace.h25
-rw-r--r--fs/nfs/nfs4xdr.c530
-rw-r--r--fs/nfs/nfstrace.c1
-rw-r--r--fs/nfs/nfstrace.h85
-rw-r--r--fs/nfs/pagelist.c47
-rw-r--r--fs/nfs/pnfs.c33
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/pnfs_dev.c13
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfs/unlink.c8
-rw-r--r--fs/nfs/write.c19
-rw-r--r--fs/nfsd/nfs4callback.c13
40 files changed, 1291 insertions, 1095 deletions
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 214a2fa1f1e3..7df6324ccb8a 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -75,17 +75,6 @@ static void nlm4_compute_offsets(const struct nlm_lock *lock,
}
/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("lockd: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
-
-/*
* Encode/decode NLMv4 basic data types
*
* Basic NLMv4 data types are defined in Appendix II, section 6.1.4
@@ -176,7 +165,6 @@ out_size:
dprintk("NFS: returned cookie was too long: %u\n", length);
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -236,7 +224,6 @@ out_bad_xdr:
__func__, be32_to_cpup(p));
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -309,7 +296,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
out:
return error;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 747b9c8c940a..4df62f635529 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -71,17 +71,6 @@ static void nlm_compute_offsets(const struct nlm_lock *lock,
}
/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("lockd: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
-
-/*
* Encode/decode NLMv3 basic data types
*
* Basic NLMv3 data types are not defined in an IETF standards
@@ -173,7 +162,6 @@ out_size:
dprintk("NFS: returned cookie was too long: %u\n", length);
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -231,7 +219,6 @@ out_enum:
__func__, be32_to_cpup(p));
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -303,7 +290,6 @@ static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
out:
return error;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a87a56273407..06233bfa6d73 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -72,16 +72,6 @@ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
return xdr_ressize_check(rqstp, p);
}
-static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes)
-{
- __be32 *p;
-
- p = xdr_inline_decode(xdr, nbytes);
- if (unlikely(p == NULL))
- printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
- return p;
-}
-
static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
const char **str, size_t maxlen)
{
@@ -98,13 +88,13 @@ static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
__be32 *p;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
fh->size = ntohl(*p);
if (fh->size > NFS4_FHSIZE)
return htonl(NFS4ERR_BADHANDLE);
- p = read_buf(xdr, fh->size);
+ p = xdr_inline_decode(xdr, fh->size);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(&fh->data[0], p, fh->size);
@@ -117,11 +107,11 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
__be32 *p;
unsigned int attrlen;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
attrlen = ntohl(*p);
- p = read_buf(xdr, attrlen << 2);
+ p = xdr_inline_decode(xdr, attrlen << 2);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
if (likely(attrlen > 0))
@@ -135,7 +125,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
__be32 *p;
- p = read_buf(xdr, NFS4_STATEID_SIZE);
+ p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(stateid->data, p, NFS4_STATEID_SIZE);
@@ -156,7 +146,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ);
if (unlikely(status != 0))
return status;
- p = read_buf(xdr, 12);
+ p = xdr_inline_decode(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
hdr->minorversion = ntohl(*p++);
@@ -176,7 +166,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
{
__be32 *p;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE_HDR);
*op = ntohl(*p);
@@ -205,7 +195,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp,
status = decode_delegation_stateid(xdr, &args->stateid);
if (unlikely(status != 0))
return status;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
args->truncate = ntohl(*p);
@@ -227,7 +217,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
__be32 status = 0;
uint32_t iomode;
- p = read_buf(xdr, 4 * sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, 4 * sizeof(uint32_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
@@ -245,14 +235,14 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
if (unlikely(status != 0))
return status;
- p = read_buf(xdr, 2 * sizeof(uint64_t));
+ p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_range.offset);
p = xdr_decode_hyper(p, &args->cbl_range.length);
return decode_layout_stateid(xdr, &args->cbl_stateid);
} else if (args->cbl_recall_type == RETURN_FSID) {
- p = read_buf(xdr, 2 * sizeof(uint64_t));
+ p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_fsid.major);
@@ -275,7 +265,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
args->ndevs = 0;
/* Num of device notifications */
- p = read_buf(xdr, sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto out;
@@ -298,7 +288,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
for (i = 0; i < n; i++) {
struct cb_devicenotifyitem *dev = &args->devs[i];
- p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
+ p = xdr_inline_decode(xdr, (4 * sizeof(uint32_t)) +
+ NFS4_DEVICEID4_SIZE);
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto err;
@@ -329,7 +320,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) {
- p = read_buf(xdr, sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto err;
@@ -359,7 +350,7 @@ static __be32 decode_sessionid(struct xdr_stream *xdr,
{
__be32 *p;
- p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
@@ -379,13 +370,13 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
goto out;
status = htonl(NFS4ERR_RESOURCE);
- p = read_buf(xdr, sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL))
goto out;
rc_list->rcl_nrefcalls = ntohl(*p++);
if (rc_list->rcl_nrefcalls) {
- p = read_buf(xdr,
+ p = xdr_inline_decode(xdr,
rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
if (unlikely(p == NULL))
goto out;
@@ -418,7 +409,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
if (status)
return status;
- p = read_buf(xdr, 5 * sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, 5 * sizeof(uint32_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
@@ -461,7 +452,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
uint32_t bitmap[2];
__be32 *p, status;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
args->craa_objs_to_keep = ntohl(*p++);
@@ -480,7 +471,7 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
struct cb_recallslotargs *args = argp;
__be32 *p;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
args->crsa_target_highest_slotid = ntohl(*p++);
@@ -492,14 +483,14 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg
__be32 *p;
unsigned int len;
- p = read_buf(xdr, 12);
+ p = xdr_inline_decode(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbnl_owner.clientid);
len = be32_to_cpu(*p);
- p = read_buf(xdr, len);
+ p = xdr_inline_decode(xdr, len);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
@@ -537,7 +528,7 @@ static __be32 decode_write_response(struct xdr_stream *xdr,
__be32 *p;
/* skip the always zero field */
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out;
p++;
@@ -577,7 +568,7 @@ static __be32 decode_offload_args(struct svc_rqst *rqstp,
return status;
/* decode status */
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out;
args->error = ntohl(*p++);
@@ -943,10 +934,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
};
unsigned int nops = 0;
- xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+ xdr_init_decode(&xdr_in, &rqstp->rq_arg,
+ rqstp->rq_arg.head[0].iov_base, NULL);
p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
- xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+ xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL);
status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
if (status == htonl(NFS4ERR_RESOURCE))
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 885363ca8569..2f6b447cdd82 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -229,6 +229,8 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
spin_lock(&delegation->lock);
if (delegation->inode != NULL)
inode = igrab(delegation->inode);
+ if (!inode)
+ set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
spin_unlock(&delegation->lock);
return inode;
}
@@ -681,7 +683,7 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
/**
* nfs_super_return_all_delegations - return delegations for one superblock
- * @sb: sb to process
+ * @server: pointer to nfs_server to process
*
*/
void nfs_server_return_all_delegations(struct nfs_server *server)
@@ -944,10 +946,11 @@ restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
- if (test_bit(NFS_DELEGATION_RETURNING,
- &delegation->flags))
- continue;
- if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
+ if (test_bit(NFS_DELEGATION_INODE_FREEING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURNING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
if (!nfs_sb_active(server->super))
@@ -1053,10 +1056,11 @@ restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
- if (test_bit(NFS_DELEGATION_RETURNING,
- &delegation->flags))
- continue;
- if (test_bit(NFS_DELEGATION_TEST_EXPIRED,
+ if (test_bit(NFS_DELEGATION_INODE_FREEING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURNING,
+ &delegation->flags) ||
+ test_bit(NFS_DELEGATION_TEST_EXPIRED,
&delegation->flags) == 0)
continue;
if (!nfs_sb_active(server->super))
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index dcbf3394ba0e..35b4b02c1ae0 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -34,6 +34,7 @@ enum {
NFS_DELEGATION_RETURNING,
NFS_DELEGATION_REVOKED,
NFS_DELEGATION_TEST_EXPIRED,
+ NFS_DELEGATION_INODE_FREEING,
};
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6bf4471850c8..a71d0b42d160 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -139,12 +139,19 @@ struct nfs_cache_array {
struct nfs_cache_array_entry array[0];
};
+struct readdirvec {
+ unsigned long nr;
+ unsigned long index;
+ struct page *pages[NFS_MAX_READDIR_RAPAGES];
+};
+
typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
typedef struct {
struct file *file;
struct page *page;
struct dir_context *ctx;
unsigned long page_index;
+ struct readdirvec pvec;
u64 *dir_cookie;
u64 last_cookie;
loff_t current_index;
@@ -524,6 +531,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
struct nfs_cache_array *array;
unsigned int count = 0;
int status;
+ int max_rapages = NFS_MAX_READDIR_RAPAGES;
+
+ desc->pvec.index = desc->page_index;
+ desc->pvec.nr = 0;
scratch = alloc_page(GFP_KERNEL);
if (scratch == NULL)
@@ -548,20 +559,40 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
if (desc->plus)
nfs_prime_dcache(file_dentry(desc->file), entry);
- status = nfs_readdir_add_to_array(entry, page);
+ status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
+ if (status == -ENOSPC) {
+ desc->pvec.nr++;
+ if (desc->pvec.nr == max_rapages)
+ break;
+ status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
+ }
if (status != 0)
break;
} while (!entry->eof);
+ /*
+ * page and desc->pvec.pages[0] are valid, don't need to check
+ * whether or not to be NULL.
+ */
+ copy_highpage(page, desc->pvec.pages[0]);
+
out_nopages:
if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
- array = kmap(page);
+ array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]);
array->eof_index = array->size;
status = 0;
- kunmap(page);
+ kunmap_atomic(array);
}
put_page(scratch);
+
+ /*
+ * desc->pvec.nr > 0 means at least one page was completely filled,
+ * we should return -ENOSPC. Otherwise function
+ * nfs_readdir_xdr_to_array will enter infinite loop.
+ */
+ if (desc->pvec.nr > 0)
+ return -ENOSPC;
return status;
}
@@ -574,8 +605,8 @@ void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
}
/*
- * nfs_readdir_large_page will allocate pages that must be freed with a call
- * to nfs_readdir_free_pagearray
+ * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
+ * to nfs_readdir_free_pages()
*/
static
int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
@@ -595,6 +626,24 @@ out_freepages:
return -ENOMEM;
}
+/*
+ * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure.
+ */
+static
+void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc)
+{
+ struct nfs_cache_array *array;
+ int max_rapages = NFS_MAX_READDIR_RAPAGES;
+ int index;
+
+ for (index = 0; index < max_rapages; index++) {
+ array = kmap_atomic(desc->pvec.pages[index]);
+ memset(array, 0, sizeof(struct nfs_cache_array));
+ array->eof_index = -1;
+ kunmap_atomic(array);
+ }
+}
+
static
int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
{
@@ -605,6 +654,12 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
int status = -ENOMEM;
unsigned int array_size = ARRAY_SIZE(pages);
+ /*
+ * This means we hit readdir rdpages miss, the preallocated rdpages
+ * are useless, the preallocate rdpages should be reinitialized.
+ */
+ nfs_readdir_rapages_init(desc);
+
entry.prev_cookie = 0;
entry.cookie = desc->last_cookie;
entry.eof = 0;
@@ -664,9 +719,24 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
struct inode *inode = file_inode(desc->file);
int ret;
- ret = nfs_readdir_xdr_to_array(desc, page, inode);
- if (ret < 0)
- goto error;
+ /*
+ * If desc->page_index in range desc->pvec.index and
+ * desc->pvec.index + desc->pvec.nr, we get readdir cache hit.
+ */
+ if (desc->page_index >= desc->pvec.index &&
+ desc->page_index < (desc->pvec.index + desc->pvec.nr)) {
+ /*
+ * page and desc->pvec.pages[x] are valid, don't need to check
+ * whether or not to be NULL.
+ */
+ copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]);
+ ret = 0;
+ } else {
+ ret = nfs_readdir_xdr_to_array(desc, page, inode);
+ if (ret < 0)
+ goto error;
+ }
+
SetPageUptodate(page);
if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
@@ -831,6 +901,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
*desc = &my_desc;
struct nfs_open_dir_context *dir_ctx = file->private_data;
int res = 0;
+ int max_rapages = NFS_MAX_READDIR_RAPAGES;
dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
file, (long long)ctx->pos);
@@ -850,6 +921,12 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->decode = NFS_PROTO(inode)->decode_dirent;
desc->plus = nfs_use_readdirplus(inode, ctx);
+ res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages);
+ if (res < 0)
+ return -ENOMEM;
+
+ nfs_readdir_rapages_init(desc);
+
if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
if (res < 0)
@@ -885,6 +962,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
} while (!desc->eof);
out:
+ nfs_readdir_free_pages(desc->pvec.pages, max_rapages);
if (res > 0)
res = 0;
dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
@@ -945,7 +1023,7 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
/**
* nfs_force_lookup_revalidate - Mark the directory as having changed
- * @dir - pointer to directory inode
+ * @dir: pointer to directory inode
*
* This forces the revalidation code in nfs_lookup_revalidate() to do a
* full lookup on all child dentries of 'dir' whenever a change occurs
@@ -1649,7 +1727,7 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
+ return nfs_lookup_revalidate_dentry(dir, dentry, inode);
full_reval:
return nfs_do_lookup_revalidate(dir, dentry, flags);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 33824a0a57bf..0fd811ac08b5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -428,7 +428,7 @@ out_put:
hdr->release(hdr);
}
-static void nfs_read_sync_pgio_error(struct list_head *head)
+static void nfs_read_sync_pgio_error(struct list_head *head, int error)
{
struct nfs_page *req;
@@ -664,8 +664,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
if (!nfs_pageio_add_request(&desc, req)) {
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &failed);
+ nfs_list_move_request(req, &failed);
spin_lock(&cinfo.inode->i_lock);
dreq->flags = 0;
if (desc.pg_error < 0)
@@ -821,7 +820,7 @@ out_put:
hdr->release(hdr);
}
-static void nfs_write_sync_pgio_error(struct list_head *head)
+static void nfs_write_sync_pgio_error(struct list_head *head, int error)
{
struct nfs_page *req;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 29553fdba8af..4899b85f9b3c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -89,8 +89,8 @@ EXPORT_SYMBOL_GPL(nfs_file_release);
/**
* nfs_revalidate_size - Revalidate the file size
- * @inode - pointer to inode struct
- * @file - pointer to struct file
+ * @inode: pointer to inode struct
+ * @filp: pointer to struct file
*
* Revalidates the file length. This is basically a wrapper around
* nfs_revalidate_inode() that takes into account the fact that we may
@@ -276,6 +276,12 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
* then a modify/write/read cycle when writing to a page in the
* page cache.
*
+ * Some pNFS layout drivers can only read/write at a certain block
+ * granularity like all block devices and therefore we must perform
+ * read/modify/write whenever a page hasn't read yet and the data
+ * to be written there is not aligned to a block boundary and/or
+ * smaller than the block size.
+ *
* The modify/write/read cycle may occur if a page is read before
* being completely filled by the writer. In this situation, the
* page must be completely written to stable storage on the server
@@ -291,26 +297,32 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
* and that the new data won't completely replace the old data in
* that range of the file.
*/
-static int nfs_want_read_modify_write(struct file *file, struct page *page,
- loff_t pos, unsigned len)
+static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
{
unsigned int pglen = nfs_page_length(page);
unsigned int offset = pos & (PAGE_SIZE - 1);
unsigned int end = offset + len;
- if (pnfs_ld_read_whole_page(file->f_mapping->host)) {
- if (!PageUptodate(page))
- return 1;
- return 0;
- }
+ return !pglen || (end >= pglen && !offset);
+}
- if ((file->f_mode & FMODE_READ) && /* open for read? */
- !PageUptodate(page) && /* Uptodate? */
- !PagePrivate(page) && /* i/o request already? */
- pglen && /* valid bytes of file? */
- (end < pglen || offset)) /* replace all valid bytes? */
- return 1;
- return 0;
+static bool nfs_want_read_modify_write(struct file *file, struct page *page,
+ loff_t pos, unsigned int len)
+{
+ /*
+ * Up-to-date pages, those with ongoing or full-page write
+ * don't need read/modify/write
+ */
+ if (PageUptodate(page) || PagePrivate(page) ||
+ nfs_full_page_write(page, pos, len))
+ return false;
+
+ if (pnfs_ld_read_whole_page(file->f_mapping->host))
+ return true;
+ /* Open for reading too? */
+ if (file->f_mode & FMODE_READ)
+ return true;
+ return false;
}
/*
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 63abe705f4ca..f9264e1922a2 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -410,7 +410,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
for (i = 0; i < fls->mirror_array_cnt; i++) {
struct nfs4_ff_layout_mirror *mirror;
struct cred *kcred;
- const struct cred *cred;
+ const struct cred __rcu *cred;
kuid_t uid;
kgid_t gid;