summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-13 18:13:19 -0600
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-13 18:13:19 -0600
commit06b8ab55289345ab191bf4bf0e4acc6d4bdf293d (patch)
tree9af9215097e26c026f30a58c6ca3092ec15d1e1e /fs
parentdc1cc85133120e49c223f36aa77d398b8abac727 (diff)
parent71a6ec8ac587418ceb6b420def1ca44b334c1ff7 (diff)
Merge tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: - stable fix for a bug in nfs3_list_one_acl() - speed up NFS path walks by supporting LOOKUP_RCU - more read/write code cleanups - pNFS fixes for layout return on close - fixes for the RCU handling in the rpcsec_gss code - more NFS/RDMA fixes" * tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits) nfs: reject changes to resvport and sharecache during remount NFS: Avoid infinite loop when RELEASE_LOCKOWNER getting expired error SUNRPC: remove all refcounting of groupinfo from rpcauth_lookupcred NFS: fix two problems in lookup_revalidate in RCU-walk NFS: allow lockless access to access_cache NFS: teach nfs_lookup_verify_inode to handle LOOKUP_RCU NFS: teach nfs_neg_need_reval to understand LOOKUP_RCU NFS: support RCU_WALK in nfs_permission() sunrpc/auth: allow lockless (rcu) lookup of credential cache. NFS: prepare for RCU-walk support but pushing tests later in code. NFS: nfs4_lookup_revalidate: only evaluate parent if it will be used. NFS: add checks for returned value of try_module_get() nfs: clear_request_commit while holding i_lock pnfs: add pnfs_put_lseg_async pnfs: find swapped pages on pnfs commit lists too nfs: fix comment and add warn_on for PG_INODE_REF nfs: check wait_on_bit_lock err in page_group_lock sunrpc: remove "ec" argument from encrypt_v2 operation sunrpc: clean up sparse endianness warnings in gss_krb5_wrap.c sunrpc: clean up sparse endianness warnings in gss_krb5_seal.c ...
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c101
-rw-r--r--fs/nfs/callback.c12
-rw-r--r--fs/nfs/client.c18
-rw-r--r--fs/nfs/delegation.c34
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c208
-rw-r--r--fs/nfs/direct.c33
-rw-r--r--fs/nfs/filelayout/filelayout.c298
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c2
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/internal.h11
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c21
-rw-r--r--fs/nfs/nfs4_fs.h32
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4proc.c248
-rw-r--r--fs/nfs/nfs4state.c69
-rw-r--r--fs/nfs/nfs4trace.h28
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c81
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c276
-rw-r--r--fs/nfs/pnfs.c178
-rw-r--r--fs/nfs/pnfs.h45
-rw-r--r--fs/nfs/proc.c27
-rw-r--r--fs/nfs/read.c54
-rw-r--r--fs/nfs/super.c12
-rw-r--r--fs/nfs/write.c150
-rw-r--r--fs/nfs_common/nfsacl.c5
30 files changed, 1124 insertions, 870 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9b431f44fad9..cbb1797149d5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(bvec->bv_page);
if (err) {
- struct nfs_pgio_data *rdata = par->data;
- struct nfs_pgio_header *header = rdata->header;
+ struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
header->pnfs_error = -EIO;
@@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *rdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_pgio_data, task);
- pnfs_ld_read_done(rdata);
+ hdr = container_of(task, struct nfs_pgio_header, task);
+ pnfs_ld_read_done(hdr);
}
static void
bl_end_par_io_read(void *data, int unused)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
- rdata->task.tk_status = rdata->header->pnfs_error;
- INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
- schedule_work(&rdata->task.u.tk_work);
+ hdr->task.tk_status = hdr->pnfs_error;
+ INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
+ schedule_work(&hdr->task.u.tk_work);
}
static enum pnfs_try_status
-bl_read_pagelist(struct nfs_pgio_data *rdata)
+bl_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *header = rdata->header;
+ struct nfs_pgio_header *header = hdr;
int i, hole;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, extent_length = 0;
struct parallel_io *par;
- loff_t f_offset = rdata->args.offset;
- size_t bytes_left = rdata->args.count;
+ loff_t f_offset = hdr->args.offset;
+ size_t bytes_left = hdr->args.count;
unsigned int pg_offset, pg_len;
- struct page **pages = rdata->args.pages;
- int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+ struct page **pages = hdr->args.pages;
+ int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
const bool is_dio = (header->dreq != NULL);
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
- rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
+ hdr->page_array.npages, f_offset,
+ (unsigned int)hdr->args.count);
- par = alloc_parallel(rdata);
+ par = alloc_parallel(hdr);
if (!par)
goto use_mds;
par->pnfs_callback = bl_end_par_io_read;
@@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */
- for (i = pg_index; i < rdata->pages.npages; i++) {
+ for (i = pg_index; i < hdr->page_array.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be;
- bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
+ bio = do_add_page_to_bio(bio,
+ hdr->page_array.npages - i,
READ,
isect, pages[i], be_read,
bl_end_io_read, par,
@@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
extent_length -= PAGE_CACHE_SECTORS;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
- rdata->res.eof = 1;
- rdata->res.count = header->inode->i_size - rdata->args.offset;
+ hdr->res.eof = 1;
+ hdr->res.count = header->inode->i_size - hdr->args.offset;
} else {
- rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
+ hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
}
out:
bl_put_extent(be);
@@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
}
if (unlikely(err)) {
- struct nfs_pgio_data *data = par->data;
- struct nfs_pgio_header *header = data->header;
+ struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
header->pnfs_error = -EIO;
@@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_pgio_data *data = par->data;
- struct nfs_pgio_header *header = data->header;
+ struct nfs_pgio_header *header = par->data;
if (!uptodate) {
if (!header->pnfs_error)
@@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *wdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_pgio_data, task);
- if (likely(!wdata->header->pnfs_error)) {
+ hdr = container_of(task, struct nfs_pgio_header, task);
+ if (likely(!hdr->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
- mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
- wdata->args.offset, wdata->args.count);
+ mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
+ hdr->args.offset, hdr->args.count);
}
- pnfs_ld_write_done(wdata);
+ pnfs_ld_write_done(hdr);
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (unlikely(wdata->header->pnfs_error)) {
- bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
+ if (unlikely(hdr->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
num_se);
}
- wdata->task.tk_status = wdata->header->pnfs_error;
- wdata->verf.committed = NFS_FILE_SYNC;
- INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
- schedule_work(&wdata->task.u.tk_work);
+ hdr->task.tk_status = hdr->pnfs_error;
+ hdr->verf.committed = NFS_FILE_SYNC;
+ INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
+ schedule_work(&hdr->task.u.tk_work);
}
/* FIXME STUB - mark intersection of layout and page as bad, so is not
@@ -673,18 +672,17 @@ check_page:
}
static enum pnfs_try_status
-bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_header *header, int sync)
{
- struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, last_isect = 0, extent_length = 0;
struct parallel_io *par = NULL;
- loff_t offset = wdata->args.offset;
- size_t count = wdata->args.count;
+ loff_t offset = header->args.offset;
+ size_t count = header->args.count;
unsigned int pg_offset, pg_len, saved_len;
- struct page **pages = wdata->args.pages;
+ struct page **pages = header->args.pages;
struct page *page;
pgoff_t index;
u64 temp;
@@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
goto out_mds;
}
- /* At this point, wdata->pages is a (sequential) list of nfs_pages.
+ /* At this point, header->page_aray is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
*/
- par = alloc_parallel(wdata);
+ par = alloc_parallel(header);
if (!par)
goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
@@ -790,8 +788,8 @@ next_page:
bio = bl_submit_bio(WRITE, bio);
/* Middle pages */
- pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
- for (i = pg_index; i < wdata->pages.npages; i++) {
+ pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+ for (i = pg_index; i < header->page_array.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -862,7 +860,8 @@ next_page:
}
- bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+ bio = do_add_page_to_bio(bio, header->page_array.npages - i,
+ WRITE,
isect, pages[i], be,
bl_end_io_write, par,
pg_offset, pg_len);
@@ -890,7 +889,7 @@ next_page:
}
write_done:
- wdata->res.count = wdata->args.count;
+ header->res.count = header->args.count;
out:
bl_put_extent(be);
bl_put_extent(cow_read);
@@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
return ERR_PTR(-ENOMEM);
}
- pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
+ pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
kfree(dev);
return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf67ed9..54de482143cc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
if (p == NULL)
return 0;
+ /*
+ * Did we get the acceptor from userland during the SETCLIENID
+ * negotiation?
+ */
+ if (clp->cl_acceptor)
+ return !strcmp(p, clp->cl_acceptor);
+
+ /*
+ * Otherwise try to verify it using the cl_hostname. Note that this
+ * doesn't work if a non-canonical hostname was used in the devname.
+ */
+
/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
if (memcmp(p, "nfs@", 4) != 0)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 180d1ec9c32e..1c5ff6d58385 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
mutex_unlock(&nfs_version_mutex);
}
- if (!IS_ERR(nfs))
- try_module_get(nfs->owner);
+ if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
+ return ERR_PTR(-EAGAIN);
return nfs;
}
@@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
goto error_0;
clp->cl_nfs_mod = cl_init->nfs_mod;
- try_module_get(clp->cl_nfs_mod->owner);
+ if (!try_module_get(clp->cl_nfs_mod->owner))
+ goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
@@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
error_cleanup:
put_nfs_version(clp->cl_nfs_mod);
+error_dealloc:
kfree(clp);
error_0:
return ERR_PTR(err);
@@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp)
put_net(clp->cl_net);
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
+ kfree(clp->cl_acceptor);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
+ if (cl_init->hostname == NULL) {
+ WARN_ON(1);
+ return NULL;
+ }
+
dprintk("--> nfs_get_client(%s,v%u)\n",
- cl_init->hostname ?: "", rpc_ops->version);
+ cl_init->hostname, rpc_ops->version);
/* see if the client already exists */
do {
@@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
} while (!IS_ERR(new));
dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
- cl_init->hostname ?: "", PTR_ERR(new));
+ cl_init->hostname, PTR_ERR(new));
return new;
}
EXPORT_SYMBOL_GPL(nfs_get_client);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5d8ccecf5f5c..5853f53db732 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
-/**
- * nfs_have_delegation - check if inode has a delegation
- * @inode: inode to check
- * @flags: delegation types to check for
- *
- * Returns one if inode has the indicated delegation, otherwise zero.
- */
-int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+static int
+nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
@@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL && (delegation->type & flags) == flags &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
- nfs_mark_delegation_referenced(delegation);
+ if (mark)
+ nfs_mark_delegation_referenced(delegation);
ret = 1;
}
rcu_read_unlock();
return ret;
}
+/**
+ * nfs_have_delegation - check if inode has a delegation, mark it
+ * NFS_DELEGATION_REFERENCED if there is one.
+ * @inode: inode to check
+ * @flags: delegation types to check for
+ *
+ * Returns one if inode has the indicated delegation, otherwise zero.
+ */
+int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+{
+ return nfs4_do_check_delegation(inode, flags, true);
+}
+
+/*
+ * nfs4_check_delegation - check if inode has a delegation, do not mark
+ * NFS_DELEGATION_REFERENCED if it has one.
+ */
+int nfs4_check_delegation(struct inode *inode, fmode_t flags)
+{
+ return nfs4_do_check_delegation(inode, flags, false);
+}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..5c1cce39297f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
+int nfs4_check_delegation(struct inode *inode, fmode_t flags);
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a3d4ef76127..36d921f0c602 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
* and may need to be looked up again.
+ * If rcu_walk prevents us from performing a full check, return 0.
*/
-static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
+ int rcu_walk)
{
+ int ret;
+
if (IS_ROOT(dentry))
return 1;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0;
/* Revalidate nfsi->cache_change_attribute before we declare a match */
- if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+ if (rcu_walk)
+ ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
+ else
+ ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+ if (ret < 0)
return 0;
if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0;
@@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
out:
return (inode->i_nlink == 0) ? -ENOENT : 0;
out_force:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
ret = __nfs_revalidate_inode(server, inode);
if (ret != 0)
return ret;
@@ -1054,6 +1064,9 @@ out_force:
*
* If parent mtime has changed, we revalidate, else we wait for a
* period corresponding to the parent's attribute cache timeout value.
+ *
+ * If LOOKUP_RCU prevents us from performing a full check, return 1
+ * suggesting a reval is needed.
*/
static inline
int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1;
- return !nfs_check_verifier(dir, dentry);
+ return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
/*
@@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
struct nfs4_label *label = NULL;
int error;
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- parent = dget_parent(dentry);
- dir = parent->d_inode;
+ if (flags & LOOKUP_RCU) {
+ parent = ACCESS_ONCE(dentry->d_parent);
+ dir = ACCESS_ONCE(parent->d_inode);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ }
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
if (!inode) {
- if (nfs_neg_need_reval(dir, dentry, flags))
+ if (nfs_neg_need_reval(dir, dentry, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
goto out_bad;
+ }
goto out_valid_noent;
}
if (is_bad_inode(inode)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
__func__, dentry);
goto out_bad;
@@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto out_set_verifier;
/* Force a full look up iff the parent directory has changed */
- if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
- if (nfs_lookup_verify_inode(inode, flags))
+ if (!nfs_is_exclusive_create(dir, flags) &&
+ nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
+
+ if (nfs_lookup_verify_inode(inode, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
goto out_zap_parent;
+ }
goto out_valid;
}
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
if (NFS_STALE(inode))
goto out_bad;
@@ -1153,13 +1183,18 @@ out_set_verifier:
/* Success: notify readdir to use READDIRPLUS */
nfs_advise_use_readdirplus(dir);
out_valid_noent:
- dput(parent);
+ if (flags & LOOKUP_RCU) {
+ if (parent != ACCESS_ONCE(dentry->d_parent))
+ return -ECHILD;
+ } else
+ dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
__func__, dentry);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
out_bad:
+ WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
@@ -1185,6 +1220,7 @@ out_zap_parent:
__func__, dentry);
return 0;
out_error:
+ WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
@@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent = NULL;
struct inode *inode;
- struct inode *dir;
int ret = 0;
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
goto no_open;
if (d_mountpoint(dentry))
@@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto no_open;
inode = dentry->d_inode;
- parent = dget_parent(dentry);
- dir = parent->d_inode;
/* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries.
*/
if (inode == NULL) {
+ struct dentry *parent;
+ struct inode *dir;
+
+ if (flags & LOOKUP_RCU) {
+ parent = ACCESS_ONCE(dentry->d_parent);
+ dir = ACCESS_ONCE(parent->d_inode);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ }
if (!nfs_neg_need_reval(dir, dentry, flags))
ret = 1;
+ else if (flags & LOOKUP_RCU)
+ ret = -ECHILD;
+ if (!(flags & LOOKUP_RCU))
+ dput(parent);
+ else if (parent != ACCESS_ONCE(dentry->d_parent))
+ return -ECHILD;
goto out;
}
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
- goto no_open_dput;
+ goto no_open;
/* We cannot do exclusive creation on a positive dentry */
if (flags & LOOKUP_EXCL)
- goto no_open_dput;
+ goto no_open;
/* Let f_op->open() actually open (and revalidate) the file */
ret = 1;
out:
- dput(parent);
return ret;
-no_open_dput:
- dput(parent);
no_open:
return nfs_lookup_revalidate(dentry, flags);
}
@@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries;
+static unsigned long nfs_access_max_cachesize = ULONG_MAX;
+module_param(nfs_access_max_cachesize, ulong, 0644);
+MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
+
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
put_rpccred(entry->cred);
- kfree(entry);
+ kfree_rcu(entry, rcu_head);
smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
smp_mb__after_atomic();
@@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-unsigned long
-nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+nfs_do_access_cache_scan(unsigned int nr_to_scan)
{
LIST_HEAD(head);
struct nfs_inode *nfsi, *next;
struct nfs_access_entry *cache;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
long freed = 0;
- if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
- return SHRINK_STOP;
-
spin_lock(&nfs_access_lru_lock);
list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
struct inode *inode;
@@ -2094,11 +2137,39 @@ remove_lru_entry:
}
unsigned long
+nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int nr_to_scan = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
+
+ if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return SHRINK_STOP;
+ return nfs_do_access_cache_scan(nr_to_scan);
+}
+
+
+unsigned long
nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
}
+static void
+nfs_access_cache_enforce_limit(void)
+{
+ long nr_entries = atomic_long_read(&nfs_access_nr_entries);
+ unsigned long diff;
+ unsigned int nr_to_scan;
+
+ if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
+ return;
+ nr_to_scan = 100;
+ diff = nr_entries - nfs_access_max_cachesize;
+ if (diff < nr_to_scan)
+ nr_to_scan = diff;
+ nfs_do_access_cache_scan(nr_to_scan);
+}
+
static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
{
struct rb_root *root_node = &nfsi->access_cache;
@@ -2186,6 +2257,38 @@ out_zap:
return -ENOENT;
}
+static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+ /* Only check the most recently returned cache entry,
+ * but do it without locking.
+ */
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_access_entry *cache;
+ int err = -ECHILD;
+ struct list_head *lh;
+
+ rcu_read_lock();
+ if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+ goto out;
+ lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
+ cache = list_entry(lh, struct nfs_access_entry, lru);
+ if (lh == &nfsi->access_cache_entry_lru ||
+ cred != cache->cred)
+ cache = NULL;
+ if (cache == NULL)
+ goto out;
+ if (!nfs_have_delegated_attributes(inode) &&
+ !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+ goto out;
+ res->jiffies = cache->jiffies;
+ res->cred = cache->cred;
+ res->mask = cache->mask;
+ err = 0;
+out:
+ rcu_read_unlock();
+ return err;
+}
+
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
cache->cred = get_rpccred(set->cred);
cache->mask = set->mask;
+ /* The above field assignments must be visible
+ * before this item appears on the lru. We cannot easily
+ * use rcu_assign_pointer, so just force the memory barrier.
+ */
+ smp_wmb();
nfs_access_add_rbtree(inode, cache);
/* Update accounting */
@@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
&nfs_access_lru_list);
spin_unlock(&nfs_access_lru_lock);
}
+ nfs_access_cache_enforce_limit();
}
EXPORT_SYMBOL_GPL(nfs_access_add_cache);
@@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached(inode, cred, &cache);
+ status = nfs_access_get_cached_rcu(inode, cred, &cache);
+ if (status != 0)
+ status = nfs_access_get_cached(inode, cred, &cache);
if (status == 0)
goto out_cached;
+ status = -ECHILD;
+ if (mask & MAY_NOT_BLOCK)
+ goto out;
+
/* Be clever: ask server to check for all possible rights */
cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
cache.cred = cred;
@@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
struct rpc_cred *cred;
int res = 0;
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2350,12 +2462,23 @@ force_lookup:
if (!NFS_PROTO(inode)->access)
goto out_notsup;
- cred = rpc_lookup_cred();
- if (!IS_ERR(cred)) {
- res = nfs_do_access(inode, cred, mask);
- put_rpccred(cred);
- } else
+ /* Always try fast lookups first */
+ rcu_read_lock();
+ cred = rpc_lookup_cred_nonblock();
+ if (!IS_ERR(cred))
+ res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
+