summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-07 20:51:58 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-07 20:51:58 -0700
commite5fef2a9732580c5bd30c0097f5e9091a3d58ce5 (patch)
treecb91b5a51f7e451300e06e203a50cbed8aabbc1c /fs
parent149e703cb8bfcbdae46140b108bb6f7d2407df8f (diff)
parentf5e4546347bc847be30b3cf904db5fc874b3c5dc (diff)
Merge tag 'afs-next-20190507' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull AFS updates from David Howells: "A set of fix and development patches for AFS for 5.2. Summary: - Fix the AFS file locking so that sqlite can run on an AFS mount and also so that firefox and gnome can use a homedir that's mounted through AFS. This required emulation of fine-grained locking when the server will only support whole-file locks and no upgrade/downgrade. Four modes are provided, settable by mount parameter: "flock=local" - No reference to the server "flock=openafs" - Fine-grained locks are local-only, whole-file locks require sufficient server locks "flock=strict" - All locks require sufficient server locks "flock=write" - Always get an exclusive server lock If the volume is a read-only or backup volume, then flock=local for that volume. - Log extra information for a couple of cases where the client mucks up somehow: AFS vnode with undefined type and dir check failure - in both cases we seem to end up with unfilled data, but the issues happen infrequently and are difficult to reproduce at will. - Implement silly rename for unlink() and rename(). - Set i_blocks so that du can get some information about usage. - Fix xattr handlers to return the right amount of data and to not overflow buffers. - Implement getting/setting raw AFS and YFS ACLs as xattrs" * tag 'afs-next-20190507' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: afs: Implement YFS ACL setting afs: Get YFS ACLs and information through xattrs afs: implement acl setting afs: Get an AFS3 ACL as an xattr afs: Fix getting the afs.fid xattr afs: Fix the afs.cell and afs.volume xattr handlers afs: Calculate i_blocks based on file size afs: Log more information for "kAFS: AFS vnode with undefined type\n" afs: Provide mount-time configurable byte-range file locking emulation afs: Add more tracepoints afs: Implement sillyrename for unlink and rename afs: Add directory reload tracepoint afs: Handle lock rpc ops failing on a file that got deleted afs: Improve dir check failure reports afs: Add file locking tracepoints afs: Further fix file locking afs: Fix AFS file locking to allow fine grained locks afs: Calculate lock extend timer from set/extend reply reception afs: Split wait from afs_make_call()
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/Makefile1
-rw-r--r--fs/afs/afs_fs.h2
-rw-r--r--fs/afs/dir.c167
-rw-r--r--fs/afs/dir_silly.c239
-rw-r--r--fs/afs/flock.c616
-rw-r--r--fs/afs/fs_probe.c13
-rw-r--r--fs/afs/fsclient.c277
-rw-r--r--fs/afs/inode.c43
-rw-r--r--fs/afs/internal.h64
-rw-r--r--fs/afs/protocol_yfs.h6
-rw-r--r--fs/afs/rxrpc.c33
-rw-r--r--fs/afs/super.c34
-rw-r--r--fs/afs/vl_probe.c14
-rw-r--r--fs/afs/vlclient.c26
-rw-r--r--fs/afs/xattr.c270
-rw-r--r--fs/afs/yfsclient.c329
16 files changed, 1761 insertions, 373 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 0738e2bf5193..cbf31f6cd177 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -13,6 +13,7 @@ kafs-y := \
cmservice.o \
dir.o \
dir_edit.o \
+ dir_silly.o \
dynroot.o \
file.o \
flock.o \
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index ddfa88a7a9c0..18a54ca422f8 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -17,8 +17,10 @@
enum AFS_FS_Operations {
FSFETCHDATA = 130, /* AFS Fetch file data */
+ FSFETCHACL = 131, /* AFS Fetch file ACL */
FSFETCHSTATUS = 132, /* AFS Fetch file status */
FSSTOREDATA = 133, /* AFS Store file data */
+ FSSTOREACL = 134, /* AFS Store file ACL */
FSSTORESTATUS = 135, /* AFS Store file status */
FSREMOVEFILE = 136, /* AFS Remove a file */
FSCREATEFILE = 137, /* AFS Create a file */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 8a2562e3a316..9a466be583d2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -26,6 +26,7 @@ static int afs_dir_open(struct inode *inode, struct file *file);
static int afs_readdir(struct file *file, struct dir_context *ctx);
static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
static int afs_d_delete(const struct dentry *dentry);
+static void afs_d_iput(struct dentry *dentry, struct inode *inode);
static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
@@ -85,6 +86,7 @@ const struct dentry_operations afs_fs_dentry_operations = {
.d_delete = afs_d_delete,
.d_release = afs_d_release,
.d_automount = afs_d_automount,
+ .d_iput = afs_d_iput,
};
struct afs_lookup_one_cookie {
@@ -160,6 +162,38 @@ error:
}
/*
+ * Check the contents of a directory that we've just read.
+ */
+static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
+{
+ struct afs_xdr_dir_page *dbuf;
+ unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+
+ for (i = 0; i < req->nr_pages; i++)
+ if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
+ goto bad;
+ return true;
+
+bad:
+ pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
+ dvnode->fid.vid, dvnode->fid.vnode,
+ req->file_size, req->len, req->actual_len, req->remain);
+ pr_warn("DIR %llx %x %x %x\n",
+ req->pos, req->index, req->nr_pages, req->offset);
+
+ for (i = 0; i < req->nr_pages; i++) {
+ dbuf = kmap(req->pages[i]);
+ for (j = 0; j < qty; j++) {
+ union afs_xdr_dir_block *block = &dbuf->blocks[j];
+
+ pr_warn("[%02x] %32phN\n", i * qty + j, block);
+ }
+ kunmap(req->pages[i]);
+ }
+ return false;
+}
+
+/*
* open an AFS directory file
*/
static int afs_dir_open(struct inode *inode, struct file *file)
@@ -277,6 +311,7 @@ retry:
goto error;
if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+ trace_afs_reload_dir(dvnode);
ret = afs_fetch_data(dvnode, key, req);
if (ret < 0)
goto error_unlock;
@@ -288,10 +323,8 @@ retry:
/* Validate the data we just read. */
ret = -EIO;
- for (i = 0; i < req->nr_pages; i++)
- if (!afs_dir_check_page(dvnode, req->pages[i],
- req->actual_len))
- goto error_unlock;
+ if (!afs_dir_check_pages(dvnode, req))
+ goto error_unlock;
// TODO: Trim excess pages
@@ -743,7 +776,7 @@ success:
ti = afs_iget(dir->i_sb, key, &cookie->fids[i],
&cookie->statuses[i],
&cookie->callbacks[i],
- cbi);
+ cbi, dvnode);
if (i == 0) {
inode = ti;
} else {
@@ -875,8 +908,14 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
(void *)(unsigned long)dvnode->status.data_version;
}
d = d_splice_alias(inode, dentry);
- if (!IS_ERR_OR_NULL(d))
+ if (!IS_ERR_OR_NULL(d)) {
d->d_fsdata = dentry->d_fsdata;
+ trace_afs_lookup(dvnode, &d->d_name,
+ inode ? AFS_FS_I(inode) : NULL);
+ } else {
+ trace_afs_lookup(dvnode, &dentry->d_name,
+ inode ? AFS_FS_I(inode) : NULL);
+ }
return d;
}
@@ -1053,6 +1092,16 @@ zap:
}
/*
+ * Clean up sillyrename files on dentry removal.
+ */
+static void afs_d_iput(struct dentry *dentry, struct inode *inode)
+{
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ afs_silly_iput(dentry, inode);
+ iput(inode);
+}
+
+/*
* handle dentry release
*/
void afs_d_release(struct dentry *dentry)
@@ -1076,7 +1125,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
return;
inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
- newfid, newstatus, newcb, fc->cbi);
+ newfid, newstatus, newcb, fc->cbi, fc->vnode);
if (IS_ERR(inode)) {
/* ENOMEM or EINTR at a really inconvenient time - just abandon
* the new directory on the server.
@@ -1194,6 +1243,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
goto error_key;
}
+ if (vnode) {
+ ret = down_write_killable(&vnode->rmdir_lock);
+ if (ret < 0)
+ goto error_key;
+ }
+
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
@@ -1212,6 +1267,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
}
}
+ if (vnode)
+ up_write(&vnode->rmdir_lock);
error_key:
key_put(key);
error:
@@ -1228,9 +1285,9 @@ error:
* However, if we didn't have a callback promise outstanding, or it was
* outstanding on a different server, then it won't break it either...
*/
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
- unsigned long d_version_before,
- unsigned long d_version_after)
+int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+ unsigned long d_version_before,
+ unsigned long d_version_after)
{
bool dir_valid;
int ret = 0;
@@ -1277,6 +1334,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
unsigned long d_version = (unsigned long)dentry->d_fsdata;
+ bool need_rehash = false;
u64 data_version = dvnode->status.data_version;
int ret;
@@ -1300,6 +1358,21 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
goto error_key;
}
+ spin_lock(&dentry->d_lock);
+ if (vnode && d_count(dentry) > 1) {
+ spin_unlock(&dentry->d_lock);
+ /* Start asynchronous writeout of the inode */
+ write_inode_now(d_inode(dentry), 0);
+ ret = afs_sillyrename(dvnode, vnode, dentry, key);
+ goto error_key;
+ }
+ if (!d_unhashed(dentry)) {
+ /* Prevent a race with RCU lookup. */
+ __d_drop(dentry);
+ need_rehash = true;
+ }
+ spin_unlock(&dentry->d_lock);
+
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
@@ -1331,6 +1404,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
afs_edit_dir_for_unlink);
}
+ if (need_rehash && ret < 0 && ret != -ENOENT)
+ d_rehash(dentry);
+
error_key:
key_put(key);
error:
@@ -1551,6 +1627,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct afs_fs_cursor fc;
struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+ struct dentry *tmp = NULL, *rehash = NULL;
+ struct inode *new_inode;
struct key *key;
u64 orig_data_version, new_data_version;
bool new_negative = d_is_negative(new_dentry);
@@ -1559,6 +1637,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (flags)
return -EINVAL;
+ /* Don't allow silly-rename files be moved around. */
+ if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ return -EINVAL;
+
vnode = AFS_FS_I(d_inode(old_dentry));
orig_dvnode = AFS_FS_I(old_dir);
new_dvnode = AFS_FS_I(new_dir);
@@ -1577,12 +1659,48 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto error;
}
+ /* For non-directories, check whether the target is busy and if so,
+ * make a copy of the dentry and then do a silly-rename. If the
+ * silly-rename succeeds, the copied dentry is hashed and becomes the
+ * new target.
+ */
+ if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
+ /* To prevent any new references to the target during the
+ * rename, we unhash the dentry in advance.
+ */
+ if (!d_unhashed(new_dentry)) {
+ d_drop(new_dentry);
+ rehash = new_dentry;
+ }
+
+ if (d_count(new_dentry) > 2) {
+ /* copy the target dentry's name */
+ ret = -ENOMEM;
+ tmp = d_alloc(new_dentry->d_parent,
+ &new_dentry->d_name);
+ if (!tmp)
+ goto error_rehash;
+
+ ret = afs_sillyrename(new_dvnode,
+ AFS_FS_I(d_inode(new_dentry)),
+ new_dentry, key);
+ if (ret)
+ goto error_rehash;
+
+ new_dentry = tmp;
+ rehash = NULL;
+ new_negative = true;
+ orig_data_version = orig_dvnode->status.data_version;
+ new_data_version = new_dvnode->status.data_version;
+ }
+ }
+
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
if (orig_dvnode != new_dvnode) {
if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
- goto error_key;
+ goto error_rehash;
}
}
while (afs_select_fileserver(&fc)) {
@@ -1599,25 +1717,42 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
mutex_unlock(&new_dvnode->io_lock);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
- goto error_key;
+ goto error_rehash;
}
if (ret == 0) {
+ if (rehash)
+ d_rehash(rehash);
if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
- afs_edit_dir_for_rename);
+ afs_edit_dir_for_rename_0);
if (!new_negative &&
test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
- afs_edit_dir_for_rename);
+ afs_edit_dir_for_rename_1);
if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
- &vnode->fid, afs_edit_dir_for_rename);
+ &vnode->fid, afs_edit_dir_for_rename_2);
+
+ new_inode = d_inode(new_dentry);
+ if (new_inode) {
+ spin_lock(&new_inode->i_lock);
+ if (new_inode->i_nlink > 0)
+ drop_nlink(new_inode);
+ spin_unlock(&new_inode->i_lock);
+ }
+ d_move(old_dentry, new_dentry);
+ goto error_tmp;
}
-error_key:
+error_rehash:
+ if (rehash)
+ d_rehash(rehash);
+error_tmp:
+ if (tmp)
+ dput(tmp);
key_put(key);
error:
_leave(" = %d", ret);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
new file mode 100644
index 000000000000..f6f89fdab6b2
--- /dev/null
+++ b/fs/afs/dir_silly.c
@@ -0,0 +1,239 @@
+/* AFS silly rename handling
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from NFS's sillyrename.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
+#include "internal.h"
+
+/*
+ * Actually perform the silly rename step.
+ */
+static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+ struct dentry *old, struct dentry *new,
+ struct key *key)
+{
+ struct afs_fs_cursor fc;
+ u64 dir_data_version = dvnode->status.data_version;
+ int ret = -ERESTARTSYS;
+
+ _enter("%pd,%pd", old, new);
+
+ trace_afs_silly_rename(vnode, false);
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+ afs_fs_rename(&fc, old->d_name.name,
+ dvnode, new->d_name.name,
+ dir_data_version, dir_data_version);
+ }
+
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ if (ret == 0) {
+ spin_lock(&old->d_lock);
+ old->d_flags |= DCACHE_NFSFS_RENAMED;
+ spin_unlock(&old->d_lock);
+ if (dvnode->silly_key != key) {
+ key_put(dvnode->silly_key);
+ dvnode->silly_key = key_get(key);
+ }
+
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_edit_dir_remove(dvnode, &old->d_name,
+ afs_edit_dir_for_silly_0);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_edit_dir_add(dvnode, &new->d_name,
+ &vnode->fid, afs_edit_dir_for_silly_1);
+
+ /* vfs_unlink and the like do not issue this when a file is
+ * sillyrenamed, so do it here.
+ */
+ fsnotify_nameremove(old, 0);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * afs_sillyrename - Perform a silly-rename of a dentry
+ *
+ * AFS is stateless and the server doesn't know when the client is holding a
+ * file open. To prevent application problems when a file is unlinked while
+ * it's still open, the client performs a "silly-rename". That is, it renames
+ * the file to a hidden file in the same directory, and only performs the
+ * unlink once the last reference to it is put.
+ *
+ * The final cleanup is done during dentry_iput.
+ */
+int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+ struct dentry *dentry, struct key *key)
+{
+ static unsigned int sillycounter;
+ struct dentry *sdentry = NULL;
+ unsigned char silly[16];
+ int ret = -EBUSY;
+
+ _enter("");
+
+ /* We don't allow a dentry to be silly-renamed twice. */
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ return -EBUSY;
+
+ sdentry = NULL;
+ do {
+ int slen;
+
+ dput(sdentry);
+ sillycounter++;
+
+ /* Create a silly name. Note that the ".__afs" prefix is
+ * understood by the salvager and must not be changed.
+ */
+ slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+ sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+
+ /* N.B. Better to return EBUSY here ... it could be dangerous
+ * to delete the file while it's in use.
+ */
+ if (IS_ERR(sdentry))
+ goto out;
+ } while (!d_is_negative(sdentry));
+
+ ihold(&vnode->vfs_inode);
+
+ ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key);
+ switch (ret) {
+ case 0:
+ /* The rename succeeded. */
+ d_move(dentry, sdentry);
+ break;
+ case -ERESTARTSYS:
+ /* The result of the rename is unknown. Play it safe by forcing
+ * a new lookup.
+ */
+ d_drop(dentry);
+ d_drop(sdentry);
+ }
+
+ iput(&vnode->vfs_inode);
+ dput(sdentry);
+out:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Tell the server to remove a sillyrename file.
+ */
+static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+ struct dentry *dentry, struct key *key)
+{
+ struct afs_fs_cursor fc;
+ u64 dir_data_version = dvnode->status.data_version;
+ int ret = -ERESTARTSYS;
+
+ _enter("");
+
+ trace_afs_silly_rename(vnode, true);
+ if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+ !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+ yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+ dir_data_version);
+ if (fc.ac.error != -ECONNABORTED ||
+ fc.ac.abort_code != RXGEN_OPCODE)
+ continue;
+ set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
+ }
+
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
+ dir_data_version);
+ }
+
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ if (ret == 0) {
+ drop_nlink(&vnode->vfs_inode);
+ if (vnode->vfs_inode.i_nlink == 0) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
+ }
+ if (ret == 0 &&
+ test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_edit_dir_remove(dvnode, &dentry->d_name,
+ afs_edit_dir_for_unlink);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Remove sillyrename file on iput.
+ */
+int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent));
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct dentry *alias;
+ int ret;
+
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+
+ down_read(&dvnode->rmdir_lock);
+
+ alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+ if (IS_ERR(alias)) {
+ up_read(&dvnode->rmdir_lock);
+ return 0;
+ }
+
+ if (!d_in_lookup(alias)) {
+ /* We raced with lookup... See if we need to transfer the
+ * sillyrename information to the aliased dentry.
+ */
+ ret = 0;
+ spin_lock(&alias->d_lock);
+ if (d_really_is_positive(alias) &&
+ !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+ alias->d_flags |= DCACHE_NFSFS_RENAMED;
+ ret = 1;
+ }
+ spin_unlock(&alias->d_lock);
+ up_read(&dvnode->rmdir_lock);
+ dput(alias);
+ return ret;
+ }
+
+ /* Stop lock-release from complaining. */
+ spin_lock(&vnode->lock);
+ vnode->lock_state = AFS_VNODE_LOCK_DELETED;
+ trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0);
+ spin_unlock(&vnode->lock);
+
+ afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key);
+ up_read(&dvnode->rmdir_lock);
+ d_lookup_done(alias);
+ dput(alias);
+ return 1;
+}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 6a0174258382..adc88eff7849 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -13,9 +13,11 @@
#define AFS_LOCK_GRANTED 0
#define AFS_LOCK_PENDING 1
+#define AFS_LOCK_YOUR_TRY 2
struct workqueue_struct *afs_lock_manager;
+static void afs_next_locker(struct afs_vnode *vnode, int error);
static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
static void afs_fl_release_private(struct file_lock *fl);
@@ -24,6 +26,14 @@ static const struct file_lock_operations afs_lock_ops = {
.fl_release_private = afs_fl_release_private,
};
+static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_state state)
+{
+ _debug("STATE %u -> %u", vnode->lock_state, state);
+ vnode->lock_state = state;
+}
+
+static atomic_t afs_file_lock_debug_id;
+
/*
* if the callback is broken on this vnode, then the lock may now be available
*/
@@ -31,7 +41,14 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
{
_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
- queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
+ if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB)
+ return;
+
+ spin_lock(&vnode->lock);
+ if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
+ afs_next_locker(vnode, 0);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_callback_break, 0);
+ spin_unlock(&vnode->lock);
}
/*
@@ -40,8 +57,35 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
*/
static void afs_schedule_lock_extension(struct afs_vnode *vnode)
{
- queue_delayed_work(afs_lock_manager, &vnode->lock_work,
- AFS_LOCKWAIT * HZ / 2);
+ ktime_t expires_at, now, duration;
+ u64 duration_j;
+
+ expires_at = ktime_add_ms(vnode->locked_at, AFS_LOCKWAIT * 1000 / 2);
+ now = ktime_get_real();
+ duration = ktime_sub(expires_at, now);
+ if (duration <= 0)
+ duration_j = 0;
+ else
+ duration_j = nsecs_to_jiffies(ktime_to_ns(duration));
+
+ queue_delayed_work(afs_lock_manager, &vnode->lock_work, duration_j);
+}
+
+/*
+ * In the case of successful completion of a lock operation, record the time
+ * the reply appeared and start the lock extension timer.
+ */
+void afs_lock_op_done(struct afs_call *call)
+{
+ struct afs_vnode *vnode = call->reply[0];
+
+ if (call->error == 0) {
+ spin_lock(&vnode->lock);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
+ vnode->locked_at = call->reply_time;
+ afs_schedule_lock_extension(vnode);
+ spin_unlock(&vnode->lock);
+ }
}
/*
@@ -49,22 +93,90 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode)
* first lock in the queue is itself a readlock)
* - the caller must hold the vnode lock
*/
-static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
+static void afs_grant_locks(struct afs_vnode *vnode)
{
struct file_lock *p, *_p;
+ bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE);
- list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
- if (fl->fl_type == F_RDLCK) {
- list_for_each_entry_safe(p, _p, &vnode->pending_locks,
- fl_u.afs.link) {
- if (p->fl_type == F_RDLCK) {
- p->fl_u.afs.state = AFS_LOCK_GRANTED;
- list_move_tail(&p->fl_u.afs.link,
- &vnode->granted_locks);
- wake_up(&p->fl_wait);
- }
+ list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+ if (!exclusive && p->fl_type == F_WRLCK)
+ continue;
+
+ list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
+ p->fl_u.afs.state = AFS_LOCK_GRANTED;
+ trace_afs_flock_op(vnode, p, afs_flock_op_grant);
+ wake_up(&p->fl_wait);
+ }
+}
+
+/*
+ * If an error is specified, reject every pending lock that matches the
+ * authentication and type of the lock we failed to get. If there are any
+ * remaining lockers, try to wake up one of them to have a go.
+ */
+static void afs_next_locker(struct afs_vnode *vnode, int error)
+{
+ struct file_lock *p, *_p, *next = NULL;
+ struct key *key = vnode->lock_key;
+ unsigned int fl_type = F_RDLCK;
+
+ _enter("");
+
+ if (vnode->lock_type == AFS_LOCK_WRITE)
+ fl_type = F_WRLCK;
+
+ list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+ if (error &&
+ p->fl_type == fl_type &&
+ afs_file_key(p->fl_file) == key) {
+ list_del_init(&p->fl_u.afs.link);
+ p->fl_u.afs.state = error;
+ wake_up(&p->fl_wait);
}
+
+ /* Select the next locker to hand off to. */
+ if (next &&
+ (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK))
+ continue;
+ next = p;
}
+
+ vnode->lock_key = NULL;
+ key_put(key);
+
+ if (next) {
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+ next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
+ trace_afs_flock_op(vnode, next, afs_flock_op_wake);
+ wake_up(&next->fl_wait);
+ } else {
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0);
+ }
+
+ _leave("");
+}
+
+/*
+ * Kill off all waiters in the the pending lock queue due to the vnode being
+ * deleted.
+ */
+static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
+{
+ struct file_lock *p;
+
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED);
+
+ while (!list_empty(&vnode->pending_locks)) {
+ p = list_entry(vnode->pending_locks.next,
+ struct file_lock, fl_u.afs.link);
+ list_del_init(&p->fl_u.afs.link);
+ p->fl_u.afs.state = -ENOENT;
+ wake_up(&p->fl_wait);
+ }
+
+ key_put(vnode->lock_key);
+ vnode->lock_key = NULL;
}
/*
@@ -170,8 +282,6 @@ void afs_lock_work(struct work_struct *work)
{
struct afs_vnode *vnode =
container_of(work, struct afs_vnode, lock_work.work);
- struct file_lock *fl, *next;
- afs_lock_type_t type;
struct key *key;
int ret;
@@ -183,35 +293,28 @@ again:
_debug("wstate %u for %p", vnode->lock_state, vnode);
switch (vnode->lock_state) {
case AFS_VNODE_LOCK_NEED_UNLOCK:
- _debug("unlock");
- vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_work_unlocking, 0);
spin_unlock(&vnode->lock);
/* attempt to release the server lock; if it fails, we just
* wait 5 minutes and it'll expire anyway */
ret = afs_release_lock(vnode, vnode->lock_key);
- if (ret < 0)
+ if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) {
+ trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
+ ret);
printk(KERN_WARNING "AFS:"
" Failed to release lock on {%llx:%llx} error %d\n",
vnode->fid.vid, vnode->fid.vnode, ret);
-
- spin_lock(&vnode->lock);
- key_put(vnode->lock_key);
- vnode->lock_key = NULL;
- vnode->lock_state = AFS_VNODE_LOCK_NONE;
-
- if (list_empty(&vnode->pending_locks)) {
- spin_unlock(&vnode->lock);
- return;
}
- /* The new front of the queue now owns the state variables. */
- next = list_entry(vnode->pending_locks.next,
- struct file_lock, fl_u.afs.link);
- vnode->lock_key = key_get(afs_file_key(next->fl_file));
- vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
- vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
- goto again;
+ spin_lock(&vnode->lock);
+ if (ret == -ENOENT)
+ afs_kill_lockers_enoent(vnode);
+ else
+ afs_next_locker(vnode, 0);
+ spin_unlock(&vnode->lock);
+ return;
/* If we've already got a lock, then it must be time to extend that
* lock as AFS locks time out after 5 minutes.
@@ -222,86 +325,55 @@ again:
ASSERT(!list_empty(&vnode->granted_locks));
key = key_get(vnode->lock_key);
- vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING);
+ trace_afs_flock_ev(vnode, NULL, afs_flock_work_extending, 0);
spin_unlock(&vnode->lock);
ret = afs_extend_lock(vnode, key); /* RPC */
key_put(key);
- if (ret < 0)
+ if (ret < 0) {
+ trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail,
+ ret);
pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
vnode->fid.vid, vnode->fid.vnode, ret);
+ }
spin_lock(&vnode->lock);
+ if (ret == -ENOENT) {
+ afs_kill_lockers_enoent(vnode);
+ spin_unlock(&vnode->lock);
+ return;
+ }
+
if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
goto again;
- vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+ afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
- if (ret == 0)
- afs_schedule_lock_extension(vnode);
- else
+ if (ret != 0)
queue_delayed_work(afs_lock_manager, &vnode->lock_work,
HZ * 10);
spin_unlock(&vnode->lock);
_leave(" [ext]");
return;
- /* If we don't have a granted lock, then we must've been called
- * back by the server, and so if might be possible to get a
- * lock we're currently waiting for.
- */
+ /* If we're waiting for a callback to indicate lock release, we can't
+ * actually rely on this, so need to recheck at regular intervals. The
+ * problem is that the server might not notify us if the lock just
+ * expires (say because a client died) rather than being explicitly
+ * released.
+ */
case AFS_VNODE_LOCK_WAITING_FOR_CB:
- _debug("get");
-
- key = key_get(vnode->lock_key);
- type = vnode->lock_type;
- vnode->lock_state = AFS_VNODE_LOCK_SETTING;
+ _debug("retry");
+ afs_next_locker(vnode, 0);
spin_unlock(&vnode->lock);
+ return;
- ret = afs_set_lock(vnode, key, type); /* RPC */
- key_put(key);
-
- spin_lock(&vnode->lock);
- switch (ret) {
- case -EWOULDBLOCK:
- _debug("blocked");
- break;
- case 0:
- _debug("acquired");
- vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
- /* Fall through */
- default:
- /* Pass the lock or the error onto the first locker in
- * the list - if they're looking for this type of lock.
- * If they're not, we assume that whoever asked for it
- * took a signal.
- */
- if (list_empty(&vnode->pending_locks)) {
- _debug("withdrawn");
- vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
- goto again;
- }
-
- fl = list_entry(vnode->pending_locks.next,
- struct file_lock, fl_u.afs.link);
- type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
- if (vnode->lock_type != type) {
- _debug("changed");
- vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
- goto again;
- }
-
- fl->fl_u.afs.state = ret;
- if (ret == 0)
- afs_grant_locks(vnode, fl);
- else
- list_del_init(&fl->fl_u.afs.link);
- wake_up(&fl->fl_wait);
- spin_unlock(&vnode->lock);
- _leave(" [granted]");
- return;
- }
+ case AFS_VNODE_LOCK_DELETED:
+ afs_kill_lockers_enoent(vnode);
+ spin_unlock(&vnode->lock);
+ return;
/* Fall through */
defaul