Merge tag 'afs-next-20190507' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull AFS updates from David Howells: "A set of fix and development patches for AFS for 5.2. Summary: - Fix the AFS file locking so that sqlite can run on an AFS mount and also so that firefox and gnome can use a homedir that's mounted through AFS. This required emulation of fine-grained locking when the server will only support whole-file locks and no upgrade/downgrade. Four modes are provided, settable by mount parameter: "flock=local" - No reference to the server "flock=openafs" - Fine-grained locks are local-only, whole-file locks require sufficient server locks "flock=strict" - All locks require sufficient server locks "flock=write" - Always get an exclusive server lock If the volume is a read-only or backup volume, then flock=local for that volume. - Log extra information for a couple of cases where the client mucks up somehow: AFS vnode with undefined type and dir check failure - in both cases we seem to end up with unfilled data, but the issues happen infrequently and are difficult to reproduce at will. - Implement silly rename for unlink() and rename(). - Set i_blocks so that du can get some information about usage. - Fix xattr handlers to return the right amount of data and to not overflow buffers. - Implement getting/setting raw AFS and YFS ACLs as xattrs" * tag 'afs-next-20190507' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: afs: Implement YFS ACL setting afs: Get YFS ACLs and information through xattrs afs: implement acl setting afs: Get an AFS3 ACL as an xattr afs: Fix getting the afs.fid xattr afs: Fix the afs.cell and afs.volume xattr handlers afs: Calculate i_blocks based on file size afs: Log more information for "kAFS: AFS vnode with undefined type\n" afs: Provide mount-time configurable byte-range file locking emulation afs: Add more tracepoints afs: Implement sillyrename for unlink and rename afs: Add directory reload tracepoint afs: Handle lock rpc ops failing on a file that got deleted afs: Improve dir check failure reports afs: Add file locking tracepoints afs: Further fix file locking afs: Fix AFS file locking to allow fine grained locks afs: Calculate lock extend timer from set/extend reply reception afs: Split wait from afs_make_call()
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-05-07 20:51:58 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-05-07 20:51:58 -0700
commit: e5fef2a9732580c5bd30c0097f5e9091a3d58ce5 (patch)
tree: cb91b5a51f7e451300e06e203a50cbed8aabbc1c /fs
parent: 149e703cb8bfcbdae46140b108bb6f7d2407df8f (diff)
parent: f5e4546347bc847be30b3cf904db5fc874b3c5dc (diff)
16 files changed, 1761 insertions, 373 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 0738e2bf5193..cbf31f6cd177 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -13,6 +13,7 @@ kafs-y := \
 	cmservice.o \
 	dir.o \
 	dir_edit.o \
+	dir_silly.o \
 	dynroot.o \
 	file.o \
 	flock.o \
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index ddfa88a7a9c0..18a54ca422f8 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -17,8 +17,10 @@
 
 enum AFS_FS_Operations {
 	FSFETCHDATA		= 130,	/* AFS Fetch file data */
+	FSFETCHACL		= 131,	/* AFS Fetch file ACL */
 	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
 	FSSTOREDATA		= 133,	/* AFS Store file data */
+	FSSTOREACL		= 134,	/* AFS Store file ACL */
 	FSSTORESTATUS		= 135,	/* AFS Store file status */
 	FSREMOVEFILE		= 136,	/* AFS Remove a file */
 	FSCREATEFILE		= 137,	/* AFS Create a file */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 8a2562e3a316..9a466be583d2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -26,6 +26,7 @@ static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
+static void afs_d_iput(struct dentry *dentry, struct inode *inode);
 static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
@@ -85,6 +86,7 @@ const struct dentry_operations afs_fs_dentry_operations = {
 	.d_delete	= afs_d_delete,
 	.d_release	= afs_d_release,
 	.d_automount	= afs_d_automount,
+	.d_iput		= afs_d_iput,
 };
 
 struct afs_lookup_one_cookie {
@@ -160,6 +162,38 @@ error:
 }
 
 /*
+ * Check the contents of a directory that we've just read.
+ */
+static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
+{
+	struct afs_xdr_dir_page *dbuf;
+	unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+
+	for (i = 0; i < req->nr_pages; i++)
+		if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
+			goto bad;
+	return true;
+
+bad:
+	pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
+		dvnode->fid.vid, dvnode->fid.vnode,
+		req->file_size, req->len, req->actual_len, req->remain);
+	pr_warn("DIR %llx %x %x %x\n",
+		req->pos, req->index, req->nr_pages, req->offset);
+
+	for (i = 0; i < req->nr_pages; i++) {
+		dbuf = kmap(req->pages[i]);
+		for (j = 0; j < qty; j++) {
+			union afs_xdr_dir_block *block = &dbuf->blocks[j];
+
+			pr_warn("[%02x] %32phN\n", i * qty + j, block);
+		}
+		kunmap(req->pages[i]);
+	}
+	return false;
+}
+
+/*
  * open an AFS directory file
  */
 static int afs_dir_open(struct inode *inode, struct file *file)
@@ -277,6 +311,7 @@ retry:
 		goto error;
 
 	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+		trace_afs_reload_dir(dvnode);
 		ret = afs_fetch_data(dvnode, key, req);
 		if (ret < 0)
 			goto error_unlock;
@@ -288,10 +323,8 @@ retry:
 
 		/* Validate the data we just read. */
 		ret = -EIO;
-		for (i = 0; i < req->nr_pages; i++)
-			if (!afs_dir_check_page(dvnode, req->pages[i],
-						req->actual_len))
-				goto error_unlock;
+		if (!afs_dir_check_pages(dvnode, req))
+			goto error_unlock;
 
 		// TODO: Trim excess pages
 
@@ -743,7 +776,7 @@ success:
 		ti = afs_iget(dir->i_sb, key, &cookie->fids[i],
 			      &cookie->statuses[i],
 			      &cookie->callbacks[i],
-			      cbi);
+			      cbi, dvnode);
 		if (i == 0) {
 			inode = ti;
 		} else {
@@ -875,8 +908,14 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 			(void *)(unsigned long)dvnode->status.data_version;
 	}
 	d = d_splice_alias(inode, dentry);
-	if (!IS_ERR_OR_NULL(d))
+	if (!IS_ERR_OR_NULL(d)) {
 		d->d_fsdata = dentry->d_fsdata;
+		trace_afs_lookup(dvnode, &d->d_name,
+				 inode ? AFS_FS_I(inode) : NULL);
+	} else {
+		trace_afs_lookup(dvnode, &dentry->d_name,
+				 inode ? AFS_FS_I(inode) : NULL);
+	}
 	return d;
 }
 
@@ -1053,6 +1092,16 @@ zap:
 }
 
 /*
+ * Clean up sillyrename files on dentry removal.
+ */
+static void afs_d_iput(struct dentry *dentry, struct inode *inode)
+{
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		afs_silly_iput(dentry, inode);
+	iput(inode);
+}
+
+/*
  * handle dentry release
  */
 void afs_d_release(struct dentry *dentry)
@@ -1076,7 +1125,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 		return;
 
 	inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
-			 newfid, newstatus, newcb, fc->cbi);
+			 newfid, newstatus, newcb, fc->cbi, fc->vnode);
 	if (IS_ERR(inode)) {
 		/* ENOMEM or EINTR at a really inconvenient time - just abandon
 		 * the new directory on the server.
@@ -1194,6 +1243,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 			goto error_key;
 	}
 
+	if (vnode) {
+		ret = down_write_killable(&vnode->rmdir_lock);
+		if (ret < 0)
+			goto error_key;
+	}
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
@@ -1212,6 +1267,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 		}
 	}
 
+	if (vnode)
+		up_write(&vnode->rmdir_lock);
 error_key:
 	key_put(key);
 error:
@@ -1228,9 +1285,9 @@ error:
  * However, if we didn't have a callback promise outstanding, or it was
  * outstanding on a different server, then it won't break it either...
  */
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
-			       unsigned long d_version_before,
-			       unsigned long d_version_after)
+int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+			unsigned long d_version_before,
+			unsigned long d_version_after)
 {
 	bool dir_valid;
 	int ret = 0;
@@ -1277,6 +1334,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
 	struct key *key;
 	unsigned long d_version = (unsigned long)dentry->d_fsdata;
+	bool need_rehash = false;
 	u64 data_version = dvnode->status.data_version;
 	int ret;
 
@@ -1300,6 +1358,21 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 			goto error_key;
 	}
 
+	spin_lock(&dentry->d_lock);
+	if (vnode && d_count(dentry) > 1) {
+		spin_unlock(&dentry->d_lock);
+		/* Start asynchronous writeout of the inode */
+		write_inode_now(d_inode(dentry), 0);
+		ret = afs_sillyrename(dvnode, vnode, dentry, key);
+		goto error_key;
+	}
+	if (!d_unhashed(dentry)) {
+		/* Prevent a race with RCU lookup. */
+		__d_drop(dentry);
+		need_rehash = true;
+	}
+	spin_unlock(&dentry->d_lock);
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
@@ -1331,6 +1404,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 					    afs_edit_dir_for_unlink);
 	}
 
+	if (need_rehash && ret < 0 && ret != -ENOENT)
+		d_rehash(dentry);
+
 error_key:
 	key_put(key);
 error:
@@ -1551,6 +1627,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct afs_fs_cursor fc;
 	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+	struct dentry *tmp = NULL, *rehash = NULL;
+	struct inode *new_inode;
 	struct key *key;
 	u64 orig_data_version, new_data_version;
 	bool new_negative = d_is_negative(new_dentry);
@@ -1559,6 +1637,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (flags)
 		return -EINVAL;
 
+	/* Don't allow silly-rename files be moved around. */
+	if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EINVAL;
+
 	vnode = AFS_FS_I(d_inode(old_dentry));
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);
@@ -1577,12 +1659,48 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto error;
 	}
 
+	/* For non-directories, check whether the target is busy and if so,
+	 * make a copy of the dentry and then do a silly-rename.  If the
+	 * silly-rename succeeds, the copied dentry is hashed and becomes the
+	 * new target.
+	 */
+	if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) {
+		/* To prevent any new references to the target during the
+		 * rename, we unhash the dentry in advance.
+		 */
+		if (!d_unhashed(new_dentry)) {
+			d_drop(new_dentry);
+			rehash = new_dentry;
+		}
+
+		if (d_count(new_dentry) > 2) {
+			/* copy the target dentry's name */
+			ret = -ENOMEM;
+			tmp = d_alloc(new_dentry->d_parent,
+				      &new_dentry->d_name);
+			if (!tmp)
+				goto error_rehash;
+
+			ret = afs_sillyrename(new_dvnode,
+					      AFS_FS_I(d_inode(new_dentry)),
+					      new_dentry, key);
+			if (ret)
+				goto error_rehash;
+
+			new_dentry = tmp;
+			rehash = NULL;
+			new_negative = true;
+			orig_data_version = orig_dvnode->status.data_version;
+			new_data_version = new_dvnode->status.data_version;
+		}
+	}
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
 		if (orig_dvnode != new_dvnode) {
 			if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
 				afs_end_vnode_operation(&fc);
-				goto error_key;
+				goto error_rehash;
 			}
 		}
 		while (afs_select_fileserver(&fc)) {
@@ -1599,25 +1717,42 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			mutex_unlock(&new_dvnode->io_lock);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
-			goto error_key;
+			goto error_rehash;
 	}
 
 	if (ret == 0) {
+		if (rehash)
+			d_rehash(rehash);
 		if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
 		    afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
-					afs_edit_dir_for_rename);
+					afs_edit_dir_for_rename_0);
 
 		if (!new_negative &&
 		    test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
 			afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
-					    afs_edit_dir_for_rename);
+					    afs_edit_dir_for_rename_1);
 
 		if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
 			afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
-					 &vnode->fid,  afs_edit_dir_for_rename);
+					 &vnode->fid, afs_edit_dir_for_rename_2);
+
+		new_inode = d_inode(new_dentry);
+		if (new_inode) {
+			spin_lock(&new_inode->i_lock);
+			if (new_inode->i_nlink > 0)
+				drop_nlink(new_inode);
+			spin_unlock(&new_inode->i_lock);
+		}
+		d_move(old_dentry, new_dentry);
+		goto error_tmp;
 	}
 
-error_key:
+error_rehash:
+	if (rehash)
+		d_rehash(rehash);
+error_tmp:
+	if (tmp)
+		dput(tmp);
 	key_put(key);
 error:
 	_leave(" = %d", ret);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
new file mode 100644
index 000000000000..f6f89fdab6b2
--- /dev/null
+++ b/fs/afs/dir_silly.c
@@ -0,0 +1,239 @@
+/* AFS silly rename handling
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from NFS's sillyrename.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/fsnotify.h>
+#include "internal.h"
+
+/*
+ * Actually perform the silly rename step.
+ */
+static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *old, struct dentry *new,
+			       struct key *key)
+{
+	struct afs_fs_cursor fc;
+	u64 dir_data_version = dvnode->status.data_version;
+	int ret = -ERESTARTSYS;
+
+	_enter("%pd,%pd", old, new);
+
+	trace_afs_silly_rename(vnode, false);
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+			afs_fs_rename(&fc, old->d_name.name,
+				      dvnode, new->d_name.name,
+				      dir_data_version, dir_data_version);
+		}
+
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+	}
+
+	if (ret == 0) {
+		spin_lock(&old->d_lock);
+		old->d_flags |= DCACHE_NFSFS_RENAMED;
+		spin_unlock(&old->d_lock);
+		if (dvnode->silly_key != key) {
+			key_put(dvnode->silly_key);
+			dvnode->silly_key = key_get(key);
+		}
+
+		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_remove(dvnode, &old->d_name,
+					    afs_edit_dir_for_silly_0);
+		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_add(dvnode, &new->d_name,
+					 &vnode->fid, afs_edit_dir_for_silly_1);
+
+		/* vfs_unlink and the like do not issue this when a file is
+		 * sillyrenamed, so do it here.
+		 */
+		fsnotify_nameremove(old, 0);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/**
+ * afs_sillyrename - Perform a silly-rename of a dentry
+ *
+ * AFS is stateless and the server doesn't know when the client is holding a
+ * file open.  To prevent application problems when a file is unlinked while
+ * it's still open, the client performs a "silly-rename".  That is, it renames
+ * the file to a hidden file in the same directory, and only performs the
+ * unlink once the last reference to it is put.
+ *
+ * The final cleanup is done during dentry_iput.
+ */
+int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+		    struct dentry *dentry, struct key *key)
+{
+	static unsigned int sillycounter;
+	struct dentry *sdentry = NULL;
+	unsigned char silly[16];
+	int ret = -EBUSY;
+
+	_enter("");
+
+	/* We don't allow a dentry to be silly-renamed twice. */
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EBUSY;
+
+	sdentry = NULL;
+	do {
+		int slen;
+
+		dput(sdentry);
+		sillycounter++;
+
+		/* Create a silly name.  Note that the ".__afs" prefix is
+		 * understood by the salvager and must not be changed.
+		 */
+		slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter);
+		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+
+		/* N.B. Better to return EBUSY here ... it could be dangerous
+		 * to delete the file while it's in use.
+		 */
+		if (IS_ERR(sdentry))
+			goto out;
+	} while (!d_is_negative(sdentry));
+
+	ihold(&vnode->vfs_inode);
+
+	ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key);
+	switch (ret) {
+	case 0:
+		/* The rename succeeded. */
+		d_move(dentry, sdentry);
+		break;
+	case -ERESTARTSYS:
+		/* The result of the rename is unknown. Play it safe by forcing
+		 * a new lookup.
+		 */
+		d_drop(dentry);
+		d_drop(sdentry);
+	}
+
+	iput(&vnode->vfs_inode);
+	dput(sdentry);
+out:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Tell the server to remove a sillyrename file.
+ */
+static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			       struct dentry *dentry, struct key *key)
+{
+	struct afs_fs_cursor fc;
+	u64 dir_data_version = dvnode->status.data_version;
+	int ret = -ERESTARTSYS;
+
+	_enter("");
+
+	trace_afs_silly_rename(vnode, true);
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+
+			if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+			    !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+				yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+						    dir_data_version);
+				if (fc.ac.error != -ECONNABORTED ||
+				    fc.ac.abort_code != RXGEN_OPCODE)
+					continue;
+				set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
+			}
+
+			afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
+				      dir_data_version);
+		}
+
+		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+		ret = afs_end_vnode_operation(&fc);
+		if (ret == 0) {
+			drop_nlink(&vnode->vfs_inode);
+			if (vnode->vfs_inode.i_nlink == 0) {
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
+				clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+			}
+		}
+		if (ret == 0 &&
+		    test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_remove(dvnode, &dentry->d_name,
+					    afs_edit_dir_for_unlink);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Remove sillyrename file on iput.
+ */
+int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent));
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct dentry *alias;
+	int ret;
+
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+	_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+
+	down_read(&dvnode->rmdir_lock);
+
+	alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+	if (IS_ERR(alias)) {
+		up_read(&dvnode->rmdir_lock);
+		return 0;
+	}
+
+	if (!d_in_lookup(alias)) {
+		/* We raced with lookup...  See if we need to transfer the
+		 * sillyrename information to the aliased dentry.
+		 */
+		ret = 0;
+		spin_lock(&alias->d_lock);
+		if (d_really_is_positive(alias) &&
+		    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+			alias->d_flags |= DCACHE_NFSFS_RENAMED;
+			ret = 1;
+		}
+		spin_unlock(&alias->d_lock);
+		up_read(&dvnode->rmdir_lock);
+		dput(alias);
+		return ret;
+	}
+
+	/* Stop lock-release from complaining. */
+	spin_lock(&vnode->lock);
+	vnode->lock_state = AFS_VNODE_LOCK_DELETED;
+	trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0);
+	spin_unlock(&vnode->lock);
+
+	afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key);
+	up_read(&dvnode->rmdir_lock);
+	d_lookup_done(alias);
+	dput(alias);
+	return 1;
+}
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 6a0174258382..adc88eff7849 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -13,9 +13,11 @@
 
 #define AFS_LOCK_GRANTED	0
 #define AFS_LOCK_PENDING	1
+#define AFS_LOCK_YOUR_TRY	2
 
 struct workqueue_struct *afs_lock_manager;
 
+static void afs_next_locker(struct afs_vnode *vnode, int error);
 static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl);
 static void afs_fl_release_private(struct file_lock *fl);
 
@@ -24,6 +26,14 @@ static const struct file_lock_operations afs_lock_ops = {
 	.fl_release_private	= afs_fl_release_private,
 };
 
+static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_state state)
+{
+	_debug("STATE %u -> %u", vnode->lock_state, state);
+	vnode->lock_state = state;
+}
+
+static atomic_t afs_file_lock_debug_id;
+
 /*
  * if the callback is broken on this vnode, then the lock may now be available
  */
@@ -31,7 +41,14 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
 {
 	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
-	queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
+	if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB)
+		return;
+
+	spin_lock(&vnode->lock);
+	if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
+		afs_next_locker(vnode, 0);
+	trace_afs_flock_ev(vnode, NULL, afs_flock_callback_break, 0);
+	spin_unlock(&vnode->lock);
 }
 
 /*
@@ -40,8 +57,35 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
  */
 static void afs_schedule_lock_extension(struct afs_vnode *vnode)
 {
-	queue_delayed_work(afs_lock_manager, &vnode->lock_work,
-			   AFS_LOCKWAIT * HZ / 2);
+	ktime_t expires_at, now, duration;
+	u64 duration_j;
+
+	expires_at = ktime_add_ms(vnode->locked_at, AFS_LOCKWAIT * 1000 / 2);
+	now = ktime_get_real();
+	duration = ktime_sub(expires_at, now);
+	if (duration <= 0)
+		duration_j = 0;
+	else
+		duration_j = nsecs_to_jiffies(ktime_to_ns(duration));
+
+	queue_delayed_work(afs_lock_manager, &vnode->lock_work, duration_j);
+}
+
+/*
+ * In the case of successful completion of a lock operation, record the time
+ * the reply appeared and start the lock extension timer.
+ */
+void afs_lock_op_done(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+
+	if (call->error == 0) {
+		spin_lock(&vnode->lock);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
+		vnode->locked_at = call->reply_time;
+		afs_schedule_lock_extension(vnode);
+		spin_unlock(&vnode->lock);
+	}
 }
 
 /*
@@ -49,22 +93,90 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode)
  * first lock in the queue is itself a readlock)
  * - the caller must hold the vnode lock
  */
-static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl)
+static void afs_grant_locks(struct afs_vnode *vnode)
 {
 	struct file_lock *p, *_p;
+	bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE);
 
-	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
-	if (fl->fl_type == F_RDLCK) {
-		list_for_each_entry_safe(p, _p, &vnode->pending_locks,
-					 fl_u.afs.link) {
-			if (p->fl_type == F_RDLCK) {
-				p->fl_u.afs.state = AFS_LOCK_GRANTED;
-				list_move_tail(&p->fl_u.afs.link,
-					       &vnode->granted_locks);
-				wake_up(&p->fl_wait);
-			}
+	list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+		if (!exclusive && p->fl_type == F_WRLCK)
+			continue;
+
+		list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks);
+		p->fl_u.afs.state = AFS_LOCK_GRANTED;
+		trace_afs_flock_op(vnode, p, afs_flock_op_grant);
+		wake_up(&p->fl_wait);
+	}
+}
+
+/*
+ * If an error is specified, reject every pending lock that matches the
+ * authentication and type of the lock we failed to get.  If there are any
+ * remaining lockers, try to wake up one of them to have a go.
+ */
+static void afs_next_locker(struct afs_vnode *vnode, int error)
+{
+	struct file_lock *p, *_p, *next = NULL;
+	struct key *key = vnode->lock_key;
+	unsigned int fl_type = F_RDLCK;
+
+	_enter("");
+
+	if (vnode->lock_type == AFS_LOCK_WRITE)
+		fl_type = F_WRLCK;
+
+	list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) {
+		if (error &&
+		    p->fl_type == fl_type &&
+		    afs_file_key(p->fl_file) == key) {
+			list_del_init(&p->fl_u.afs.link);
+			p->fl_u.afs.state = error;
+			wake_up(&p->fl_wait);
 		}
+
+		/* Select the next locker to hand off to. */
+		if (next &&
+		    (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK))
+			continue;
+		next = p;
 	}
+
+	vnode->lock_key = NULL;
+	key_put(key);
+
+	if (next) {
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING);
+		next->fl_u.afs.state = AFS_LOCK_YOUR_TRY;
+		trace_afs_flock_op(vnode, next, afs_flock_op_wake);
+		wake_up(&next->fl_wait);
+	} else {
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0);
+	}
+
+	_leave("");
+}
+
+/*
+ * Kill off all waiters in the the pending lock queue due to the vnode being
+ * deleted.
+ */
+static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
+{
+	struct file_lock *p;
+
+	afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED);
+
+	while (!list_empty(&vnode->pending_locks)) {
+		p = list_entry(vnode->pending_locks.next,
+			       struct file_lock, fl_u.afs.link);
+		list_del_init(&p->fl_u.afs.link);
+		p->fl_u.afs.state = -ENOENT;
+		wake_up(&p->fl_wait);
+	}
+
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
 }
 
 /*
@@ -170,8 +282,6 @@ void afs_lock_work(struct work_struct *work)
 {
 	struct afs_vnode *vnode =
 		container_of(work, struct afs_vnode, lock_work.work);
-	struct file_lock *fl, *next;
-	afs_lock_type_t type;
 	struct key *key;
 	int ret;
 
@@ -183,35 +293,28 @@ again:
 	_debug("wstate %u for %p", vnode->lock_state, vnode);
 	switch (vnode->lock_state) {
 	case AFS_VNODE_LOCK_NEED_UNLOCK:
-		_debug("unlock");
-		vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_work_unlocking, 0);
 		spin_unlock(&vnode->lock);
 
 		/* attempt to release the server lock; if it fails, we just
 		 * wait 5 minutes and it'll expire anyway */
 		ret = afs_release_lock(vnode, vnode->lock_key);
-		if (ret < 0)
+		if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail,
+					   ret);
 			printk(KERN_WARNING "AFS:"
 			       " Failed to release lock on {%llx:%llx} error %d\n",
 			       vnode->fid.vid, vnode->fid.vnode, ret);
-
-		spin_lock(&vnode->lock);
-		key_put(vnode->lock_key);
-		vnode->lock_key = NULL;
-		vnode->lock_state = AFS_VNODE_LOCK_NONE;
-
-		if (list_empty(&vnode->pending_locks)) {
-			spin_unlock(&vnode->lock);
-			return;
 		}
 
-		/* The new front of the queue now owns the state variables. */
-		next = list_entry(vnode->pending_locks.next,
-				  struct file_lock, fl_u.afs.link);
-		vnode->lock_key = key_get(afs_file_key(next->fl_file));
-		vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
-		vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
-		goto again;
+		spin_lock(&vnode->lock);
+		if (ret == -ENOENT)
+			afs_kill_lockers_enoent(vnode);
+		else
+			afs_next_locker(vnode, 0);
+		spin_unlock(&vnode->lock);
+		return;
 
 	/* If we've already got a lock, then it must be time to extend that
 	 * lock as AFS locks time out after 5 minutes.
@@ -222,86 +325,55 @@ again:
 		ASSERT(!list_empty(&vnode->granted_locks));
 
 		key = key_get(vnode->lock_key);
-		vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING);
+		trace_afs_flock_ev(vnode, NULL, afs_flock_work_extending, 0);
 		spin_unlock(&vnode->lock);
 
 		ret = afs_extend_lock(vnode, key); /* RPC */
 		key_put(key);
 
-		if (ret < 0)
+		if (ret < 0) {
+			trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail,
+					   ret);
 			pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
 				   vnode->fid.vid, vnode->fid.vnode, ret);
+		}
 
 		spin_lock(&vnode->lock);
 
+		if (ret == -ENOENT) {
+			afs_kill_lockers_enoent(vnode);
+			spin_unlock(&vnode->lock);
+			return;
+		}
+
 		if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
 			goto again;
-		vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+		afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED);
 
-		if (ret == 0)
-			afs_schedule_lock_extension(vnode);
-		else
+		if (ret != 0)
 			queue_delayed_work(afs_lock_manager, &vnode->lock_work,
 					   HZ * 10);
 		spin_unlock(&vnode->lock);
 		_leave(" [ext]");
 		return;
 
-		/* If we don't have a granted lock, then we must've been called
-		 * back by the server, and so if might be possible to get a
-		 * lock we're currently waiting for.
-		 */
+	/* If we're waiting for a callback to indicate lock release, we can't
+	 * actually rely on this, so need to recheck at regular intervals.  The
+	 * problem is that the server might not notify us if the lock just
+	 * expires (say because a client died) rather than being explicitly
+	 * released.
+	 */
 	case AFS_VNODE_LOCK_WAITING_FOR_CB:
-		_debug("get");
-
-		key = key_get(vnode->lock_key);
-		type = vnode->lock_type;
-		vnode->lock_state = AFS_VNODE_LOCK_SETTING;
+		_debug("retry");
+		afs_next_locker(vnode, 0);
 		spin_unlock(&vnode->lock);
+		return;
 
-		ret = afs_set_lock(vnode, key, type); /* RPC */
-		key_put(key);
-
-		spin_lock(&vnode->lock);
-		switch (ret) {
-		case -EWOULDBLOCK:
-			_debug("blocked");
-			break;
-		case 0:
-			_debug("acquired");
-			vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
-			/* Fall through */
-		default:
-			/* Pass the lock or the error onto the first locker in
-			 * the list - if they're looking for this type of lock.
-			 * If they're not, we assume that whoever asked for it
-			 * took a signal.
-			 */
-			if (list_empty(&vnode->pending_locks)) {
-				_debug("withdrawn");
-				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
-				goto again;
-			}
-
-			fl = list_entry(vnode->pending_locks.next,
-					struct file_lock, fl_u.afs.link);
-			type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
-			if (vnode->lock_type != type) {
-				_debug("changed");
-				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
-				goto again;
-			}
-
-			fl->fl_u.afs.state = ret;
-			if (ret == 0)
-				afs_grant_locks(vnode, fl);
-			else
-				list_del_init(&fl->fl_u.afs.link);
-			wake_up(&fl->fl_wait);
-			spin_unlock(&vnode->lock);
-			_leave(" [granted]");
-			return;
-		}
+	case AFS_VNODE_LOCK_DELETED:
+		afs_kill_lockers_enoent(vnode);
+		spin_unlock(&vnode->lock);
+		return;
 
 		/* Fall through */
 	defaul
author	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-07 20:51:58 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-07 20:51:58 -0700
commit	e5fef2a9732580c5bd30c0097f5e9091a3d58ce5 (patch)
tree	cb91b5a51f7e451300e06e203a50cbed8aabbc1c /fs
parent	149e703cb8bfcbdae46140b108bb6f7d2407df8f (diff)
parent	f5e4546347bc847be30b3cf904db5fc874b3c5dc (diff)