diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 20:51:58 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 20:51:58 -0700 |
commit | e5fef2a9732580c5bd30c0097f5e9091a3d58ce5 (patch) | |
tree | cb91b5a51f7e451300e06e203a50cbed8aabbc1c /fs | |
parent | 149e703cb8bfcbdae46140b108bb6f7d2407df8f (diff) | |
parent | f5e4546347bc847be30b3cf904db5fc874b3c5dc (diff) |
Merge tag 'afs-next-20190507' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull AFS updates from David Howells:
"A set of fix and development patches for AFS for 5.2.
Summary:
- Fix the AFS file locking so that sqlite can run on an AFS mount and
also so that firefox and gnome can use a homedir that's mounted
through AFS.
This required emulation of fine-grained locking when the server
will only support whole-file locks and no upgrade/downgrade. Four
modes are provided, settable by mount parameter:
"flock=local" - No reference to the server
"flock=openafs" - Fine-grained locks are local-only, whole-file
locks require sufficient server locks
"flock=strict" - All locks require sufficient server locks
"flock=write" - Always get an exclusive server lock
If the volume is a read-only or backup volume, then flock=local for
that volume.
- Log extra information for a couple of cases where the client mucks
up somehow: AFS vnode with undefined type and dir check failure -
in both cases we seem to end up with unfilled data, but the issues
happen infrequently and are difficult to reproduce at will.
- Implement silly rename for unlink() and rename().
- Set i_blocks so that du can get some information about usage.
- Fix xattr handlers to return the right amount of data and to not
overflow buffers.
- Implement getting/setting raw AFS and YFS ACLs as xattrs"
* tag 'afs-next-20190507' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
afs: Implement YFS ACL setting
afs: Get YFS ACLs and information through xattrs
afs: implement acl setting
afs: Get an AFS3 ACL as an xattr
afs: Fix getting the afs.fid xattr
afs: Fix the afs.cell and afs.volume xattr handlers
afs: Calculate i_blocks based on file size
afs: Log more information for "kAFS: AFS vnode with undefined type\n"
afs: Provide mount-time configurable byte-range file locking emulation
afs: Add more tracepoints
afs: Implement sillyrename for unlink and rename
afs: Add directory reload tracepoint
afs: Handle lock rpc ops failing on a file that got deleted
afs: Improve dir check failure reports
afs: Add file locking tracepoints
afs: Further fix file locking
afs: Fix AFS file locking to allow fine grained locks
afs: Calculate lock extend timer from set/extend reply reception
afs: Split wait from afs_make_call()
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/Makefile | 1 | ||||
-rw-r--r-- | fs/afs/afs_fs.h | 2 | ||||
-rw-r--r-- | fs/afs/dir.c | 167 | ||||
-rw-r--r-- | fs/afs/dir_silly.c | 239 | ||||
-rw-r--r-- | fs/afs/flock.c | 616 | ||||
-rw-r--r-- | fs/afs/fs_probe.c | 13 | ||||
-rw-r--r-- | fs/afs/fsclient.c | 277 | ||||
-rw-r--r-- | fs/afs/inode.c | 43 | ||||
-rw-r--r-- | fs/afs/internal.h | 64 | ||||
-rw-r--r-- | fs/afs/protocol_yfs.h | 6 | ||||
-rw-r--r-- | fs/afs/rxrpc.c | 33 | ||||
-rw-r--r-- | fs/afs/super.c | 34 | ||||
-rw-r--r-- | fs/afs/vl_probe.c | 14 | ||||
-rw-r--r-- | fs/afs/vlclient.c | 26 | ||||
-rw-r--r-- | fs/afs/xattr.c | 270 | ||||
-rw-r--r-- | fs/afs/yfsclient.c | 329 |
16 files changed, 1761 insertions, 373 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 0738e2bf5193..cbf31f6cd177 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -13,6 +13,7 @@ kafs-y := \ cmservice.o \ dir.o \ dir_edit.o \ + dir_silly.o \ dynroot.o \ file.o \ flock.o \ diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index ddfa88a7a9c0..18a54ca422f8 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -17,8 +17,10 @@ enum AFS_FS_Operations { FSFETCHDATA = 130, /* AFS Fetch file data */ + FSFETCHACL = 131, /* AFS Fetch file ACL */ FSFETCHSTATUS = 132, /* AFS Fetch file status */ FSSTOREDATA = 133, /* AFS Store file data */ + FSSTOREACL = 134, /* AFS Store file ACL */ FSSTORESTATUS = 135, /* AFS Store file status */ FSREMOVEFILE = 136, /* AFS Remove a file */ FSCREATEFILE = 137, /* AFS Create a file */ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 8a2562e3a316..9a466be583d2 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -26,6 +26,7 @@ static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); +static void afs_d_iput(struct dentry *dentry, struct inode *inode); static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, @@ -85,6 +86,7 @@ const struct dentry_operations afs_fs_dentry_operations = { .d_delete = afs_d_delete, .d_release = afs_d_release, .d_automount = afs_d_automount, + .d_iput = afs_d_iput, }; struct afs_lookup_one_cookie { @@ -160,6 +162,38 @@ error: } /* + * Check the contents of a directory that we've just read. + */ +static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req) +{ + struct afs_xdr_dir_page *dbuf; + unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block); + + for (i = 0; i < req->nr_pages; i++) + if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len)) + goto bad; + return true; + +bad: + pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n", + dvnode->fid.vid, dvnode->fid.vnode, + req->file_size, req->len, req->actual_len, req->remain); + pr_warn("DIR %llx %x %x %x\n", + req->pos, req->index, req->nr_pages, req->offset); + + for (i = 0; i < req->nr_pages; i++) { + dbuf = kmap(req->pages[i]); + for (j = 0; j < qty; j++) { + union afs_xdr_dir_block *block = &dbuf->blocks[j]; + + pr_warn("[%02x] %32phN\n", i * qty + j, block); + } + kunmap(req->pages[i]); + } + return false; +} + +/* * open an AFS directory file */ static int afs_dir_open(struct inode *inode, struct file *file) @@ -277,6 +311,7 @@ retry: goto error; if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + trace_afs_reload_dir(dvnode); ret = afs_fetch_data(dvnode, key, req); if (ret < 0) goto error_unlock; @@ -288,10 +323,8 @@ retry: /* Validate the data we just read. */ ret = -EIO; - for (i = 0; i < req->nr_pages; i++) - if (!afs_dir_check_page(dvnode, req->pages[i], - req->actual_len)) - goto error_unlock; + if (!afs_dir_check_pages(dvnode, req)) + goto error_unlock; // TODO: Trim excess pages @@ -743,7 +776,7 @@ success: ti = afs_iget(dir->i_sb, key, &cookie->fids[i], &cookie->statuses[i], &cookie->callbacks[i], - cbi); + cbi, dvnode); if (i == 0) { inode = ti; } else { @@ -875,8 +908,14 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, (void *)(unsigned long)dvnode->status.data_version; } d = d_splice_alias(inode, dentry); - if (!IS_ERR_OR_NULL(d)) + if (!IS_ERR_OR_NULL(d)) { d->d_fsdata = dentry->d_fsdata; + trace_afs_lookup(dvnode, &d->d_name, + inode ? AFS_FS_I(inode) : NULL); + } else { + trace_afs_lookup(dvnode, &dentry->d_name, + inode ? AFS_FS_I(inode) : NULL); + } return d; } @@ -1053,6 +1092,16 @@ zap: } /* + * Clean up sillyrename files on dentry removal. + */ +static void afs_d_iput(struct dentry *dentry, struct inode *inode) +{ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + afs_silly_iput(dentry, inode); + iput(inode); +} + +/* * handle dentry release */ void afs_d_release(struct dentry *dentry) @@ -1076,7 +1125,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, return; inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key, - newfid, newstatus, newcb, fc->cbi); + newfid, newstatus, newcb, fc->cbi, fc->vnode); if (IS_ERR(inode)) { /* ENOMEM or EINTR at a really inconvenient time - just abandon * the new directory on the server. @@ -1194,6 +1243,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) goto error_key; } + if (vnode) { + ret = down_write_killable(&vnode->rmdir_lock); + if (ret < 0) + goto error_key; + } + ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { @@ -1212,6 +1267,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) } } + if (vnode) + up_write(&vnode->rmdir_lock); error_key: key_put(key); error: @@ -1228,9 +1285,9 @@ error: * However, if we didn't have a callback promise outstanding, or it was * outstanding on a different server, then it won't break it either... */ -static int afs_dir_remove_link(struct dentry *dentry, struct key *key, - unsigned long d_version_before, - unsigned long d_version_after) +int afs_dir_remove_link(struct dentry *dentry, struct key *key, + unsigned long d_version_before, + unsigned long d_version_after) { bool dir_valid; int ret = 0; @@ -1277,6 +1334,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; unsigned long d_version = (unsigned long)dentry->d_fsdata; + bool need_rehash = false; u64 data_version = dvnode->status.data_version; int ret; @@ -1300,6 +1358,21 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) goto error_key; } + spin_lock(&dentry->d_lock); + if (vnode && d_count(dentry) > 1) { + spin_unlock(&dentry->d_lock); + /* Start asynchronous writeout of the inode */ + write_inode_now(d_inode(dentry), 0); + ret = afs_sillyrename(dvnode, vnode, dentry, key); + goto error_key; + } + if (!d_unhashed(dentry)) { + /* Prevent a race with RCU lookup. */ + __d_drop(dentry); + need_rehash = true; + } + spin_unlock(&dentry->d_lock); + ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { @@ -1331,6 +1404,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_edit_dir_for_unlink); } + if (need_rehash && ret < 0 && ret != -ENOENT) + d_rehash(dentry); + error_key: key_put(key); error: @@ -1551,6 +1627,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct afs_fs_cursor fc; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; + struct dentry *tmp = NULL, *rehash = NULL; + struct inode *new_inode; struct key *key; u64 orig_data_version, new_data_version; bool new_negative = d_is_negative(new_dentry); @@ -1559,6 +1637,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, if (flags) return -EINVAL; + /* Don't allow silly-rename files be moved around. */ + if (old_dentry->d_flags & DCACHE_NFSFS_RENAMED) + return -EINVAL; + vnode = AFS_FS_I(d_inode(old_dentry)); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); @@ -1577,12 +1659,48 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, goto error; } + /* For non-directories, check whether the target is busy and if so, + * make a copy of the dentry and then do a silly-rename. If the + * silly-rename succeeds, the copied dentry is hashed and becomes the + * new target. + */ + if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) { + /* To prevent any new references to the target during the + * rename, we unhash the dentry in advance. + */ + if (!d_unhashed(new_dentry)) { + d_drop(new_dentry); + rehash = new_dentry; + } + + if (d_count(new_dentry) > 2) { + /* copy the target dentry's name */ + ret = -ENOMEM; + tmp = d_alloc(new_dentry->d_parent, + &new_dentry->d_name); + if (!tmp) + goto error_rehash; + + ret = afs_sillyrename(new_dvnode, + AFS_FS_I(d_inode(new_dentry)), + new_dentry, key); + if (ret) + goto error_rehash; + + new_dentry = tmp; + rehash = NULL; + new_negative = true; + orig_data_version = orig_dvnode->status.data_version; + new_data_version = new_dvnode->status.data_version; + } + } + ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) { if (orig_dvnode != new_dvnode) { if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); - goto error_key; + goto error_rehash; } } while (afs_select_fileserver(&fc)) { @@ -1599,25 +1717,42 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, mutex_unlock(&new_dvnode->io_lock); ret = afs_end_vnode_operation(&fc); if (ret < 0) - goto error_key; + goto error_rehash; } if (ret == 0) { + if (rehash) + d_rehash(rehash); if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, - afs_edit_dir_for_rename); + afs_edit_dir_for_rename_0); if (!new_negative && test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, - afs_edit_dir_for_rename); + afs_edit_dir_for_rename_1); if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) afs_edit_dir_add(new_dvnode, &new_dentry->d_name, - &vnode->fid, afs_edit_dir_for_rename); + &vnode->fid, afs_edit_dir_for_rename_2); + + new_inode = d_inode(new_dentry); + if (new_inode) { + spin_lock(&new_inode->i_lock); + if (new_inode->i_nlink > 0) + drop_nlink(new_inode); + spin_unlock(&new_inode->i_lock); + } + d_move(old_dentry, new_dentry); + goto error_tmp; } -error_key: +error_rehash: + if (rehash) + d_rehash(rehash); +error_tmp: + if (tmp) + dput(tmp); key_put(key); error: _leave(" = %d", ret); diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c new file mode 100644 index 000000000000..f6f89fdab6b2 --- /dev/null +++ b/fs/afs/dir_silly.c @@ -0,0 +1,239 @@ +/* AFS silly rename handling + * + * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * - Derived from NFS's sillyrename. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/fsnotify.h> +#include "internal.h" + +/* + * Actually perform the silly rename step. + */ +static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode, + struct dentry *old, struct dentry *new, + struct key *key) +{ + struct afs_fs_cursor fc; + u64 dir_data_version = dvnode->status.data_version; + int ret = -ERESTARTSYS; + + _enter("%pd,%pd", old, new); + + trace_afs_silly_rename(vnode, false); + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = afs_calc_vnode_cb_break(dvnode); + afs_fs_rename(&fc, old->d_name.name, + dvnode, new->d_name.name, + dir_data_version, dir_data_version); + } + + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + if (ret == 0) { + spin_lock(&old->d_lock); + old->d_flags |= DCACHE_NFSFS_RENAMED; + spin_unlock(&old->d_lock); + if (dvnode->silly_key != key) { + key_put(dvnode->silly_key); + dvnode->silly_key = key_get(key); + } + + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_remove(dvnode, &old->d_name, + afs_edit_dir_for_silly_0); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_add(dvnode, &new->d_name, + &vnode->fid, afs_edit_dir_for_silly_1); + + /* vfs_unlink and the like do not issue this when a file is + * sillyrenamed, so do it here. + */ + fsnotify_nameremove(old, 0); + } + + _leave(" = %d", ret); + return ret; +} + +/** + * afs_sillyrename - Perform a silly-rename of a dentry + * + * AFS is stateless and the server doesn't know when the client is holding a + * file open. To prevent application problems when a file is unlinked while + * it's still open, the client performs a "silly-rename". That is, it renames + * the file to a hidden file in the same directory, and only performs the + * unlink once the last reference to it is put. + * + * The final cleanup is done during dentry_iput. + */ +int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode, + struct dentry *dentry, struct key *key) +{ + static unsigned int sillycounter; + struct dentry *sdentry = NULL; + unsigned char silly[16]; + int ret = -EBUSY; + + _enter(""); + + /* We don't allow a dentry to be silly-renamed twice. */ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + return -EBUSY; + + sdentry = NULL; + do { + int slen; + + dput(sdentry); + sillycounter++; + + /* Create a silly name. Note that the ".__afs" prefix is + * understood by the salvager and must not be changed. + */ + slen = scnprintf(silly, sizeof(silly), ".__afs%04X", sillycounter); + sdentry = lookup_one_len(silly, dentry->d_parent, slen); + + /* N.B. Better to return EBUSY here ... it could be dangerous + * to delete the file while it's in use. + */ + if (IS_ERR(sdentry)) + goto out; + } while (!d_is_negative(sdentry)); + + ihold(&vnode->vfs_inode); + + ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key); + switch (ret) { + case 0: + /* The rename succeeded. */ + d_move(dentry, sdentry); + break; + case -ERESTARTSYS: + /* The result of the rename is unknown. Play it safe by forcing + * a new lookup. + */ + d_drop(dentry); + d_drop(sdentry); + } + + iput(&vnode->vfs_inode); + dput(sdentry); +out: + _leave(" = %d", ret); + return ret; +} + +/* + * Tell the server to remove a sillyrename file. + */ +static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode, + struct dentry *dentry, struct key *key) +{ + struct afs_fs_cursor fc; + u64 dir_data_version = dvnode->status.data_version; + int ret = -ERESTARTSYS; + + _enter(""); + + trace_afs_silly_rename(vnode, true); + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = afs_calc_vnode_cb_break(dvnode); + + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) && + !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) { + yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name, + dir_data_version); + if (fc.ac.error != -ECONNABORTED || + fc.ac.abort_code != RXGEN_OPCODE) + continue; + set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags); + } + + afs_fs_remove(&fc, vnode, dentry->d_name.name, false, + dir_data_version); + } + + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + if (ret == 0) { + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } + } + if (ret == 0 && + test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + afs_edit_dir_remove(dvnode, &dentry->d_name, + afs_edit_dir_for_unlink); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Remove sillyrename file on iput. + */ +int afs_silly_iput(struct dentry *dentry, struct inode *inode) +{ + struct afs_vnode *dvnode = AFS_FS_I(d_inode(dentry->d_parent)); + struct afs_vnode *vnode = AFS_FS_I(inode); + struct dentry *alias; + int ret; + + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + + _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode); + + down_read(&dvnode->rmdir_lock); + + alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq); + if (IS_ERR(alias)) { + up_read(&dvnode->rmdir_lock); + return 0; + } + + if (!d_in_lookup(alias)) { + /* We raced with lookup... See if we need to transfer the + * sillyrename information to the aliased dentry. + */ + ret = 0; + spin_lock(&alias->d_lock); + if (d_really_is_positive(alias) && + !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { + alias->d_flags |= DCACHE_NFSFS_RENAMED; + ret = 1; + } + spin_unlock(&alias->d_lock); + up_read(&dvnode->rmdir_lock); + dput(alias); + return ret; + } + + /* Stop lock-release from complaining. */ + spin_lock(&vnode->lock); + vnode->lock_state = AFS_VNODE_LOCK_DELETED; + trace_afs_flock_ev(vnode, NULL, afs_flock_silly_delete, 0); + spin_unlock(&vnode->lock); + + afs_do_silly_unlink(dvnode, vnode, dentry, dvnode->silly_key); + up_read(&dvnode->rmdir_lock); + d_lookup_done(alias); + dput(alias); + return 1; +} diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 6a0174258382..adc88eff7849 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -13,9 +13,11 @@ #define AFS_LOCK_GRANTED 0 #define AFS_LOCK_PENDING 1 +#define AFS_LOCK_YOUR_TRY 2 struct workqueue_struct *afs_lock_manager; +static void afs_next_locker(struct afs_vnode *vnode, int error); static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl); static void afs_fl_release_private(struct file_lock *fl); @@ -24,6 +26,14 @@ static const struct file_lock_operations afs_lock_ops = { .fl_release_private = afs_fl_release_private, }; +static inline void afs_set_lock_state(struct afs_vnode *vnode, enum afs_lock_state state) +{ + _debug("STATE %u -> %u", vnode->lock_state, state); + vnode->lock_state = state; +} + +static atomic_t afs_file_lock_debug_id; + /* * if the callback is broken on this vnode, then the lock may now be available */ @@ -31,7 +41,14 @@ void afs_lock_may_be_available(struct afs_vnode *vnode) { _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); - queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0); + if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB) + return; + + spin_lock(&vnode->lock); + if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) + afs_next_locker(vnode, 0); + trace_afs_flock_ev(vnode, NULL, afs_flock_callback_break, 0); + spin_unlock(&vnode->lock); } /* @@ -40,8 +57,35 @@ void afs_lock_may_be_available(struct afs_vnode *vnode) */ static void afs_schedule_lock_extension(struct afs_vnode *vnode) { - queue_delayed_work(afs_lock_manager, &vnode->lock_work, - AFS_LOCKWAIT * HZ / 2); + ktime_t expires_at, now, duration; + u64 duration_j; + + expires_at = ktime_add_ms(vnode->locked_at, AFS_LOCKWAIT * 1000 / 2); + now = ktime_get_real(); + duration = ktime_sub(expires_at, now); + if (duration <= 0) + duration_j = 0; + else + duration_j = nsecs_to_jiffies(ktime_to_ns(duration)); + + queue_delayed_work(afs_lock_manager, &vnode->lock_work, duration_j); +} + +/* + * In the case of successful completion of a lock operation, record the time + * the reply appeared and start the lock extension timer. + */ +void afs_lock_op_done(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + + if (call->error == 0) { + spin_lock(&vnode->lock); + trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0); + vnode->locked_at = call->reply_time; + afs_schedule_lock_extension(vnode); + spin_unlock(&vnode->lock); + } } /* @@ -49,22 +93,90 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode) * first lock in the queue is itself a readlock) * - the caller must hold the vnode lock */ -static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl) +static void afs_grant_locks(struct afs_vnode *vnode) { struct file_lock *p, *_p; + bool exclusive = (vnode->lock_type == AFS_LOCK_WRITE); - list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks); - if (fl->fl_type == F_RDLCK) { - list_for_each_entry_safe(p, _p, &vnode->pending_locks, - fl_u.afs.link) { - if (p->fl_type == F_RDLCK) { - p->fl_u.afs.state = AFS_LOCK_GRANTED; - list_move_tail(&p->fl_u.afs.link, - &vnode->granted_locks); - wake_up(&p->fl_wait); - } + list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) { + if (!exclusive && p->fl_type == F_WRLCK) + continue; + + list_move_tail(&p->fl_u.afs.link, &vnode->granted_locks); + p->fl_u.afs.state = AFS_LOCK_GRANTED; + trace_afs_flock_op(vnode, p, afs_flock_op_grant); + wake_up(&p->fl_wait); + } +} + +/* + * If an error is specified, reject every pending lock that matches the + * authentication and type of the lock we failed to get. If there are any + * remaining lockers, try to wake up one of them to have a go. + */ +static void afs_next_locker(struct afs_vnode *vnode, int error) +{ + struct file_lock *p, *_p, *next = NULL; + struct key *key = vnode->lock_key; + unsigned int fl_type = F_RDLCK; + + _enter(""); + + if (vnode->lock_type == AFS_LOCK_WRITE) + fl_type = F_WRLCK; + + list_for_each_entry_safe(p, _p, &vnode->pending_locks, fl_u.afs.link) { + if (error && + p->fl_type == fl_type && + afs_file_key(p->fl_file) == key) { + list_del_init(&p->fl_u.afs.link); + p->fl_u.afs.state = error; + wake_up(&p->fl_wait); } + + /* Select the next locker to hand off to. */ + if (next && + (next->fl_type == F_WRLCK || p->fl_type == F_RDLCK)) + continue; + next = p; } + + vnode->lock_key = NULL; + key_put(key); + + if (next) { + afs_set_lock_state(vnode, AFS_VNODE_LOCK_SETTING); + next->fl_u.afs.state = AFS_LOCK_YOUR_TRY; + trace_afs_flock_op(vnode, next, afs_flock_op_wake); + wake_up(&next->fl_wait); + } else { + afs_set_lock_state(vnode, AFS_VNODE_LOCK_NONE); + trace_afs_flock_ev(vnode, NULL, afs_flock_no_lockers, 0); + } + + _leave(""); +} + +/* + * Kill off all waiters in the the pending lock queue due to the vnode being + * deleted. + */ +static void afs_kill_lockers_enoent(struct afs_vnode *vnode) +{ + struct file_lock *p; + + afs_set_lock_state(vnode, AFS_VNODE_LOCK_DELETED); + + while (!list_empty(&vnode->pending_locks)) { + p = list_entry(vnode->pending_locks.next, + struct file_lock, fl_u.afs.link); + list_del_init(&p->fl_u.afs.link); + p->fl_u.afs.state = -ENOENT; + wake_up(&p->fl_wait); + } + + key_put(vnode->lock_key); + vnode->lock_key = NULL; } /* @@ -170,8 +282,6 @@ void afs_lock_work(struct work_struct *work) { struct afs_vnode *vnode = container_of(work, struct afs_vnode, lock_work.work); - struct file_lock *fl, *next; - afs_lock_type_t type; struct key *key; int ret; @@ -183,35 +293,28 @@ again: _debug("wstate %u for %p", vnode->lock_state, vnode); switch (vnode->lock_state) { case AFS_VNODE_LOCK_NEED_UNLOCK: - _debug("unlock"); - vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING; + afs_set_lock_state(vnode, AFS_VNODE_LOCK_UNLOCKING); + trace_afs_flock_ev(vnode, NULL, afs_flock_work_unlocking, 0); spin_unlock(&vnode->lock); /* attempt to release the server lock; if it fails, we just * wait 5 minutes and it'll expire anyway */ ret = afs_release_lock(vnode, vnode->lock_key); - if (ret < 0) + if (ret < 0 && vnode->lock_state != AFS_VNODE_LOCK_DELETED) { + trace_afs_flock_ev(vnode, NULL, afs_flock_release_fail, + ret); printk(KERN_WARNING "AFS:" " Failed to release lock on {%llx:%llx} error %d\n", vnode->fid.vid, vnode->fid.vnode, ret); - - spin_lock(&vnode->lock); - key_put(vnode->lock_key); - vnode->lock_key = NULL; - vnode->lock_state = AFS_VNODE_LOCK_NONE; - - if (list_empty(&vnode->pending_locks)) { - spin_unlock(&vnode->lock); - return; } - /* The new front of the queue now owns the state variables. */ - next = list_entry(vnode->pending_locks.next, - struct file_lock, fl_u.afs.link); - vnode->lock_key = key_get(afs_file_key(next->fl_file)); - vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; - vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB; - goto again; + spin_lock(&vnode->lock); + if (ret == -ENOENT) + afs_kill_lockers_enoent(vnode); + else + afs_next_locker(vnode, 0); + spin_unlock(&vnode->lock); + return; /* If we've already got a lock, then it must be time to extend that * lock as AFS locks time out after 5 minutes. @@ -222,86 +325,55 @@ again: ASSERT(!list_empty(&vnode->granted_locks)); key = key_get(vnode->lock_key); - vnode->lock_state = AFS_VNODE_LOCK_EXTENDING; + afs_set_lock_state(vnode, AFS_VNODE_LOCK_EXTENDING); + trace_afs_flock_ev(vnode, NULL, afs_flock_work_extending, 0); spin_unlock(&vnode->lock); ret = afs_extend_lock(vnode, key); /* RPC */ key_put(key); - if (ret < 0) + if (ret < 0) { + trace_afs_flock_ev(vnode, NULL, afs_flock_extend_fail, + ret); pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n", vnode->fid.vid, vnode->fid.vnode, ret); + } spin_lock(&vnode->lock); + if (ret == -ENOENT) { + afs_kill_lockers_enoent(vnode); + spin_unlock(&vnode->lock); + return; + } + if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING) goto again; - vnode->lock_state = AFS_VNODE_LOCK_GRANTED; + afs_set_lock_state(vnode, AFS_VNODE_LOCK_GRANTED); - if (ret == 0) - afs_schedule_lock_extension(vnode); - else + if (ret != 0) queue_delayed_work(afs_lock_manager, &vnode->lock_work, HZ * 10); spin_unlock(&vnode->lock); _leave(" [ext]"); return; - /* If we don't have a granted lock, then we must've been called - * back by the server, and so if might be possible to get a - * lock we're currently waiting for. - */ + /* If we're waiting for a callback to indicate lock release, we can't + * actually rely on this, so need to recheck at regular intervals. The + * problem is that the server might not notify us if the lock just + * expires (say because a client died) rather than being explicitly + * released. + */ case AFS_VNODE_LOCK_WAITING_FOR_CB: - _debug("get"); - - key = key_get(vnode->lock_key); - type = vnode->lock_type; - vnode->lock_state = AFS_VNODE_LOCK_SETTING; + _debug("retry"); + afs_next_locker(vnode, 0); spin_unlock(&vnode->lock); + return; - ret = afs_set_lock(vnode, key, type); /* RPC */ - key_put(key); - - spin_lock(&vnode->lock); - switch (ret) { - case -EWOULDBLOCK: - _debug("blocked"); - break; - case 0: - _debug("acquired"); - vnode->lock_state = AFS_VNODE_LOCK_GRANTED; - /* Fall through */ - default: - /* Pass the lock or the error onto the first locker in - * the list - if they're looking for this type of lock. - * If they're not, we assume that whoever asked for it - * took a signal. - */ - if (list_empty(&vnode->pending_locks)) { - _debug("withdrawn"); - vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK; - goto again; - } - - fl = list_entry(vnode->pending_locks.next, - struct file_lock, fl_u.afs.link); - type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; - if (vnode->lock_type != type) { - _debug("changed"); - vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK; - goto again; - } - - fl->fl_u.afs.state = ret; - if (ret == 0) - afs_grant_locks(vnode, fl); - else - list_del_init(&fl->fl_u.afs.link); - wake_up(&fl->fl_wait); - spin_unlock(&vnode->lock); - _leave(" [granted]"); - return; - } + case AFS_VNODE_LOCK_DELETED: + afs_kill_lockers_enoent(vnode); + spin_unlock(&vnode->lock); + return; /* Fall through */ defaul |