summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/porting16
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c2
-rw-r--r--arch/s390/hypfs/inode.c2
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/afs/inode.c2
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/exofs/inode.c4
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext3/inode.c6
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fs-writeback.c336
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c15
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/minix/inode.c2
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/pstore/inode.c2
-rw-r--r--fs/reiserfs/inode.c4
-rw-r--r--fs/sysfs/inode.c2
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--include/linux/fs.h13
-rw-r--r--include/linux/writeback.h10
-rw-r--r--include/trace/events/writeback.h36
-rw-r--r--ipc/mqueue.c2
-rw-r--r--mm/page-writeback.c3
-rw-r--r--mm/shmem.c2
56 files changed, 319 insertions, 220 deletions
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 74acd9618819..8c91d1057d9a 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -297,7 +297,8 @@ in the beginning of ->setattr unconditionally.
be used instead. It gets called whenever the inode is evicted, whether it has
remaining links or not. Caller does *not* evict the pagecache or inode-associated
metadata buffers; getting rid of those is responsibility of method, as it had
-been for ->delete_inode().
+been for ->delete_inode(). Caller makes sure async writeback cannot be running
+for the inode while (or after) ->evict_inode() is called.
->drop_inode() returns int now; it's called on final iput() with
inode->i_lock held and it returns true if filesystems wants the inode to be
@@ -306,14 +307,11 @@ updated appropriately. generic_delete_inode() is also alive and it consists
simply of return 1. Note that all actual eviction work is done by caller after
->drop_inode() returns.
- clear_inode() is gone; use end_writeback() instead. As before, it must
-be called exactly once on each call of ->evict_inode() (as it used to be for
-each call of ->delete_inode()). Unlike before, if you are using inode-associated
-metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to
-call invalidate_inode_buffers() before end_writeback().
- No async writeback (and thus no calls of ->write_inode()) will happen
-after end_writeback() returns, so actions that should not overlap with ->write_inode()
-(e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call.
+ As before, clear_inode() must be called exactly once on each call of
+->evict_inode() (as it used to be for each call of ->delete_inode()). Unlike
+before, if you are using inode-associated metadata buffers (i.e.
+mark_buffer_dirty_inode()), it's your responsibility to call
+invalidate_inode_buffers() before clear_inode().
NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out
if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput()
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 1d75c92ea8fb..66519d263da7 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -151,7 +151,7 @@ static void
spufs_evict_inode(struct inode *inode)
{
struct spufs_inode_info *ei = SPUFS_I(inode);
- end_writeback(inode);
+ clear_inode(inode);
if (ei->i_ctx)
put_spu_context(ei->i_ctx);
if (ei->i_gang)
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 6a2cb560e968..73dae8b9b77a 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -115,7 +115,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
static void hypfs_evict_inode(struct inode *inode)
{
- end_writeback(inode);
+ clear_inode(inode);
kfree(inode->i_private);
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 014c8dd62962..57ccb7537dae 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,7 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
struct v9fs_inode *v9inode = V9FS_I(inode);
truncate_inode_pages(inode->i_mapping, 0);
- end_writeback(inode);
+ clear_inode(inode);
filemap_fdatawrite(inode->i_mapping);
#ifdef CONFIG_9P_FSCACHE
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 88a4b0b50058..8bc4a59f4e7e 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -264,7 +264,7 @@ affs_evict_inode(struct inode *inode)
}
invalidate_inode_buffers(inode);
- end_writeback(inode);
+ clear_inode(inode);
affs_free_prealloc(inode);
cache_page = (unsigned long)AFFS_I(inode)->i_lc;
if (cache_page) {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d890ae3b2ce6..95cffd38239f 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -423,7 +423,7 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
afs_give_up_callback(vnode);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 6e488ebe7784..8a4fed8ead30 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -100,7 +100,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
static void autofs4_evict_inode(struct inode *inode)
{
- end_writeback(inode);
+ clear_inode(inode);
kfree(inode->i_private);
}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index e23dc7c8b884..9870417c26e7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -174,7 +174,7 @@ static void bfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
invalidate_inode_buffers(inode);
- end_writeback(inode);
+ clear_inode(inode);
if (inode->i_nlink)
return;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 613aa0618235..790b3cddca67 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -505,7 +505,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
static void bm_evict_inode(struct inode *inode)
{
- end_writeback(inode);
+ clear_inode(inode);
kfree(inode->i_private);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba11c30f302d..c2bbe1fb1326 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -487,7 +487,7 @@ static void bdev_evict_inode(struct inode *inode)
struct list_head *p;
truncate_inode_pages(&inode->i_data, 0);
invalidate_inode_buffers(inode); /* is it needed here? */
- end_writeback(inode);
+ clear_inode(inode);
spin_lock(&bdev_lock);
while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
__bd_forget(list_entry(p, struct inode, i_devices));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 61b16c641ce0..ceb7b9c9edcc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3756,7 +3756,7 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
no_delete:
- end_writeback(inode);
+ clear_inode(inode);
return;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 541ef81f6ae8..0a0fa0250e99 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -272,7 +272,7 @@ static void
cifs_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
cifs_fscache_release_inode_cookie(inode);
}
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2870597b5c9d..f1813120d753 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -244,7 +244,7 @@ static void coda_put_super(struct super_block *sb)
static void coda_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
coda_cache_clear_inode(inode);
}
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2dd946b636d2..e879cf8ff0b1 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -133,7 +133,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static void ecryptfs_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
iput(ecryptfs_inode_to_lower(inode));
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ea5e1f97806a..5badb0c039de 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1473,7 +1473,7 @@ void exofs_evict_inode(struct inode *inode)
goto no_delete;
inode->i_size = 0;
- end_writeback(inode);
+ clear_inode(inode);
/* if we are deleting an obj that hasn't been created yet, wait.
* This also makes sure that create_done cannot be called with an
@@ -1503,5 +1503,5 @@ void exofs_evict_inode(struct inode *inode)
return;
no_delete:
- end_writeback(inode);
+ clear_inode(inode);
}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index f9fa95f8443d..264d315f6c47 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -90,7 +90,7 @@ void ext2_evict_inode(struct inode * inode)
}
invalidate_inode_buffers(inode);
- end_writeback(inode);
+ clear_inode(inode);
ext2_discard_reservation(inode);
rsv = EXT2_I(inode)->i_block_alloc_info;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a09790a412b1..9a4a5c48b1c9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -272,18 +272,18 @@ void ext3_evict_inode (struct inode *inode)
if (ext3_mark_inode_dirty(handle, inode)) {
/* If that failed, just dquot_drop() and be done with that */
dquot_drop(inode);
- end_writeback(inode);
+ clear_inode(inode);
} else {
ext3_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);
- end_writeback(inode);
+ clear_inode(inode);
ext3_free_inode(handle, inode);
}
ext3_journal_stop(handle);
return;
no_delete:
- end_writeback(inode);
+ clear_inode(inode);
dquot_drop(inode);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1867a98e0c49..35b5954489ee 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1007,7 +1007,7 @@ static void destroy_inodecache(void)
void ext4_clear_inode(struct inode *inode)
{
invalidate_inode_buffers(inode);
- end_writeback(inode);
+ clear_inode(inode);
dquot_drop(inode);
ext4_discard_preallocations(inode);
if (EXT4_I(inode)->jinode) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 21687e31acc0..b3d290c1b513 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -454,7 +454,7 @@ static void fat_evict_inode(struct inode *inode)
fat_truncate_blocks(inode, 0);
}
invalidate_inode_buffers(inode);
- end_writeback(inode);
+ clear_inode(inode);
fat_cache_inval_inode(inode);
fat_detach(inode);
}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index cf9ef918a2a9..ef67c95f12d4 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -355,6 +355,6 @@ void
vxfs_evict_inode(struct inode *ip)
{
truncate_inode_pages(&ip->i_data, 0);
- end_writeback(ip);
+ clear_inode(ip);
call_rcu(&ip->i_rcu, vxfs_i_callback);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 539f36cf3e4a..8d2fb8c88cf3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -231,11 +231,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
static void inode_sync_complete(struct inode *inode)
{
- /*
- * Prevent speculative execution through
- * spin_unlock(&wb->list_lock);
- */
-
+ inode->i_state &= ~I_SYNC;
+ /* Waiters must see I_SYNC cleared before being woken up */
smp_mb();
wake_up_bit(&inode->i_state, __I_SYNC);
}
@@ -329,10 +326,12 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * Wait for writeback on an inode to complete.
+ * Wait for writeback on an inode to complete. Called with i_lock held.
+ * Caller must make sure inode cannot go away when we drop i_lock.
*/
-static void inode_wait_for_writeback(struct inode *inode,
- struct bdi_writeback *wb)
+static void __inode_wait_for_writeback(struct inode *inode)
+ __releases(inode->i_lock)
+ __acquires(inode->i_lock)
{
DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
wait_queue_head_t *wqh;
@@ -340,70 +339,119 @@ static void inode_wait_for_writeback(struct inode *inode,
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
while (inode->i_state & I_SYNC) {
spin_unlock(&inode->i_lock);
- spin_unlock(&wb->list_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
- spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
}
}
/*
- * Write out an inode's dirty pages. Called under wb->list_lock and
- * inode->i_lock. Either the caller has an active reference on the inode or
- * the inode has I_WILL_FREE set.
- *
- * If `wait' is set, wait on the writeout.
- *
- * The whole writeout design is quite complex and fragile. We want to avoid
- * starvation of particular inodes when others are being redirtied, prevent
- * livelocks, etc.
+ * Wait for writeback on an inode to complete. Caller must have inode pinned.
*/
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void inode_wait_for_writeback(struct inode *inode)
{
- struct address_space *mapping = inode->i_mapping;
- long nr_to_write = wbc->nr_to_write;
- unsigned dirty;
- int ret;
+ spin_lock(&inode->i_lock);
+ __inode_wait_for_writeback(inode);
+ spin_unlock(&inode->i_lock);
+}
- assert_spin_locked(&wb->list_lock);
- assert_spin_locked(&inode->i_lock);
+/*
+ * Sleep until I_SYNC is cleared. This function must be called with i_lock
+ * held and drops it. It is aimed for callers not holding any inode reference
+ * so once i_lock is dropped, inode can go away.
+ */
+static void inode_sleep_on_writeback(struct inode *inode)
+ __releases(inode->i_lock)
+{
+ DEFINE_WAIT(wait);
+ wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+ int sleep;
- if (!atomic_read(&inode->i_count))
- WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
- else
- WARN_ON(inode->i_state & I_WILL_FREE);
+ prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+ sleep = inode->i_state & I_SYNC;
+ spin_unlock(&inode->i_lock);
+ if (sleep)
+ schedule();
+ finish_wait(wqh, &wait);
+}
- if (inode->i_state & I_SYNC) {
+/*
+ * Find proper writeback list for the inode depending on its current state and
+ * possibly also change of its state while we were doing writeback. Here we
+ * handle things such as livelock prevention or fairness of writeback among
+ * inodes. This function can be called only by flusher thread - noone else
+ * processes all inodes in writeback lists and requeueing inodes behind flusher
+ * thread's back can have unexpected consequences.
+ */
+static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
+ struct writeback_control *wbc)
+{
+ if (inode->i_state & I_FREEING)
+ return;
+
+ /*
+ * Sync livelock prevention. Each inode is tagged and synced in one
+ * shot. If still dirty, it will be redirty_tail()'ed below. Update
+ * the dirty time to prevent enqueue and sync it again.
+ */
+ if ((inode->i_state & I_DIRTY) &&
+ (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
+ inode->dirtied_when = jiffies;
+
+ if (wbc->pages_skipped) {
/*
- * If this inode is locked for writeback and we are not doing
- * writeback-for-data-integrity, move it to b_more_io so that
- * writeback can proceed with the other inodes on s_io.
- *
- * We'll have another go at writing back this inode when we
- * completed a full scan of b_io.
+ * writeback is not making progress due to locked
+ * buffers. Skip this inode for now.
*/
- if (wbc->sync_mode != WB_SYNC_ALL) {
+ redirty_tail(inode, wb);
+ return;
+ }
+
+ if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+ /*
+ * We didn't write back all the pages. nfs_writepages()
+ * sometimes bales out without doing anything.
+ */
+ if (wbc->nr_to_write <= 0) {
+ /* Slice used up. Queue for next turn. */
requeue_io(inode, wb);
- trace_writeback_single_inode_requeue(inode, wbc,
- nr_to_write);
- return 0;
+ } else {
+ /*
+ * Writeback blocked by something other than
+ * congestion. Delay the inode for some time to
+ * avoid spinning on the CPU (100% iowait)
+ * retrying writeback of the dirty page/inode
+ * that cannot be performed immediately.
+ */
+ redirty_tail(inode, wb);
}
-
+ } else if (inode->i_state & I_DIRTY) {
/*
- * It's a data-integrity sync. We must wait.
+ * Filesystems can dirty the inode during writeback operations,
+ * such as delayed allocation during submission or metadata
+ * updates after data IO completion.
*/
- inode_wait_for_writeback(inode, wb);
+ redirty_tail(inode, wb);
+ } else {
+ /* The inode is clean. Remove from writeback lists. */
+ list_del_init(&inode->i_wb_list);
}
+}
- BUG_ON(inode->i_state & I_SYNC);
+/*
+ * Write out an inode and its dirty pages. Do not update the writeback list
+ * linkage. That is left to the caller. The caller is also responsible for
+ * setting I_SYNC flag and calling inode_sync_complete() to clear it.
+ */
+static int
+__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+ struct writeback_control *wbc)
+{
+ struct address_space *mapping = inode->i_mapping;
+ long nr_to_write = wbc->nr_to_write;
+ unsigned dirty;
+ int ret;
- /* Set I_SYNC, reset I_DIRTY_PAGES */
- inode->i_state |= I_SYNC;
- inode->i_state &= ~I_DIRTY_PAGES;
- spin_unlock(&inode->i_lock);
- spin_unlock(&wb->list_lock);
+ WARN_ON(!(inode->i_state & I_SYNC));
ret = do_writepages(mapping, wbc);
@@ -424,6 +472,9 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* write_inode()
*/
spin_lock(&inode->i_lock);
+ /* Clear I_DIRTY_PAGES if we've written out all dirty pages */
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ inode->i_state &= ~I_DIRTY_PAGES;
dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
spin_unlock(&inode->i_lock);
@@ -433,60 +484,67 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
if (ret == 0)
ret = err;
}
+ trace_writeback_single_inode(inode, wbc, nr_to_write);
+ return ret;
+}
+
+/*
+ * Write out an inode's dirty pages. Either the caller has an active reference
+ * on the inode or the inode has I_WILL_FREE set.
+ *
+ * This function is designed to be called for writing back one inode which
+ * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
+ * and does more profound writeback list handling in writeback_sb_inodes().
+ */
+static int
+writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+ struct writeback_control *wbc)
+{
+ int ret = 0;
- spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
- inode->i_state &= ~I_SYNC;
- if (!(inode->i_state & I_FREEING)) {
+ if (!atomic_read(&inode->i_count))
+ WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+ else
+ WARN_ON(inode->i_state & I_WILL_FREE);
+
+ if (inode->i_state & I_SYNC) {
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ goto out;
/*
- * Sync livelock prevention. Each inode is tagged and synced in
- * one shot. If still dirty, it will be redirty_tail()'ed below.
- * Update the dirty time to prevent enqueue and sync it again.
+ * It's a data-integrity sync. We must wait. Since callers hold
+ * inode reference or inode has I_WILL_FREE set, it cannot go
+ * away under us.
*/
- if ((inode->i_state & I_DIRTY) &&
- (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
- inode->dirtied_when = jiffies;
-
- if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- /*
- * We didn't write back all the pages. nfs_writepages()
- * sometimes bales out without doing anything.
- */
- inode->i_state |= I_DIRTY_PAGES;
- if (wbc->nr_to_write <= 0) {
- /*
- * slice used up: queue for next turn
- */
- requeue_io(inode, wb);
- } else {
- /*
- * Writeback blocked by something other than
- * congestion. Delay the inode for some time to
- * avoid spinning on the CPU (100% iowait)
- * retrying writeback of the dirty page/inode
- * that cannot be performed immediately.
- */
- redirty_tail(inode, wb);
- }
- } else if (inode->i_state & I_DIRTY) {
- /*
- * Filesystems can dirty the inode during writeback
- * operations, such as delayed allocation during
- * submission or metadata updates after data IO
- * completion.
- */
- redirty_tail(inode, wb);
- } else {
- /*
- * The inode is clean. At this point we either have
- * a reference to the inode or it's on it's way out.
- * No need to add it back to the LRU.
- */
- list_del_init(&inode->i_wb_list);
- }
+ __inode_wait_for_writeback(inode);
}
+ WARN_ON(inode->i_state & I_SYNC);
+ /*
+ * Skip inode if it is clean. We don't want to mess with writeback
+ * lists in this function since flusher thread may be doing for example
+ * sync in parallel and if we move the inode, it could get skipped. So
+ * here we make sure inode is on some writeback list and leave it there
+ * unless we have completely cleaned the inode.
+ */
+ if (!(inode->i_state & I_DIRTY))
+ goto out;
+ inode->i_state |= I_SYNC;
+ spin_unlock(&inode->i_lock);
+
+ ret = __writeback_single_inode(inode, wb, wbc);
+
+ spin_lock(&wb->list_lock);
+ spin_lock(&inode->i_lock);
+ /*
+ * If inode is clean, remove it from writeback lists. Otherwise don't
+ * touch it. See comment above for explanation.
+ */
+ if (!(inode->i_state & I_DIRTY))
+ list_del_init(&inode->i_wb_list);
+ spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
- trace_writeback_single_inode(inode, wbc, nr_to_write);
+out:
+ spin_unlock(&inode->i_lock);
return ret;
}
@@ -580,29 +638,57 @@ static long writeback_sb_inodes(struct super_block *sb,
redirty_tail(inode, wb);
continue;
}
- __iget(inode);
+ if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
+ /*
+ * If this inode is locked for writeback and we are not
+ * doing writeback-for-data-integrity, move it to
+ * b_more_io so that writeback can proceed with the
+ * other inodes on s_io.
+ *
+ * We'll have another go at writing back this inode
+ * when we completed a full scan of b_io.
+ */
+ spin_unlock(&inode->i_lock);
+ requeue_io(inode, wb);
+ trace_writeback_sb_inodes_requeue(inode);
+ continue;
+ }
+ spin_unlock(&wb->list_lock);
+
+ /*
+ * We already requeued the inode if it had I_SYNC set and we
+ * are doing WB_SYNC_NONE writeback. So this catches only the
+ * WB_SYNC_ALL case.
+ */
+ if (