summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2019-11-05 16:21:09 -0500
committerTheodore Ts'o <tytso@mit.edu>2019-11-05 16:21:09 -0500
commit8d0d47ea1640b23678306c007ccc813b5b930af4 (patch)
tree79c4be5111bb6c1cbf495ea8d98e008da46cb6c2
parenta6d4040846bff49c7e870cee5693245f87f2cfce (diff)
parent378f32bab3714f04c4e0c3aee4129f6703805550 (diff)
Merge branch 'mb/dio' into master
-rw-r--r--fs/dax.c13
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/extents.c11
-rw-r--r--fs/ext4/file.c412
-rw-r--r--fs/ext4/fsync.c72
-rw-r--r--fs/ext4/inode.c720
-rw-r--r--fs/gfs2/bmap.c3
-rw-r--r--fs/gfs2/file.c6
-rw-r--r--fs/iomap/Makefile16
-rw-r--r--fs/iomap/apply.c25
-rw-r--r--fs/iomap/buffered-io.c749
-rw-r--r--fs/iomap/direct-io.c11
-rw-r--r--fs/iomap/fiemap.c4
-rw-r--r--fs/iomap/seek.c4
-rw-r--r--fs/iomap/swapfile.c3
-rw-r--r--fs/iomap/trace.c12
-rw-r--r--fs/iomap/trace.h88
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c14
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h3
-rw-r--r--fs/xfs/xfs_aops.c754
-rw-r--r--fs/xfs/xfs_aops.h17
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--fs/xfs/xfs_iomap.c51
-rw-r--r--fs/xfs/xfs_iomap.h2
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_reflink.c2
-rw-r--r--fs/xfs/xfs_super.c11
-rw-r--r--fs/xfs/xfs_trace.h65
-rw-r--r--include/linux/iomap.h129
30 files changed, 1648 insertions, 1570 deletions
diff --git a/fs/dax.c b/fs/dax.c
index 6bf81f931de3..68eef98cd9c4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
- struct iomap *iomap)
+ struct iomap *iomap, struct iomap *srcmap)
{
struct block_device *bdev = iomap->bdev;
struct dax_device *dax_dev = iomap->dax_dev;
@@ -1247,7 +1247,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address;
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
- struct iomap iomap = { 0 };
+ struct iomap iomap = { .type = IOMAP_HOLE };
+ struct iomap srcmap = { .type = IOMAP_HOLE };
unsigned flags = IOMAP_FAULT;
int error, major = 0;
bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
* the file system block size to be equal the page size, which means
* that we never have to deal with more than a single extent here.
*/
- error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
if (iomap_errp)
*iomap_errp = error;
if (error) {
@@ -1471,7 +1472,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
struct inode *inode = mapping->host;
vm_fault_t result = VM_FAULT_FALLBACK;
- struct iomap iomap = { 0 };
+ struct iomap iomap = { .type = IOMAP_HOLE };
+ struct iomap srcmap = { .type = IOMAP_HOLE };
pgoff_t max_pgoff;
void *entry;
loff_t pos;
@@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
* to look up our filesystem block.
*/
pos = (loff_t)xas.xa_index << PAGE_SHIFT;
- error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
+ error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
+ &srcmap);
if (error)
goto unlock_entry;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 7004ce581a32..467c13ff6b40 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
#ifdef CONFIG_FS_DAX
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
- unsigned flags, struct iomap *iomap)
+ unsigned flags, struct iomap *iomap, struct iomap *srcmap)
{
unsigned int blkbits = inode->i_blkbits;
unsigned long first_block = offset >> blkbits;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c3ea2c818542..61987c106511 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1584,7 +1584,6 @@ enum {
EXT4_STATE_NO_EXPAND, /* No space for expansion */
EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
- EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
@@ -2565,8 +2564,6 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
-int ext4_dio_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create);
int ext4_walk_page_buffers(handle_t *handle,
@@ -3391,6 +3388,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
}
extern const struct iomap_ops ext4_iomap_ops;
+extern const struct iomap_ops ext4_iomap_report_ops;
static inline int ext4_buffer_uptodate(struct buffer_head *bh)
{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 59f741bedf2f..0e8708b77da6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1765,16 +1765,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
*/
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0;
- /*
- * The check for IO to unwritten extent is somewhat racy as we
- * increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
- * dropping i_data_sem. But reserved blocks should save us in that
- * case.
- */
+
if (ext4_ext_is_unwritten(ex1) &&
- (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
- atomic_read(&EXT4_I(inode)->i_unwritten) ||
- (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))
+ ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
return 0;
#ifdef AGGRESSIVE_TEST
if (ext1_ee_len >= 4)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8d2bbcc2d813..6a7293a5cda2 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -29,10 +29,58 @@
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/mman.h>
+#include <linux/backing-dev.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
+#include "truncate.h"
+
+static bool ext4_dio_supported(struct inode *inode)
+{
+ if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
+ return false;
+ if (fsverity_active(inode))
+ return false;
+ if (ext4_should_journal_data(inode))
+ return false;
+ if (ext4_has_inline_data(inode))
+ return false;
+ return true;
+}
+
+static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ ssize_t ret;
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock_shared(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock_shared(inode);
+ }
+
+ if (!ext4_dio_supported(inode)) {
+ inode_unlock_shared(inode);
+ /*
+ * Fallback to buffered I/O if the operation being performed on
+ * the inode is not supported by direct I/O. The IOCB_DIRECT
+ * flag needs to be cleared here in order to ensure that the
+ * direct I/O path within generic_file_read_iter() is not
+ * taken.
+ */
+ iocb->ki_flags &= ~IOCB_DIRECT;
+ return generic_file_read_iter(iocb, to);
+ }
+
+ ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
+ is_sync_kiocb(iocb));
+ inode_unlock_shared(inode);
+
+ file_accessed(iocb->ki_filp);
+ return ret;
+}
#ifdef CONFIG_FS_DAX
static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -64,16 +112,21 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
if (!iov_iter_count(to))
return 0; /* skip atime */
#ifdef CONFIG_FS_DAX
- if (IS_DAX(file_inode(iocb->ki_filp)))
+ if (IS_DAX(inode))
return ext4_dax_read_iter(iocb, to);
#endif
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return ext4_dio_read_iter(iocb, to);
+
return generic_file_read_iter(iocb, to);
}
@@ -103,13 +156,6 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
return 0;
}
-static void ext4_unwritten_wait(struct inode *inode)
-{
- wait_queue_head_t *wq = ext4_ioend_wq(inode);
-
- wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
-}
-
/*
* This tests whether the IO in question is block-aligned or not.
* Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
@@ -162,13 +208,13 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
ret = generic_write_checks(iocb, from);
if (ret <= 0)
return ret;
- if (unlikely(IS_IMMUTABLE(inode)))
- return -EPERM;
-
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
@@ -180,56 +226,266 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
return -EFBIG;
iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
}
+
+ ret = file_modified(iocb->ki_filp);
+ if (ret)
+ return ret;
+
return iov_iter_count(from);
}
-#ifdef CONFIG_FS_DAX
-static ssize_t
-ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
{
- struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
+ struct inode *inode = file_inode(iocb->ki_filp);
- if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
- return -EAGAIN;
- inode_lock(inode);
- }
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+ inode_lock(inode);
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
- ret = file_remove_privs(iocb->ki_filp);
- if (ret)
- goto out;
- ret = file_update_time(iocb->ki_filp);
- if (ret)
- goto out;
- ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
+ current->backing_dev_info = NULL;
+
out:
inode_unlock(inode);
- if (ret > 0)
+ if (likely(ret > 0)) {
+ iocb->ki_pos += ret;
ret = generic_write_sync(iocb, ret);
+ }
+
return ret;
}
-#endif
-static ssize_t
-ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+ ssize_t written, size_t count)
{
+ handle_t *handle;
+ bool truncate = false;
+ u8 blkbits = inode->i_blkbits;
+ ext4_lblk_t written_blk, end_blk;
+
+ /*
+ * Note that EXT4_I(inode)->i_disksize can get extended up to
+ * inode->i_size while the I/O was running due to writeback of delalloc
+ * blocks. But, the code in ext4_iomap_alloc() is careful to use
+ * zeroed/unwritten extents if this is possible; thus we won't leave
+ * uninitialized blocks in a file even if we didn't succeed in writing
+ * as much as we intended.
+ */
+ WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+ if (offset + count <= EXT4_I(inode)->i_disksize) {
+ /*
+ * We need to ensure that the inode is removed from the orphan
+ * list if it has been added prematurely, due to writeback of
+ * delalloc blocks.
+ */
+ if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+ if (IS_ERR(handle)) {
+ ext4_orphan_del(NULL, inode);
+ return PTR_ERR(handle);
+ }
+
+ ext4_orphan_del(handle, inode);
+ ext4_journal_stop(handle);
+ }
+
+ return written;
+ }
+
+ if (written < 0)
+ goto truncate;
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle)) {
+ written = PTR_ERR(handle);
+ goto truncate;
+ }
+
+ if (ext4_update_inode_size(inode, offset + written))
+ ext4_mark_inode_dirty(handle, inode);
+
+ /*
+ * We may need to truncate allocated but not written blocks beyond EOF.
+ */
+ written_blk = ALIGN(offset + written, 1 << blkbits);
+ end_blk = ALIGN(offset + count, 1 << blkbits);
+ if (written_blk < end_blk && ext4_can_truncate(inode))
+ truncate = true;
+
+ /*
+ * Remove the inode from the orphan list if it has been extended and
+ * everything went OK.
+ */
+ if (!truncate && inode->i_nlink)
+ ext4_orphan_del(handle, inode);
+ ext4_journal_stop(handle);
+
+ if (truncate) {
+truncate:
+ ext4_truncate_failed_write(inode);
+ /*
+ * If the truncate operation failed early, then the inode may
+ * still be on the orphan list. In that case, we need to try
+ * remove the inode from the in-memory linked list.
+ */
+ if (inode->i_nlink)
+ ext4_orphan_del(NULL, inode);
+ }
+
+ return written;
+}
+
+static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+ int error, unsigned int flags)
+{
+ loff_t offset = iocb->ki_pos;
struct inode *inode = file_inode(iocb->ki_filp);
- int o_direct = iocb->ki_flags & IOCB_DIRECT;
- int unaligned_aio = 0;
- int overwrite = 0;
+
+ if (error)
+ return error;
+
+ if (size && flags & IOMAP_DIO_UNWRITTEN)
+ return ext4_convert_unwritten_extents(NULL, inode,
+ offset, size);
+
+ return 0;
+}
+
+static const struct iomap_dio_ops ext4_dio_write_ops = {
+ .end_io = ext4_dio_write_end_io,
+};
+
+static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
ssize_t ret;
+ size_t count;
+ loff_t offset;
+ handle_t *handle;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ bool extend = false, overwrite = false, unaligned_aio = false;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
- return -EIO;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock(inode);
+ }
+
+ if (!ext4_dio_supported(inode)) {
+ inode_unlock(inode);
+ /*
+ * Fallback to buffered I/O if the inode does not support
+ * direct I/O.
+ */
+ return ext4_buffered_write_iter(iocb, from);
+ }
+
+ ret = ext4_write_checks(iocb, from);
+ if (ret <= 0) {
+ inode_unlock(inode);
+ return ret;
+ }
+
+ /*
+ * Unaligned asynchronous direct I/O must be serialized among each
+ * other as the zeroing of partial blocks of two competing unaligned
+ * asynchronous direct I/O writes can result in data corruption.
+ */
+ offset = iocb->ki_pos;
+ count = iov_iter_count(from);
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+ !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) {
+ unaligned_aio = true;
+ inode_dio_wait(inode);
+ }
+
+ /*
+ * Determine whether the I/O will overwrite allocated and initialized
+ * blocks. If so, check to see whether it is possible to take the
+ * dioread_nolock path.
+ */
+ if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
+ ext4_should_dioread_nolock(inode)) {
+ overwrite = true;
+ downgrade_write(&inode->i_rwsem);
+ }
+
+ if (offset + count > EXT4_I(inode)->i_disksize) {
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+
+ ret = ext4_orphan_add(handle, inode);
+ if (ret) {
+ ext4_journal_stop(handle);
+ goto out;
+ }
+
+ extend = true;
+ ext4_journal_stop(handle);
+ }
+
+ ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops,
+ is_sync_kiocb(iocb) || unaligned_aio || extend);
+
+ if (extend)
+ ret = ext4_handle_inode_extension(inode, offset, ret, count);
+
+out:
+ if (overwrite)
+ inode_unlock_shared(inode);
+ else
+ inode_unlock(inode);
+
+ if (ret >= 0 && iov_iter_count(from)) {
+ ssize_t err;
+ loff_t endbyte;
+
+ offset = iocb->ki_pos;
+ err = ext4_buffered_write_iter(iocb, from);
+ if (err < 0)
+ return err;
+
+ /*
+ * We need to ensure that the pages within the page cache for
+ * the range covered by this I/O are written to disk and
+ * invalidated. This is in attempt to preserve the expected
+ * direct I/O semantics in the case we fallback to buffered I/O
+ * to complete off the I/O request.
+ */
+ ret += err;
+ endbyte = offset + err - 1;
+ err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+ offset, endbyte);
+ if (!err)
+ invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+ offset >> PAGE_SHIFT,
+ endbyte >> PAGE_SHIFT);
+ }
+
+ return ret;
+}
#ifdef CONFIG_FS_DAX
- if (IS_DAX(inode))
- return ext4_dax_write_iter(iocb, from);
-#endif
+static ssize_t
+ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ ssize_t ret;
+ size_t count;
+ loff_t offset;
+ handle_t *handle;
+ bool extend = false;
+ struct inode *inode = file_inode(iocb->ki_filp);
if (!inode_trylock(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -241,49 +497,55 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret <= 0)
goto out;
- /*
- * Unaligned direct AIO must be serialized among each other as zeroing
- * of partial blocks of two competing unaligned AIOs can result in data
- * corruption.
- */
- if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
- !is_sync_kiocb(iocb) &&
- ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
- unaligned_aio = 1;
- ext4_unwritten_wait(inode);
- }
+ offset = iocb->ki_pos;
+ count = iov_iter_count(from);
- iocb->private = &overwrite;
- /* Check whether we do a DIO overwrite or not */
- if (o_direct && !unaligned_aio) {
- if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
- if (ext4_should_dioread_nolock(inode))
- overwrite = 1;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
+ if (offset + count > EXT4_I(inode)->i_disksize) {
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
goto out;
}
- }
- ret = __generic_file_write_iter(iocb, from);
- /*
- * Unaligned direct AIO must be the only IO in flight. Otherwise
- * overlapping aligned IO after unaligned might result in data
- * corruption.
- */
- if (ret == -EIOCBQUEUED && unaligned_aio)
- ext4_unwritten_wait(inode);
- inode_unlock(inode);
+ ret = ext4_orphan_add(handle, inode);
+ if (ret) {
+ ext4_journal_stop(handle);
+ goto out;
+ }
- if (ret > 0)
- ret = generic_write_sync(iocb, ret);
+ extend = true;
+ ext4_journal_stop(handle);
+ }
- return ret;
+ ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+ if (extend)
+ ret = ext4_handle_inode_extension(inode, offset, ret, count);
out:
inode_unlock(inode);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
return ret;
}
+#endif
+
+static ssize_t
+ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
+#ifdef CONFIG_FS_DAX
+ if (IS_DAX(inode))
+ return ext4_dax_write_iter(iocb, from);
+#endif
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return ext4_dio_write_iter(iocb, from);
+
+ return ext4_buffered_write_iter(iocb, from);
+}
#ifdef CONFIG_FS_DAX
static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
@@ -494,12 +756,14 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
maxbytes, i_size_read(inode));
case SEEK_HOLE:
inode_lock_shared(inode);
- offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+ offset = iomap_seek_hole(inode, offset,
+ &ext4_iomap_report_ops);
inode_unlock_shared(inode);
break;
case SEEK_DATA:
inode_lock_shared(inode);
- offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+ offset = iomap_seek_data(inode, offset,
+ &ext4_iomap_report_ops);
inode_unlock_shared(inode);
break;
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5508baa11bb6..e10206e7f4bb 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -80,6 +80,43 @@ static int ext4_sync_parent(struct inode *inode)
return ret;
}
+static int ext4_fsync_nojournal(struct inode *inode, bool datasync,
+ bool *needs_barrier)
+{
+ int ret, err;
+
+ ret = sync_mapping_buffers(inode->i_mapping);
+ if (!(inode->i_state & I_DIRTY_ALL))
+ return ret;
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ return ret;
+
+ err = sync_inode_metadata(inode, 1);
+ if (!ret)
+ ret = err;
+
+ if (!ret)
+ ret = ext4_sync_parent(inode);
+ if (test_opt(inode->i_sb, BARRIER))
+ *needs_barrier = true;
+
+ return ret;
+}
+
+static int ext4_fsync_journal(struct inode *inode, bool datasync,
+ bool *needs_barrier)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+ tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
+
+ if (journal->j_flags & JBD2_BARRIER &&
+ !jbd2_trans_will_send_data_barrier(journal, commit_tid))
+ *needs_barrier = true;
+
+ return jbd2_complete_transaction(journal, commit_tid);
+}
+
/*
* akpm: A new design for ext4_sync_file().
*
@@ -91,17 +128,14 @@ static int ext4_sync_parent(struct inode *inode)
* What we do is just kick off a commit and wait on it. This will snapshot the
* inode to disk.
*/
-
int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct inode *inode = file->f_mapping->host;
- struct ext4_inode_info *ei = EXT4_I(inode);
- journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int ret = 0, err;
- tid_t commit_tid;
bool needs_barrier = false;
+ struct inode *inode = file->f_mapping->host;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(sbi)))
return -EIO;
J_ASSERT(ext4_journal_current_handle() == NULL);
@@ -111,23 +145,15 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */
smp_rmb();
- if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ret = -EROFS;
goto out;
}
- if (!journal) {
- ret = __generic_file_fsync(file, start, end, datasync);
- if (!ret)
- ret = ext4_sync_parent(inode);
- if (test_opt(inode->i_sb, BARRIER))
- goto issue_flush;
- goto out;
- }
-
ret = file_write_and_wait_range(file, start, end);
if (ret)
return ret;
+
/*
* data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.
@@ -142,18 +168,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure.
*/
- if (ext4_should_journal_data(inode)) {
+ if (!sbi->s_journal)
+ ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier);
+ else if (ext4_should_journal_data(inode))
ret = ext4_force_commit(inode->i_sb);
- goto out;
- }
+ else
+ ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
- commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
- if (journal->j_flags & JBD2_BARRIER &&
- !jbd2_trans_will_send_data_barrier(journal, commit_tid))
- needs_barrier = true;
- ret = jbd2_complete_transaction(journal, commit_tid);
if (needs_barrier) {
- issue_flush:
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
if (!ret)
ret = err;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 61052c67952e..381813205f99 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -810,136 +810,6 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
#define DIO_MAX_BLOCKS 4096
/*
- * Get blocks function for the cases that need to start a transaction -
- * generally difference cases of direct IO and DAX IO. It also handles retries
- * in case of ENOSPC.
- */
-static int ext4_get_block_trans(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int flags)
-{
- int dio_credits;
- handle_t *handle;
- int retries = 0;
- int ret;
-
- /* Trim mapping request to maximum we can map at once for DIO */
- if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS)
- bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits;
- dio_credits = ext4_chunk_trans_blocks(inode,
- bh_result->b_size >> inode->i_blkbits);
-retry:
- handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- ret = _ext4_get_block(inode, iblock, bh_result, flags);
- ext4_journal_stop(handle);
-
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
- return ret;
-}
-
-/* Get block function for DIO reads and writes to inodes without extents */
-int ext4_dio_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- /* We don't expect handle for direct IO */
- WARN_ON_ONCE(ext4_journal_current_handle());
-
- if (!create)
- return _ext4_get_block(inode, iblock, bh, 0);
- return ext4_get_block_trans(inode, iblock, bh, EXT4_GET_BLOCKS_CREATE);
-}
-
-/*
- * Get block function for AIO DIO writes when we create unwritten extent if
- * blocks are not allocated yet. The extent will be converted to written
- * after IO is complete.
- */
-static int ext4_dio_get_block_unwritten_async(struct inode *inode,
- sector_t iblock, struct buffer_head *bh_result, int create)
-{
- int ret;
-
- /* We don't expect handle for direct IO */
- WARN_ON_ONCE(ext4_journal_current_handle());
-
- ret = ext4_get_block_trans(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_IO_CREATE_EXT);
-
- /*
- * When doing DIO using unwritten extents, we need io_end to convert
- * unwritten extents to written on IO completion. We allocate io_end
- * once we spot unwritten extent and store it in b_private. Generic
- * DIO code keeps b_private set and furthermore passes the value to
- * our completion callback in 'private' argument.
- */
- if (!ret && buffer_unwritten(bh_result)) {
- if (!bh_result->b_private) {
- ext4_io_end_t *io_end;
-
- io_end = ext4_init_io_end(inode, GFP_KERNEL);
- if (!io_end)
- return -ENOMEM;
- bh_result->b_private = io_end;
- ext4_set_io_unwritten_flag(inode, io_end);
- }
- set_buffer_defer_completion(bh_result);
- }
-
- return ret;
-}
-
-/*
- * Get block function for non-AIO DIO writes when we create unwritten extent if
- * blocks are not allocated yet. The extent will be converted to written
- * after IO is complete by ext4_direct_IO_write().
- */
-static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
- sector_t iblock, struct buffer_head *bh_result, int create)
-{
- int ret;
-
- /* We don't expect handle for direct IO */
- WARN_ON_ONCE(ext4_journal_current_handle());
-
- ret = ext4_get_block_trans(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_IO_CREATE_EXT);
-
- /*
- * Mark inode as having pending DIO writes to unwritten extents.
- * ext4_direct_IO_write() checks this flag and converts extents to
- * written.
- */
- if (!ret && buffer_unwritten(bh_result))
- ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
-
- return ret;
-}
-
-static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- int ret;
-
- ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n",
- inode->i_ino, create);
- /* We don't expect handle for direct IO */
- WARN_ON_ONCE(ext4_journal_current_handle());
-
- ret = _ext4_get_block(inode, iblock, bh_result, 0);
- /*
- * Blocks should have been preallocated! ext4_file_write_iter() checks
- * that.
- */
- WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result));
-
- return ret;
-}
-
-
-/*
* `handle' can be NULL if create is zero