summaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-14 09:17:42 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-14 09:17:42 -0800
commit5084fdf081739b7455c7aeecda6d7b83ec59c85f (patch)
tree7e99b684979d0020213efa095c73b67810a740ca /fs/ext4
parent09cb6464fe5e7fcd5177911429badd139c4481b7 (diff)
parenta551d7c8deefb6d9fb45a1de03a617dd064e0419 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "This merge request includes the dax-4.0-iomap-pmd branch which is needed for both ext4 and xfs dax changes to use iomap for DAX. It also includes the fscrypt branch which is needed for ubifs encryption work as well as ext4 encryption and fscrypt cleanups. Lots of cleanups and bug fixes, especially making sure ext4 is robust against maliciously corrupted file systems --- especially maliciously corrupted xattr blocks and a maliciously corrupted superblock. Also fix ext4 support for 64k block sizes so it works well on ppcle. Fixed mbcache so we don't miss some common xattr blocks that can be merged" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (86 commits) dax: Fix sleep in atomic contex in grab_mapping_entry() fscrypt: Rename FS_WRITE_PATH_FL to FS_CTX_HAS_BOUNCE_BUFFER_FL fscrypt: Delay bounce page pool allocation until needed fscrypt: Cleanup page locking requirements for fscrypt_{decrypt,encrypt}_page() fscrypt: Cleanup fscrypt_{decrypt,encrypt}_page() fscrypt: Never allocate fscrypt_ctx on in-place encryption fscrypt: Use correct index in decrypt path. fscrypt: move the policy flags and encryption mode definitions to uapi header fscrypt: move non-public structures and constants to fscrypt_private.h fscrypt: unexport fscrypt_initialize() fscrypt: rename get_crypt_info() to fscrypt_get_crypt_info() fscrypto: move ioctl processing more fully into common code fscrypto: remove unneeded Kconfig dependencies MAINTAINERS: fscrypto: recommend linux-fsdevel for fscrypto patches ext4: do not perform data journaling when data is encrypted ext4: return -ENOMEM instead of success ext4: reject inodes with negative size ext4: remove another test in ext4_alloc_file_blocks() Documentation: fix description of ext4's block_validity mount option ext4: fix checks for data=ordered and journal_async_commit options ...
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/acl.c2
-rw-r--r--fs/ext4/ext4.h31
-rw-r--r--fs/ext4/ext4_jbd2.h14
-rw-r--r--fs/ext4/extents.c27
-rw-r--r--fs/ext4/file.c184
-rw-r--r--fs/ext4/ialloc.c5
-rw-r--r--fs/ext4/inline.c18
-rw-r--r--fs/ext4/inode.c349
-rw-r--r--fs/ext4/ioctl.c82
-rw-r--r--fs/ext4/mballoc.c4
-rw-r--r--fs/ext4/namei.c24
-rw-r--r--fs/ext4/page-io.c3
-rw-r--r--fs/ext4/super.c152
-rw-r--r--fs/ext4/xattr.c45
15 files changed, 615 insertions, 326 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e38039fd96ff..7b90691e98c4 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,6 +37,7 @@ config EXT4_FS
select CRC16
select CRYPTO
select CRYPTO_CRC32C
+ select FS_IOMAP if FS_DAX
help
This is the next generation of the ext3 filesystem.
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index dfa519979038..fd389935ecd1 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -196,7 +196,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
if (error)
return error;
- inode->i_ctime = ext4_current_time(inode);
+ inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
}
break;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a8a750f59621..2163c1e69f2a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -397,8 +397,9 @@ struct flex_groups {
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
+#define EXT4_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
+/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
@@ -1533,12 +1534,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
-static inline struct timespec ext4_current_time(struct inode *inode)
-{
- return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
- current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
-}
-
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
@@ -2277,11 +2272,6 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
-static inline int ext4_sb_has_crypto(struct super_block *sb)
-{
- return ext4_has_feature_encrypt(sb);
-}
-
static inline bool ext4_encrypted_inode(struct inode *inode)
{
return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT);
@@ -2339,8 +2329,8 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page
#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page
#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range
-#define fscrypt_process_policy fscrypt_notsupp_process_policy
-#define fscrypt_get_policy fscrypt_notsupp_get_policy
+#define fscrypt_ioctl_set_policy fscrypt_notsupp_ioctl_set_policy
+#define fscrypt_ioctl_get_policy fscrypt_notsupp_ioctl_get_policy
#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context
#define fscrypt_inherit_context fscrypt_notsupp_inherit_context
#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info
@@ -2458,8 +2448,6 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_dio_get_block(struct inode *inode, sector_t iblock,
@@ -2492,7 +2480,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
-extern void ext4_truncate(struct inode *);
+extern int ext4_truncate(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
@@ -3129,7 +3117,7 @@ extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
-extern void ext4_ext_truncate(handle_t *, struct inode *);
+extern int ext4_ext_truncate(handle_t *, struct inode *);
extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end);
extern void ext4_ext_init(struct super_block *);
@@ -3265,12 +3253,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
}
}
-static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len)
-{
- int blksize = 1 << inode->i_blkbits;
-
- return IS_ALIGNED(off, blksize) && IS_ALIGNED(len, blksize);
-}
+extern struct iomap_ops ext4_iomap_ops;
#endif /* __KERNEL__ */
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b1d52c14098e..f97611171023 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -414,17 +414,19 @@ static inline int ext4_inode_journal_mode(struct inode *inode)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
/* We do not support data journalling with delayed allocation */
if (!S_ISREG(inode->i_mode) ||
- test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
- !test_opt(inode->i_sb, DELALLOC))
+ test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
+ (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
+ !test_opt(inode->i_sb, DELALLOC))) {
+ /* We do not support data journalling for encrypted data */
+ if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode))
+ return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
+ }
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
- else
- BUG();
+ BUG();
}
static inline int ext4_should_journal_data(struct inode *inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c930a0110fb4..3e1014fe835e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4631,7 +4631,7 @@ out2:
return err ? err : allocated;
}
-void ext4_ext_truncate(handle_t *handle, struct inode *inode)
+int ext4_ext_truncate(handle_t *handle, struct inode *inode)
{
struct super_block *sb = inode->i_sb;
ext4_lblk_t last_block;
@@ -4645,7 +4645,9 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)
/* we have to know where to truncate from in crash case */
EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_mark_inode_dirty(handle, inode);
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err)
+ return err;
last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
@@ -4657,12 +4659,9 @@ retry:
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
- if (err) {
- ext4_std_error(inode->i_sb, err);
- return;
- }
- err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
- ext4_std_error(inode->i_sb, err);
+ if (err)
+ return err;
+ return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
}
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
@@ -4701,7 +4700,7 @@ retry:
/*
* Recalculate credits when extent tree depth changes.
*/
- if (depth >= 0 && depth != ext_depth(inode)) {
+ if (depth != ext_depth(inode)) {
credits = ext4_chunk_trans_blocks(inode, len);
depth = ext_depth(inode);
}
@@ -4725,7 +4724,7 @@ retry:
map.m_lblk += ret;
map.m_len = len = len - ret;
epos = (loff_t)map.m_lblk << inode->i_blkbits;
- inode->i_ctime = ext4_current_time(inode);
+ inode->i_ctime = current_time(inode);
if (new_size) {
if (epos > new_size)
epos = new_size;
@@ -4853,7 +4852,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
/* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
@@ -4878,7 +4877,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
goto out_dio;
}
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
if (new_size) {
ext4_update_inode_size(inode, new_size);
} else {
@@ -5568,7 +5567,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
out_stop:
@@ -5678,7 +5677,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
EXT4_I(inode)->i_disksize += len;
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
ret = ext4_mark_inode_dirty(handle, inode);
if (ret)
goto out_stop;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2a822d30e73f..b5f184493c57 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -31,6 +31,42 @@
#include "xattr.h"
#include "acl.h"
+#ifdef CONFIG_FS_DAX
+static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ inode_lock_shared(inode);
+ /*
+ * Recheck under inode lock - at this point we are sure it cannot
+ * change anymore
+ */
+ if (!IS_DAX(inode)) {
+ inode_unlock_shared(inode);
+ /* Fallback to buffered IO in case we cannot support DAX */
+ return generic_file_read_iter(iocb, to);
+ }
+ ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
+ inode_unlock_shared(inode);
+
+ file_accessed(iocb->ki_filp);
+ return ret;
+}
+#endif
+
+static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ if (!iov_iter_count(to))
+ return 0; /* skip atime */
+
+#ifdef CONFIG_FS_DAX
+ if (IS_DAX(file_inode(iocb->ki_filp)))
+ return ext4_dax_read_iter(iocb, to);
+#endif
+ return generic_file_read_iter(iocb, to);
+}
+
/*
* Called when an inode is released. Note that this is different
* from ext4_file_open: open gets called at every open, but release
@@ -88,6 +124,86 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
return 0;
}
+/* Is IO overwriting allocated and initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+{
+ struct ext4_map_blocks map;
+ unsigned int blkbits = inode->i_blkbits;
+ int err, blklen;
+
+ if (pos + len > i_size_read(inode))
+ return false;
+
+ map.m_lblk = pos >> blkbits;
+ map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
+ blklen = map.m_len;
+
+ err = ext4_map_blocks(NULL, inode, &map, 0);
+ /*
+ * 'err==len' means that all of the blocks have been preallocated,
+ * regardless of whether they have been initialized or not. To exclude
+ * unwritten extents, we need to check m_flags.
+ */
+ return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+}
+
+static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ return ret;
+ /*
+ * If we have encountered a bitmap-format file, the size limit
+ * is smaller than s_maxbytes, which is for extent-mapped files.
+ */
+ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
+ return -EFBIG;
+ iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
+ }
+ return iov_iter_count(from);
+}
+
+#ifdef CONFIG_FS_DAX
+static ssize_t
+ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+ bool overwrite = false;
+
+ inode_lock(inode);
+ ret = ext4_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
+ ret = file_remove_privs(iocb->ki_filp);
+ if (ret)
+ goto out;
+ ret = file_update_time(iocb->ki_filp);
+ if (ret)
+ goto out;
+
+ if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+ overwrite = true;
+ downgrade_write(&inode->i_rwsem);
+ }
+ ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+out:
+ if (!overwrite)
+ inode_unlock(inode);
+ else
+ inode_unlock_shared(inode);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ return ret;
+}
+#endif
+
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
@@ -97,8 +213,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
int overwrite = 0;
ssize_t ret;
+#ifdef CONFIG_FS_DAX
+ if (IS_DAX(inode))
+ return ext4_dax_write_iter(iocb, from);
+#endif
+
inode_lock(inode);
- ret = generic_write_checks(iocb, from);
+ ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -114,53 +235,11 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ext4_unwritten_wait(inode);
}
- /*
- * If we have encountered a bitmap-format file, the size limit
- * is smaller than s_maxbytes, which is for extent-mapped files.
- */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
- if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
- ret = -EFBIG;
- goto out;
- }
- iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
- }
-
iocb->private = &overwrite;
- if (o_direct) {
- size_t length = iov_iter_count(from);
- loff_t pos = iocb->ki_pos;
-
- /* check whether we do a DIO overwrite or not */
- if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- pos + length <= i_size_read(inode)) {
- struct ext4_map_blocks map;
- unsigned int blkbits = inode->i_blkbits;
- int err, len;
-
- map.m_lblk = pos >> blkbits;
- map.m_len = EXT4_MAX_BLOCKS(length, pos, blkbits);
- len = map.m_len;
-
- err = ext4_map_blocks(NULL, inode, &map, 0);
- /*
- * 'err==len' means that all of blocks has
- * been preallocated no matter they are
- * initialized or not. For excluding
- * unwritten extents, we need to check
- * m_flags. There are two conditions that
- * indicate for initialized extents. 1) If we
- * hit extent cache, EXT4_MAP_MAPPED flag is
- * returned; 2) If we do a real lookup,
- * non-flags are returned. So we should check
- * these two conditions.
- */
- if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
- overwrite = 1;
- }
- }
+ /* Check whether we do a DIO overwrite or not */
+ if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
+ ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
+ overwrite = 1;
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
@@ -196,7 +275,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
- result = dax_fault(vma, vmf, ext4_dax_get_block);
+ result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
if (write) {
if (!IS_ERR(handle))
@@ -230,9 +309,10 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
- else
- result = dax_pmd_fault(vma, addr, pmd, flags,
- ext4_dax_get_block);
+ else {
+ result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
+ &ext4_iomap_ops);
+ }
if (write) {
if (!IS_ERR(handle))
@@ -687,7 +767,7 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
const struct file_operations ext4_file_operations = {
.llseek = ext4_llseek,
- .read_iter = generic_file_read_iter,
+ .read_iter = ext4_file_read_iter,
.write_iter = ext4_file_write_iter,
.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 170421edfdfe..e57e8d90ea54 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1039,7 +1039,7 @@ got:
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
- ext4_current_time(inode);
+ current_time(inode);
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
@@ -1115,8 +1115,7 @@ got:
}
if (encrypt) {
- /* give pointer to avoid set_context with journal ops. */
- err = fscrypt_inherit_context(dir, inode, &encrypt, true);
+ err = fscrypt_inherit_context(dir, inode, handle, true);
if (err)
goto fail_free_drop;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index f74d5ee2cdec..437df6a1a841 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -299,6 +299,11 @@ static int ext4_create_inline_data(handle_t *handle,
EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
+ /*
+ * Propagate changes to inode->i_flags as well - e.g. S_DAX may
+ * get cleared
+ */
+ ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
@@ -336,8 +341,10 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
len -= EXT4_MIN_INLINE_DATA_SIZE;
value = kzalloc(len, GFP_NOFS);
- if (!value)
+ if (!value) {
+ error = -ENOMEM;
goto out;
+ }
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
@@ -442,6 +449,11 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
}
}
ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
+ /*
+ * Propagate changes to inode->i_flags as well - e.g. S_DAX may
+ * get set.
+ */
+ ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
@@ -1028,7 +1040,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
+ dir->i_mtime = dir->i_ctime = current_time(dir);
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
@@ -1971,7 +1983,7 @@ out:
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9c064727ed62..72d593fa690d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,6 +37,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/bitops.h>
+#include <linux/iomap.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
csum_size);
offset += csum_size;
- csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
- EXT4_INODE_SIZE(inode->i_sb) -
- offset);
}
+ csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+ EXT4_INODE_SIZE(inode->i_sb) - offset);
}
return csum;
@@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode)
"couldn't mark inode dirty (err %d)", err);
goto stop_handle;
}
- if (inode->i_blocks)
- ext4_truncate(inode);
+ if (inode->i_blocks) {
+ err = ext4_truncate(inode);
+ if (err) {
+ ext4_error(inode->i_sb,
+ "couldn't truncate inode %lu (err %d)",
+ inode->i_ino, err);
+ goto stop_handle;
+ }
+ }
/*
* ext4_ext_truncate() doesn't reserve any slop when it
@@ -767,6 +774,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
ext4_update_bh_state(bh, map.m_flags);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
ret = 0;
+ } else if (ret == 0) {
+ /* hole case, need to fill in bh->b_size */
+ bh->b_size = inode->i_sb->s_blocksize * map.m_len;
}
return ret;
}
@@ -1166,7 +1176,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (unlikely(err))
page_zero_new_buffers(page, from, to);
else if (decrypt)
- err = fscrypt_decrypt_page(page);
+ err = fscrypt_decrypt_page(page->mapping->host, page,
+ PAGE_SIZE, 0, page->index);
return err;
}
#endif
@@ -2891,7 +2902,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_SHIFT;
- if (ext4_nonda_switch(inode->i_sb)) {
+ if (ext4_nonda_switch(inode->i_sb) ||
+ S_ISLNK(inode->i_mode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);
@@ -3268,53 +3280,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
}
#ifdef CONFIG_FS_DAX
-/*
- * Get block function for DAX IO and mmap faults. It takes care of converting
- * unwritten extents to written ones and initializes new / converted blocks
- * to zeros.
- */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned flags, struct iomap *iomap)
{
+ unsigned int blkbits = inode->i_blkbits;
+ unsigned long first_block = offset >> blkbits;
+ unsigned long last_block = (offset + length - 1) >> blkbits;
+ struct ext4_map_blocks map;
int ret;
- ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
- if (!create)
- return _ext4_get_block(inode, iblock, bh_result, 0);
+ if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+ return -ERANGE;
- ret = ext4_get_block_trans(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_PRE_IO |
- EXT4_GET_BLOCKS_CREATE_ZERO);
- if (ret < 0)
- return ret;
+ map.m_lblk = first_block;
+ map.m_len = last_block - first_block + 1;
+
+ if (!(flags & IOMAP_WRITE)) {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ } else {
+ int dio_credits;
+ handle_t *handle;
+ int retries = 0;
- if (buffer_unwritten(bh_result)) {
+ /* Trim mapping request to maximum we can map at once for DIO */
+ if (map.m_len > DIO_MAX_BLOCKS)
+ map.m_len = DIO_MAX_BLOCKS;
+ dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+retry:
/*
- * We are protected by i_mmap_sem or i_mutex so we know block
- * cannot go away from under us even though we dropped
- * i_data_sem. Convert extent to written and write zeros there.
+ * Either we allocate blocks and then we don't get unwritten
+ * extent so we have reserved enough credits, or the blocks
+ * are already allocated and unwritten and in that case
+ * extent conversion fits in the credits as well.
*/
- ret = ext4_get_block_trans(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_CONVERT |
- EXT4_GET_BLOCKS_CREATE_ZERO);
- if (ret < 0)
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+ dio_credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ret = ext4_map_blocks(handle, inode, &map,
+ EXT4_GET_BLOCKS_CREATE_ZERO);
+ if (ret < 0) {
+ ext4_journal_stop(handle);
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
return ret;
+ }
+
+ /*
+ * If we added blocks beyond i_size, we need to make sure they
+ * will get truncated if we crash before updating i_size in
+ * ext4_iomap_end(). For faults we don't need to do that (and
+ * even cannot because for orphan list operations inode_lock is
+ * required) - if we happen to instantiate block beyond i_size,
+ * it is because we race with truncate which has already added
+ * the inode to the orphan list.
+ */
+ if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
+ (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
+ int err;
+
+ err = ext4_orphan_add(handle, inode);
+ if (err < 0) {
+ ext4_journal_stop(handle);
+ return err;
+ }
+ }
+ ext4_journal_stop(handle);
}
- /*
- * At least for now we have to clear BH_New so that DAX code
- * doesn't attempt to zero blocks again in a racy way.
- */
- clear_buffer_new(bh_result);
+
+ iomap->flags = 0;
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = first_block << blkbits;
+
+ if (ret == 0) {
+ iomap->type = IOMAP_HOLE;
+ iomap->blkno = IOMAP_NULL_BLOCK;
+ iomap->length = (u64)map.m_len << blkbits;
+ } else {
+ if (map.m_flags & EXT4_MAP_MAPPED) {
+ iomap->type = IOMAP_MAPPED;
+ } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ iomap->type = IOMAP_UNWRITTEN;
+ } else {
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+ iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
+ iomap->length = (u64)map.m_len << blkbits;
+ }
+
+ if (map.m_flags & EXT4_MAP_NEW)
+ iomap->flags |= IOMAP_F_NEW;
return 0;
}
-#else
-/* Just define empty function, it will never get called. */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+
+static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+ ssize_t written, unsigned flags, struct iomap *iomap)
{
- BUG();
- return 0;
+ int ret = 0;
+ handle_t *handle;
+ int blkbits = inode->i_blkbits;
+ bool truncate = false;
+
+ if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
+ return 0;
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto orphan_del;
+ }
+ if (ext4_update_inode_size(inode, offset + written))
+ ext4_mark_inode_dirty(handle, inode);
+ /*
+ * We may need to truncate allocated but not written blocks beyond EOF.
+ */
+ if (iomap->offset + iomap->length >
+ ALIGN(inode->i_size, 1 << blkbits)) {
+ ext4_lblk_t written_blk, end_blk;
+
+ written_blk = (offset + written) >> blkbits;
+ end_blk = (offset + length) >> blkbits;
+ if (written_blk < end_blk && ext4_can_truncate(inode))
+ truncate = true;
+ }
+ /*
+ * Remove inode from orphan list if we were extending a inode and
+ * everything went fine.
+ */
+ if (!truncate && inode->i_nlink &&
+ !list_empty(&EXT4_I(inode)->i_orphan))
+ ext4_orphan_del(handle, inode);
+ ext4_journal_stop(handle);
+ if (truncate) {
+ ext4_truncate_failed_write(inode);
+orphan_del:
+ /*
+ * If truncate failed early the inode might still be on the
+ * orphan list; we need to make sure the inode is removed from
+ * the orphan list in that case.
+ */
+ if (inode->i_nlink)
+ ext4_orphan_del(NULL, inode);
+ }
+ return ret;
}
+
+struct iomap_ops ext4_iomap_ops = {
+ .iomap_begin = ext4_iomap_begin,
+ .iomap_end = ext4_iomap_end,
+};
+
#endif
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -3436,19 +3554,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
iocb->private = NULL;
if (overwrite)
get_block_func = ext4_dio_get_block_overwrite;
- else if (IS_DAX(inode)) {
- /*
- * We can avoid zeroing for aligned DAX writes beyond EOF. Other
- * writes need zeroing either because they can race with page
- * faults or because they use partial blocks.
- */
- if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
- ext4_aligned_io(inode, offset, count))
- get_block_func = ext4_dio_get_block;
- else
- get_block_func = ext4_dax_get_block;
- dio_flags = DIO_LOCKING;
- } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
get_block_func = ext4_dio_get_block;
dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@ -3462,14 +3568,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
#ifdef CONFIG_EXT4_FS_ENCRYPTION
BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, get_block_func,
- ext4_end_io_dio, dio_flags);
- } else
- ret = __blockdev_direct_IO(iocb, inode,
- inode->i_sb->s_bdev, iter,
- get_block_func,
- ext4_end_io_dio, NULL, dio_flags);
+ ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+ get_block_func, ext4_end_io_dio, NULL,
+ dio_flags);
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3538,6 +3639,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
ssize_t ret;
/*
@@ -3546,19 +3648,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
* we are protected against page writeback as well.
*/
inode_lock_shared(inode);
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
- } else {
- size_t count = iov_iter_count(iter);
-
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count);
- if (ret)
- goto out_unlock;
- ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,