diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 12:59:42 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 12:59:42 -0800 |
commit | ae9a8c4bdc91202b4236372eed53c54d2297c71b (patch) | |
tree | 4596680ee808334d246ad2f93bdd743d76c3741a /fs | |
parent | 32190f0afbf4f1c0a9142e5a886a078ee0b794fd (diff) | |
parent | 232530680290ba94ca37852ab10d9556ea28badf (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
- Add support for online resizing of file systems with bigalloc
- Fix a two data corruption bugs involving DAX, as well as a corruption
bug after a crash during a racing fallocate and delayed allocation.
- Finally, a number of cleanups and optimizations.
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: improve smp scalability for inode generation
ext4: add support for online resizing with bigalloc
ext4: mention noload when recovering on read-only device
Documentation: fix little inconsistencies
ext4: convert timers to use timer_setup()
jbd2: convert timers to use timer_setup()
ext4: remove duplicate extended attributes defs
ext4: add ext4_should_use_dax()
ext4: add sanity check for encryption + DAX
ext4: prevent data corruption with journaling + DAX
ext4: prevent data corruption with inline data + DAX
ext4: fix interaction between i_size, fallocate, and delalloc after a crash
ext4: retry allocations conservatively
ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
ext4: Add iomap support for inline data
iomap: Add IOMAP_F_DATA_INLINE flag
iomap: Switch from blkno to disk offset
Diffstat (limited to 'fs')
-rw-r--r-- | fs/buffer.c | 4 | ||||
-rw-r--r-- | fs/dax.c | 2 | ||||
-rw-r--r-- | fs/ext2/inode.c | 4 | ||||
-rw-r--r-- | fs/ext4/Kconfig | 1 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 15 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 50 | ||||
-rw-r--r-- | fs/ext4/extents.c | 6 | ||||
-rw-r--r-- | fs/ext4/file.c | 263 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 4 | ||||
-rw-r--r-- | fs/ext4/inline.c | 43 | ||||
-rw-r--r-- | fs/ext4/inode.c | 153 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 30 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 28 | ||||
-rw-r--r-- | fs/ext4/resize.c | 104 | ||||
-rw-r--r-- | fs/ext4/super.c | 27 | ||||
-rw-r--r-- | fs/iomap.c | 13 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 9 | ||||
-rw-r--r-- | fs/nfsd/blocklayout.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 6 |
19 files changed, 266 insertions, 500 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 32ce01f0f95f..49b7e9bdcd1d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1979,8 +1979,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, case IOMAP_MAPPED: if (offset >= i_size_read(inode)) set_buffer_new(bh); - bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) + - ((offset - iomap->offset) >> inode->i_blkbits); + bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> + inode->i_blkbits; set_buffer_mapped(bh); break; } @@ -938,7 +938,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range); static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) { - return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); + return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; } static loff_t diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1442a4c734c8..9b2ac55ac34f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -821,11 +821,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret == 0) { iomap->type = IOMAP_HOLE; - iomap->blkno = IOMAP_NULL_BLOCK; + iomap->addr = IOMAP_NULL_ADDR; iomap->length = 1 << blkbits; } else { iomap->type = IOMAP_MAPPED; - iomap->blkno = (sector_t)bno << (blkbits - 9); + iomap->addr = (u64)bno << blkbits; iomap->length = (u64)ret << blkbits; iomap->flags |= IOMAP_F_MERGED; } diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e38039fd96ff..73b850f5659c 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,6 +37,7 @@ config EXT4_FS select CRC16 select CRYPTO select CRYPTO_CRC32C + select FS_IOMAP help This is the next generation of the ext3 filesystem. diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d5ddfb96c83c..a943e568292e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -601,22 +601,21 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, * ext4_should_retry_alloc() is called when ENOSPC is returned, and if * it is profitable to retry the operation, this function will wait * for the current or committing transaction to complete, and then - * return TRUE. - * - * if the total number of retries exceed three times, return FALSE. + * return TRUE. We will only retry once. */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || - (*retries)++ > 3 || + (*retries)++ > 1 || !EXT4_SB(sb)->s_journal) return 0; - jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); - smp_mb(); - if (EXT4_SB(sb)->s_mb_free_pending) - jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); + if (EXT4_SB(sb)->s_mb_free_pending == 0) + return 0; + + jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); + jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); return 1; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 27f38bb5046d..4e091eae38b1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -544,8 +544,8 @@ struct ext4_new_group_data { __u64 inode_table; __u32 blocks_count; __u16 reserved_blocks; - __u16 unused; - __u32 free_blocks_count; + __u16 mdata_blocks; + __u32 free_clusters_count; }; /* Indexes used to index group tables in ext4_new_group_data */ @@ -643,43 +643,6 @@ enum { #define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT #define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY -#ifndef FS_IOC_FSGETXATTR -/* Until the uapi changes get merged for project quota... */ - -#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) -#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) - -/* - * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR. - */ -struct fsxattr { - __u32 fsx_xflags; /* xflags field value (get/set) */ - __u32 fsx_extsize; /* extsize field value (get/set)*/ - __u32 fsx_nextents; /* nextents field value (get) */ - __u32 fsx_projid; /* project identifier (get/set) */ - unsigned char fsx_pad[12]; -}; - -/* - * Flags for the fsx_xflags field - */ -#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ -#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ -#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ -#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ -#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ -#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ -#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ -#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ -#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ -#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ -#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ -#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ -#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ -#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ -#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ -#endif /* !defined(FS_IOC_FSGETXATTR) */ - #define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR #define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR @@ -1391,8 +1354,6 @@ struct ext4_sb_info { int s_first_ino; unsigned int s_inode_readahead_blks; unsigned int s_inode_goal; - spinlock_t s_next_gen_lock; - u32 s_next_generation; u32 s_hash_seed[4]; int s_def_hash_version; int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ @@ -2514,9 +2475,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, ext4_lblk_t len); -extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, - unsigned int map_len, - struct extent_status *result); /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, @@ -3047,6 +3005,10 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, extern int ext4_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int *has_inline, __u64 start, __u64 len); + +struct iomap; +extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap); + extern int ext4_try_to_evict_inline_data(handle_t *handle, struct inode *inode, int needed); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 97f0fd06728d..07bca11749d4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4794,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, } if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { + (offset + len > i_size_read(inode) || + offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); if (ret) @@ -4965,7 +4966,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) } if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { + (offset + len > i_size_read(inode) || + offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); if (ret) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b937078bcff3..ad204d2724ac 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -21,6 +21,7 @@ #include <linux/time.h> #include <linux/fs.h> +#include <linux/iomap.h> #include <linux/mount.h> #include <linux/path.h> #include <linux/dax.h> @@ -424,248 +425,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp) } /* - * Here we use ext4_map_blocks() to get a block mapping for a extent-based - * file rather than ext4_ext_walk_space() because we can introduce - * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same - * function. When extent status tree has been fully implemented, it will - * track all extent status for a file and we can directly use it to - * retrieve the offset for SEEK_DATA/SEEK_HOLE. - */ - -/* - * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to - * lookup page cache to check whether or not there has some data between - * [startoff, endoff] because, if this range contains an unwritten extent, - * we determine this extent as a data or a hole according to whether the - * page cache has data or not. - */ -static int ext4_find_unwritten_pgoff(struct inode *inode, - int whence, - ext4_lblk_t end_blk, - loff_t *offset) -{ - struct pagevec pvec; - unsigned int blkbits; - pgoff_t index; - pgoff_t end; - loff_t endoff; - loff_t startoff; - loff_t lastoff; - int found = 0; - - blkbits = inode->i_sb->s_blocksize_bits; - startoff = *offset; - lastoff = startoff; - endoff = (loff_t)end_blk << blkbits; - - index = startoff >> PAGE_SHIFT; - end = (endoff - 1) >> PAGE_SHIFT; - - pagevec_init(&pvec, 0); - do { - int i; - unsigned long nr_pages; - - nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, - &index, end); - if (nr_pages == 0) - break; - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - struct buffer_head *bh, *head; - - /* - * If current offset is smaller than the page offset, - * there is a hole at this offset. - */ - if (whence == SEEK_HOLE && lastoff < endoff && - lastoff < page_offset(pvec.pages[i])) { - found = 1; - *offset = lastoff; - goto out; - } - - lock_page(page); - - if (unlikely(page->mapping != inode->i_mapping)) { - unlock_page(page); - continue; - } - - if (!page_has_buffers(page)) { - unlock_page(page); - continue; - } - - if (page_has_buffers(page)) { - lastoff = page_offset(page); - bh = head = page_buffers(page); - do { - if (lastoff + bh->b_size <= startoff) - goto next; - if (buffer_uptodate(bh) || - buffer_unwritten(bh)) { - if (whence == SEEK_DATA) - found = 1; - } else { - if (whence == SEEK_HOLE) - found = 1; - } - if (found) { - *offset = max_t(loff_t, - startoff, lastoff); - unlock_page(page); - goto out; - } -next: - lastoff += bh->b_size; - bh = bh->b_this_page; - } while (bh != head); - } - - lastoff = page_offset(page) + PAGE_SIZE; - unlock_page(page); - } - - pagevec_release(&pvec); - } while (index <= end); - - /* There are no pages upto endoff - that would be a hole in there. */ - if (whence == SEEK_HOLE && lastoff < endoff) { - found = 1; - *offset = lastoff; - } -out: - pagevec_release(&pvec); - return found; -} - -/* - * ext4_seek_data() retrieves the offset for SEEK_DATA. - */ -static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) -{ - struct inode *inode = file->f_mapping->host; - struct extent_status es; - ext4_lblk_t start, last, end; - loff_t dataoff, isize; - int blkbits; - int ret; - - inode_lock(inode); - - isize = i_size_read(inode); - if (offset < 0 || offset >= isize) { - inode_unlock(inode); - return -ENXIO; - } - - blkbits = inode->i_sb->s_blocksize_bits; - start = offset >> blkbits; - last = start; - end = isize >> blkbits; - dataoff = offset; - - do { - ret = ext4_get_next_extent(inode, last, end - last + 1, &es); - if (ret <= 0) { - /* No extent found -> no data */ - if (ret == 0) - ret = -ENXIO; - inode_unlock(inode); - return ret; - } - - last = es.es_lblk; - if (last != start) - dataoff = (loff_t)last << blkbits; - if (!ext4_es_is_unwritten(&es)) - break; - - /* - * If there is a unwritten extent at this offset, - * it will be as a data or a hole according to page - * cache that has data or not. - */ - if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, - es.es_lblk + es.es_len, &dataoff)) - break; - last += es.es_len; - dataoff = (loff_t)last << blkbits; - cond_resched(); - } while (last <= end); - - inode_unlock(inode); - - if (dataoff > isize) - return -ENXIO; - - return vfs_setpos(file, dataoff, maxsize); -} - -/* - * ext4_seek_hole() retrieves the offset for SEEK_HOLE. - */ -static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) -{ - struct inode *inode = file->f_mapping->host; - struct extent_status es; - ext4_lblk_t start, last, end; - loff_t holeoff, isize; - int blkbits; - int ret; - - inode_lock(inode); - - isize = i_size_read(inode); - if (offset < 0 || offset >= isize) { - inode_unlock(inode); - return -ENXIO; - } - - blkbits = inode->i_sb->s_blocksize_bits; - start = offset >> blkbits; - last = start; - end = isize >> blkbits; - holeoff = offset; - - do { - ret = ext4_get_next_extent(inode, last, end - last + 1, &es); - if (ret < 0) { - inode_unlock(inode); - return ret; - } - /* Found a hole? */ - if (ret == 0 || es.es_lblk > last) { - if (last != start) - holeoff = (loff_t)last << blkbits; - break; - } - /* - * If there is a unwritten extent at this offset, - * it will be as a data or a hole according to page - * cache that has data or not. - */ - if (ext4_es_is_unwritten(&es) && - ext4_find_unwritten_pgoff(inode, SEEK_HOLE, - last + es.es_len, &holeoff)) - break; - - last += es.es_len; - holeoff = (loff_t)last << blkbits; - cond_resched(); - } while (last <= end); - - inode_unlock(inode); - - if (holeoff > isize) - holeoff = isize; - - return vfs_setpos(file, holeoff, maxsize); -} - -/* * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values * by calling generic_file_llseek_size() with the appropriate maxbytes * value for each. @@ -681,18 +440,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence) maxbytes = inode->i_sb->s_maxbytes; switch (whence) { - case SEEK_SET: - case SEEK_CUR: - case SEEK_END: + default: return generic_file_llseek_size(file, offset, whence, maxbytes, i_size_read(inode)); - case SEEK_DATA: - return ext4_seek_data(file, offset, maxbytes); case SEEK_HOLE: - return ext4_seek_hole(file, offset, maxbytes); + inode_lock_shared(inode); + offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops); + inode_unlock_shared(inode); + break; + case SEEK_DATA: + inode_lock_shared(inode); + offset = iomap_seek_data(inode, offset, &ext4_iomap_ops); + inode_unlock_shared(inode); + break; } - return -EINVAL; + if (offset < 0) + return offset; + return vfs_setpos(file, offset, maxbytes); } const struct file_operations ext4_file_operations = { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c5f697a3fad4..b4267d72f249 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1139,9 +1139,7 @@ got: inode->i_ino); goto out; } - spin_lock(&sbi->s_next_gen_lock); - inode->i_generation = sbi->s_next_generation++; - spin_unlock(&sbi->s_next_gen_lock); + inode->i_generation = prandom_u32(); /* Precompute checksum seed for inode metadata */ if (ext4_has_metadata_csum(sb)) { diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 28c5c3abddb3..1367553c43bb 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -12,6 +12,7 @@ * GNU General Public License for more details. */ +#include <linux/iomap.h> #include <linux/fiemap.h> #include "ext4_jbd2.h" @@ -302,11 +303,6 @@ static int ext4_create_inline_data(handle_t *handle, EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE; ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); - /* - * Propagate changes to inode->i_flags as well - e.g. S_DAX may - * get cleared - */ - ext4_set_inode_flags(inode); get_bh(is.iloc.bh); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); @@ -451,11 +447,6 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, } } ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA); - /* - * Propagate changes to inode->i_flags as well - e.g. S_DAX may - * get set. - */ - ext4_set_inode_flags(inode); get_bh(is.iloc.bh); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); @@ -1827,6 +1818,38 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) return ret; } +int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) +{ + __u64 addr; + int error = -EAGAIN; + struct ext4_iloc iloc; + + down_read(&EXT4_I(inode)->xattr_sem); + if (!ext4_has_inline_data(inode)) + goto out; + + error = ext4_get_inode_loc(inode, &iloc); + if (error) + goto out; + + addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; + addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; + addr += offsetof(struct ext4_inode, i_block); + + brelse(iloc.bh); + + iomap->addr = addr; + iomap->offset = 0; + iomap->length = min_t(loff_t, ext4_get_inline_size(inode), + i_size_read(inode)); + iomap->type = 0; + iomap->flags = IOMAP_F_DATA_INLINE; + +out: + up_read(&EXT4_I(inode)->xattr_sem); + return error; +} + int ext4_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int *has_inline, __u64 start, __u64 len) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 168a1b499cdf..2633150e41b9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3394,7 +3394,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait) return try_to_free_buffers(page); } -#ifdef CONFIG_FS_DAX static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned flags, struct iomap *iomap) { @@ -3403,17 +3402,54 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned long first_block = offset >> blkbits; unsigned long last_block = (offset + length - 1) >> blkbits; struct ext4_map_blocks map; + bool delalloc = false; int ret; - if (WARN_ON_ONCE(ext4_has_inline_data(inode))) - return -ERANGE; + + if (flags & IOMAP_REPORT) { + if (ext4_has_inline_data(inode)) { + ret = ext4_inline_data_iomap(inode, iomap); + if (ret != -EAGAIN) { + if (ret == 0 && offset >= iomap->length) + ret = -ENOENT; + return ret; + } + } + } else { + if (WARN_ON_ONCE(ext4_has_inline_data(inode))) + return -ERANGE; + } map.m_lblk = first_block; map.m_len = last_block - first_block + 1; - if (!(flags & IOMAP_WRITE)) { + if (flags & IOMAP_REPORT) { ret = ext4_map_blocks(NULL, inode, &map, 0); - } else { + if (ret < 0) + return ret; + + if (ret == 0) { + ext4_lblk_t end = map.m_lblk + map.m_len - 1; + struct extent_status es; + + ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es); + + if (!es.es_len || es.es_lblk > end) { + /* entire range is a hole */ + } else if (es.es_lblk > map.m_lblk) { + /* range starts with a hole */ + map.m_len = es.es_lblk - map.m_lblk; + } else { + ext4_lblk_t offs = 0; + + if (es.es_lblk < map.m_lblk) + offs = map.m_lblk - es.es_lblk; + map.m_lblk = es.es_lblk + offs; + map.m_len = es.es_len - offs; + delalloc = true; + } + } + } else if (flags & IOMAP_WRITE) { int dio_credits; handle_t *handle; int retries = 0; @@ -3464,17 +3500,21 @@ retry: } } ext4_journal_stop(handle); + } else { + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + return ret; } iomap->flags = 0; iomap->bdev = inode->i_sb->s_bdev; iomap->dax_dev = sbi->s_daxdev; iomap->offset = first_block << blkbits; + iomap->length = (u64)map.m_len << blkbits; if (ret == 0) { - iomap->type = IOMAP_HOLE; - iomap->blkno = IOMAP_NULL_BLOCK; - iomap->length = (u64)map.m_len << blkbits; + iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; } else { if (map.m_flags & EXT4_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; @@ -3484,12 +3524,12 @@ retry: WARN_ON_ONCE(1); return -EIO; } - iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9); - iomap->length = (u64)map.m_len << blkbits; + iomap->addr = (u64)map.m_pblk << blkbits; } if (map.m_flags & EXT4_MAP_NEW) iomap->flags |= IOMAP_F_NEW; + return 0; } @@ -3550,8 +3590,6 @@ const struct iomap_ops ext4_iomap_ops = { .iomap_end = ext4_iomap_end, }; -#endif - static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { @@ -4573,6 +4611,21 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); } +static bool ext4_should_use_dax(struct inode *inode) +{ + if (!test_opt(inode->i_sb, DAX)) + return false; + if (!S_ISREG(inode->i_mode)) + return false; + if (ext4_should_journal_data(inode)) + return false; + if (ext4_has_inline_data(inode)) + return false; + if (ext4_encrypted_inode(inode)) + return false; + return true; +} + void ext4_set_inode_flags(struct inode *inode) { unsigned int flags = EXT4_I(inode)->i_flags; @@ -4588,9 +4641,7 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) new_fl |= S_DIRSYNC; - if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) && - !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) && - !(flags & EXT4_ENCRYPT_FL)) + if (ext4_should_use_dax(inode)) new_fl |= S_DAX; if (flags & EXT4_ENCRYPT_FL) new_fl |= S_ENCRYPTED; @@ -5966,11 +6017,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); } ext4_set_aops(inode); - /* - * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated. - * E.g. S_DAX may get cleared / set. - */ - ext4_set_inode_flags(inode); jbd2_journal_unlock_updates(journal); percpu_up_write(&sbi->s_journal_flag_rwsem); @@ -6106,70 +6152,3 @@ int ext4_filemap_fault(struct vm_fault *vmf) return err; } - -/* - * Find the first extent at or after @lblk in an inode that is not a hole. - * Search for @map_len blocks at most. The extent is returned in @result. - * - * The function returns 1 if we found an extent. The function returns 0 in - * case there is no extent at or after @lblk and in that case also sets - * @result->es_len to 0. In case of error, the error code is returned. - */ -int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, - unsigned int map_len, struct extent_status *result) -{ - struct ext4_map_blocks map; - struct extent_status es = {}; - int ret; - - map.m_lblk = lblk; - map.m_len = map_len; - - /* - * For non-extent based files this loop may iterate several times since - * we do not determine full hole size. - */ - while (map.m_len > 0) { - ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) - return ret; - /* There's extent covering m_lblk? Just return it. */ - if (ret > 0) { - int status; - - ext4_es_store_pblock(result, map.m_pblk); - result->es_lblk = map.m_lblk; - result->es_len = map.m_len; - if (map.m_flags & EXT4_MAP_UNWRITTEN) - status = EXTENT_STATUS_UNWRITTEN; - else - status = EXTENT_STATUS_WRITTEN; - ext4_es_store_status(result, status); - return 1; - } - ext4_es_find_delayed_extent_range(inode, map.m_lblk, - map.m_lblk + map.m_len - 1, - &es); - /* Is delalloc data before next block in extent tree? */ - if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) { - ext4_lblk_t offset = 0; - - if (es.es_lblk < lblk) - offset = lblk - es.es_lblk; - result->es_lblk = es.es_lblk + offset; - ext4_es_store_pblock(result, - ext4_es_pblock(&es) + offset); - result->es_len = es.es_len - offset; - ext4_es_store_status(result, ext4_es_status(&es)); - - return 1; - } - /* There's a hole at m_lblk, advance us after it */ - map.m_lblk += map.m_len; - map_len -= map.m_len; - map.m_len = map_len; - cond_resched(); - } - result->es_len = 0; - return 0; -} diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 75d83471f65c..b7558f292420 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -15,6 +15,7 @@ #include <linux/mount.h> #include <linux/file.h> #include <linux/quotaops.h> +#include <linux/random.h> #include <linux/uuid.h> #include <linux/uaccess.h> #include <linux/delay.h> @@ -99,7 +100,6 @@ static long swap_inode_boot_loader(struct super_block *sb, int err; struct inode *inode_bl; struct ext4_inode_info *ei_bl; - struct ext4_sb_info *sbi = EXT4_SB(sb); if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) return -EINVAL; @@ -158,10 +158,8 @@ static long swap_inode_boot_loader(struct super_block *sb, inode->i_ctime = inode_bl->i_ctime = current_time(inode); - spin_lock(&sbi->s_next_gen_lock); - inode->i_generation = sbi->s_next_generation++; - inode_bl->i_generation = sbi->s_next_generation++; - spin_unlock(&sbi->s_next_gen_lock); + inode->i_generation = prandom_u32(); + inode_bl->i_generation = prandom_u32(); ext4_discard_preallocations(inode); @@ -291,10 +289,20 @@ flags_err: if (err) goto flags_out; - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) + if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { + /* + * Changes to the journaling mode can cause unsafe changes to + * S_DAX if we are using the DAX mount option. + */ + if (test_opt(inode->i_sb, DAX)) { + err = -EBUSY; + goto flags_out; + } + err = ext4_change_inode_journal_flag(inode, jflag); - if (err) - goto flags_out; + if (err) + goto flags_out; + } if (migrate) { if (flags & EXT4_EXTENTS_FL) err = ext4_ext_migrate(inode); @@ -862,12 +870,6 @@ group_add_out: int err = 0, err2 = 0; ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; - if (ext4_has_feature_bigalloc(sb)) { - ext4_msg(sb, KERN_ERR, - "Online resizing not (yet) supported with bigalloc"); - return -EOPNOTSUPP; - } - if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, sizeof(__u64))) { return -EFAULT; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 701085620cd8..d9f8b90a93ed 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4994,8 +4994,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_buddy e4b; - int err = 0, ret, blk_free_count; - ext4_grpblk_t blocks_freed; + int err = 0, ret, free_clusters_count; + ext4_grpblk_t clusters_freed; + ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block); + ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1); + unsigned long cluster_count = last_cluster - first_cluster + 1; ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); @@ -5007,8 +5010,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, * Check to see if we are freeing blocks across a group * boundary. */ - if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { - ext4_warning(sb, "too much blocks added to group %u", + if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) { + ext4_warning(sb, "too many blocks added to group %u", block_group); err = -EINVAL; goto error_return; @@ -5054,14 +5057,14 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (err) goto error_return; - for (i = 0, blocks_freed = 0; i < count; i++) { + for (i = 0, clusters_freed = 0; i < cluster_count; i++) { BUFFER_TRACE(bitmap_bh, "clear bit"); if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { ext4_error(sb, "bit already cleared for block %llu", (ext4_fsblk_t)(block + i)); BUFFER_TRACE(bitmap_bh, "bit already cleared"); } else { - blocks_freed++; + clusters_freed++; } } @@ -5075,19 +5078,20 @@ int ext4_g |