diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-21 11:03:38 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-21 11:03:38 -0700 |
commit | d723b99ec9e502a414a96a51ec229333f807b47e (patch) | |
tree | 2602310329eb4ecc3438a450c38781e1d44bf81d | |
parent | 5e0b17b026eb7c6de9baa9b0d45a51b05f05abe1 (diff) | |
parent | 27bc446e2def38db3244a6eb4bb1d6312936610a (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"Improvements to ext4's block allocator performance for very large file
systems, especially when the file system or files which are highly
fragmented. There is a new mount option, prefetch_block_bitmaps which
will pull in the block bitmaps and set up the in-memory buddy bitmaps
when the file system is initially mounted.
Beyond that, a lot of bug fixes and cleanups. In particular, a number
of changes to make ext4 more robust in the face of write errors or
file system corruptions"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (46 commits)
ext4: limit the length of per-inode prealloc list
ext4: reorganize if statement of ext4_mb_release_context()
ext4: add mb_debug logging when there are lost chunks
ext4: Fix comment typo "the the".
jbd2: clean up checksum verification in do_one_pass()
ext4: change to use fallthrough macro
ext4: remove unused parameter of ext4_generic_delete_entry function
mballoc: replace seq_printf with seq_puts
ext4: optimize the implementation of ext4_mb_good_group()
ext4: delete invalid comments near ext4_mb_check_limits()
ext4: fix typos in ext4_mb_regular_allocator() comment
ext4: fix checking of directory entry validity for inline directories
fs: prevent BUG_ON in submit_bh_wbc()
ext4: correctly restore system zone info when remount fails
ext4: handle add_system_zone() failure in ext4_setup_system_zone()
ext4: fold ext4_data_block_valid_rcu() into the caller
ext4: check journal inode extents more carefully
ext4: don't allow overlapping system zones
ext4: handle error of ext4_setup_system_zone() on remount
ext4: delete the invalid BUGON in ext4_mb_load_buddy_gfp()
...
-rw-r--r-- | Documentation/admin-guide/ext4.rst | 23 | ||||
-rw-r--r-- | Documentation/filesystems/ext4/about.rst | 2 | ||||
-rw-r--r-- | fs/buffer.c | 9 | ||||
-rw-r--r-- | fs/ext4/Kconfig | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 16 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 159 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 91 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 25 | ||||
-rw-r--r-- | fs/ext4/extents.c | 42 | ||||
-rw-r--r-- | fs/ext4/file.c | 11 | ||||
-rw-r--r-- | fs/ext4/hash.c | 4 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 20 | ||||
-rw-r--r-- | fs/ext4/inline.c | 4 | ||||
-rw-r--r-- | fs/ext4/inode.c | 30 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 34 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 289 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 4 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 4 | ||||
-rw-r--r-- | fs/ext4/namei.c | 66 | ||||
-rw-r--r-- | fs/ext4/readpage.c | 4 | ||||
-rw-r--r-- | fs/ext4/super.c | 268 | ||||
-rw-r--r-- | fs/ext4/sysfs.c | 13 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 3 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 16 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 46 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 33 | ||||
-rw-r--r-- | include/linux/jbd2.h | 2 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 85 |
28 files changed, 881 insertions, 424 deletions
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst index a683976fad6d..d2795ca6821e 100644 --- a/Documentation/admin-guide/ext4.rst +++ b/Documentation/admin-guide/ext4.rst @@ -489,6 +489,9 @@ Files in /sys/fs/ext4/<devname>: multiple of this tuning parameter if the stripe size is not set in the ext4 superblock + mb_max_inode_prealloc + The maximum length of per-inode ext4_prealloc_space list. + mb_max_to_scan The maximum number of extents the multiblock allocator will search to find the best extent. @@ -529,21 +532,21 @@ Files in /sys/fs/ext4/<devname>: Ioctls ====== -There is some Ext4 specific functionality which can be accessed by applications -through the system call interfaces. The list of all Ext4 specific ioctls are -shown in the table below. +Ext4 implements various ioctls which can be used by applications to access +ext4-specific functionality. An incomplete list of these ioctls is shown in the +table below. This list includes truly ext4-specific ioctls (``EXT4_IOC_*``) as +well as ioctls that may have been ext4-specific originally but are now supported +by some other filesystem(s) too (``FS_IOC_*``). -Table of Ext4 specific ioctls +Table of Ext4 ioctls - EXT4_IOC_GETFLAGS + FS_IOC_GETFLAGS Get additional attributes associated with inode. The ioctl argument is - an integer bitfield, with bit values described in ext4.h. This ioctl is - an alias for FS_IOC_GETFLAGS. + an integer bitfield, with bit values described in ext4.h. - EXT4_IOC_SETFLAGS + FS_IOC_SETFLAGS Set additional attributes associated with inode. The ioctl argument is - an integer bitfield, with bit values described in ext4.h. This ioctl is - an alias for FS_IOC_SETFLAGS. + an integer bitfield, with bit values described in ext4.h. EXT4_IOC_GETVERSION, EXT4_IOC_GETVERSION_OLD Get the inode i_generation number stored for each inode. The diff --git a/Documentation/filesystems/ext4/about.rst b/Documentation/filesystems/ext4/about.rst index 0aadba052264..cc76b577d2f4 100644 --- a/Documentation/filesystems/ext4/about.rst +++ b/Documentation/filesystems/ext4/about.rst @@ -39,6 +39,6 @@ entry. Other References ---------------- -Also see http://www.nongnu.org/ext2-doc/ for quite a collection of +Also see https://www.nongnu.org/ext2-doc/ for quite a collection of information about ext2/3. Here's another old reference: http://wiki.osdev.org/Ext2 diff --git a/fs/buffer.c b/fs/buffer.c index 061dd202979d..d468ed9981e0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3157,6 +3157,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags) WARN_ON(atomic_read(&bh->b_count) < 1); lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { + /* + * The bh should be mapped, but it might not be if the + * device was hot-removed. Not much we can do but fail the I/O. + */ + if (!buffer_mapped(bh)) { + unlock_buffer(bh); + return -EIO; + } + get_bh(bh); bh->b_end_io = end_buffer_write_sync; ret = submit_bh(REQ_OP_WRITE, op_flags, bh); diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 1afa5a4bcb5f..619dd35ddd48 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -110,7 +110,7 @@ config EXT4_KUNIT_TESTS This builds the ext4 KUnit tests. KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs + in TAP format (https://testanything.org/). Only useful for kernel devs running KUnit test harness and are not for inclusion into a production build. diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ba46d87cdf1..48c3df47748d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -413,7 +413,8 @@ verified: * Return buffer_head on success or an ERR_PTR in case of failure. */ struct buffer_head * -ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) +ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group, + bool ignore_locked) { struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -441,6 +442,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) return ERR_PTR(-ENOMEM); } + if (ignore_locked && buffer_locked(bh)) { + /* buffer under IO already, return if called for prefetching */ + put_bh(bh); + return NULL; + } + if (bitmap_uptodate(bh)) goto verify; @@ -487,10 +494,11 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) * submit the buffer_head for reading */ set_buffer_new(bh); - trace_ext4_read_block_bitmap_load(sb, block_group); + trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked); bh->b_end_io = ext4_end_bitmap_read; get_bh(bh); - submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO | + (ignore_locked ? REQ_RAHEAD : 0), bh); return bh; verify: err = ext4_validate_block_bitmap(sb, desc, block_group, bh); @@ -534,7 +542,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) struct buffer_head *bh; int err; - bh = ext4_read_block_bitmap_nowait(sb, block_group); + bh = ext4_read_block_bitmap_nowait(sb, block_group, false); if (IS_ERR(bh)) return bh; err = ext4_wait_block_bitmap(sb, block_group, bh); diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 16e9b2fda03a..c54ba52f2dd4 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -24,6 +24,7 @@ struct ext4_system_zone { struct rb_node node; ext4_fsblk_t start_blk; unsigned int count; + u32 ino; }; static struct kmem_cache *ext4_system_zone_cachep; @@ -45,7 +46,8 @@ void ext4_exit_system_zone(void) static inline int can_merge(struct ext4_system_zone *entry1, struct ext4_system_zone *entry2) { - if ((entry1->start_blk + entry1->count) == entry2->start_blk) + if ((entry1->start_blk + entry1->count) == entry2->start_blk && + entry1->ino == entry2->ino) return 1; return 0; } @@ -66,9 +68,9 @@ static void release_system_zone(struct ext4_system_blocks *system_blks) */ static int add_system_zone(struct ext4_system_blocks *system_blks, ext4_fsblk_t start_blk, - unsigned int count) + unsigned int count, u32 ino) { - struct ext4_system_zone *new_entry = NULL, *entry; + struct ext4_system_zone *new_entry, *entry; struct rb_node **n = &system_blks->root.rb_node, *node; struct rb_node *parent = NULL, *new_node = NULL; @@ -79,30 +81,21 @@ static int add_system_zone(struct ext4_system_blocks *system_blks, n = &(*n)->rb_left; else if (start_blk >= (entry->start_blk + entry->count)) n = &(*n)->rb_right; - else { - if (start_blk + count > (entry->start_blk + - entry->count)) - entry->count = (start_blk + count - - entry->start_blk); - new_node = *n; - new_entry = rb_entry(new_node, struct ext4_system_zone, - node); - break; - } + else /* Unexpected overlap of system zones. */ + return -EFSCORRUPTED; } - if (!new_entry) { - new_entry = kmem_cache_alloc(ext4_system_zone_cachep, - GFP_KERNEL); - if (!new_entry) - return -ENOMEM; - new_entry->start_blk = start_blk; - new_entry->count = count; - new_node = &new_entry->node; - - rb_link_node(new_node, parent, n); - rb_insert_color(new_node, &system_blks->root); - } + new_entry = kmem_cache_alloc(ext4_system_zone_cachep, + GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->start_blk = start_blk; + new_entry->count = count; + new_entry->ino = ino; + new_node = &new_entry->node; + + rb_link_node(new_node, parent, n); + rb_insert_color(new_node, &system_blks->root); /* Can we merge to the left? */ node = rb_prev(new_node); @@ -151,40 +144,6 @@ static void debug_print_tree(struct ext4_sb_info *sbi) printk(KERN_CONT "\n"); } -/* - * Returns 1 if the passed-in block region (start_blk, - * start_blk+count) is valid; 0 if some part of the block region - * overlaps with filesystem metadata blocks. - */ -static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, - struct ext4_system_blocks *system_blks, - ext4_fsblk_t start_blk, - unsigned int count) -{ - struct ext4_system_zone *entry; - struct rb_node *n; - - if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || - (start_blk + count < start_blk) || - (start_blk + count > ext4_blocks_count(sbi->s_es))) - return 0; - - if (system_blks == NULL) - return 1; - - n = system_blks->root.rb_node; - while (n) { - entry = rb_entry(n, struct ext4_system_zone, node); - if (start_blk + count - 1 < entry->start_blk) - n = n->rb_left; - else if (start_blk >= (entry->start_blk + entry->count)) - n = n->rb_right; - else - return 0; - } - return 1; -} - static int ext4_protect_reserved_inode(struct super_block *sb, struct ext4_system_blocks *system_blks, u32 ino) @@ -214,19 +173,18 @@ static int ext4_protect_reserved_inode(struct super_block *sb, if (n == 0) { i++; } else { - if (!ext4_data_block_valid_rcu(sbi, system_blks, - map.m_pblk, n)) { - err = -EFSCORRUPTED; - __ext4_error(sb, __func__, __LINE__, -err, - map.m_pblk, "blocks %llu-%llu " - "from inode %u overlap system zone", - map.m_pblk, - map.m_pblk + map.m_len - 1, ino); + err = add_system_zone(system_blks, map.m_pblk, n, ino); + if (err < 0) { + if (err == -EFSCORRUPTED) { + __ext4_error(sb, __func__, __LINE__, + -err, map.m_pblk, + "blocks %llu-%llu from inode %u overlap system zone", + map.m_pblk, + map.m_pblk + map.m_len - 1, + ino); + } break; } - err = add_system_zone(system_blks, map.m_pblk, n); - if (err < 0) - break; i += n; } } @@ -262,14 +220,6 @@ int ext4_setup_system_zone(struct super_block *sb) int flex_size = ext4_flex_bg_size(sbi); int ret; - if (!test_opt(sb, BLOCK_VALIDITY)) { - if (sbi->system_blks) - ext4_release_system_zone(sb); - return 0; - } - if (sbi->system_blks) - return 0; - system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); if (!system_blks) return -ENOMEM; @@ -277,22 +227,25 @@ int ext4_setup_system_zone(struct super_block *sb) for (i=0; i < ngroups; i++) { cond_resched(); if (ext4_bg_has_super(sb, i) && - ((i < 5) || ((i % flex_size) == 0))) - add_system_zone(system_blks, + ((i < 5) || ((i % flex_size) == 0))) { + ret = add_system_zone(system_blks, ext4_group_first_block_no(sb, i), - ext4_bg_num_gdb(sb, i) + 1); + ext4_bg_num_gdb(sb, i) + 1, 0); + if (ret) + goto err; + } gdp = ext4_get_group_desc(sb, i, NULL); ret = add_system_zone(system_blks, - ext4_block_bitmap(sb, gdp), 1); + ext4_block_bitmap(sb, gdp), 1, 0); if (ret) goto err; ret = add_system_zone(system_blks, - ext4_inode_bitmap(sb, gdp), 1); + ext4_inode_bitmap(sb, gdp), 1, 0); if (ret) goto err; ret = add_system_zone(system_blks, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group); + sbi->s_itb_per_group, 0); if (ret) goto err; } @@ -341,11 +294,24 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); } -int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, +/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with some other filesystem metadata blocks. + */ +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, unsigned int count) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_system_blocks *system_blks; - int ret; + struct ext4_system_zone *entry; + struct rb_node *n; + int ret = 1; + + if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count < start_blk) || + (start_blk + count > ext4_blocks_count(sbi->s_es))) + return 0; /* * Lock the system zone to prevent it being released concurrently @@ -354,8 +320,22 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, */ rcu_read_lock(); system_blks = rcu_dereference(sbi->system_blks); - ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk, - count); + if (system_blks == NULL) + goto out_rcu; + + n = system_blks->root.rb_node; + while (n) { + entry = rb_entry(n, struct ext4_system_zone, node); + if (start_blk + count - 1 < entry->start_blk) + n = n->rb_left; + else if (start_blk >= (entry->start_blk + entry->count)) + n = n->rb_right; + else { + ret = (entry->ino == inode->i_ino); + break; + } + } +out_rcu: rcu_read_unlock(); return ret; } @@ -374,8 +354,7 @@ int ext4_check_blockref(const char *function, unsigned int line, while (bref < p+max) { blk = le32_to_cpu(*bref++); if (blk && - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), - blk, 1))) { + unlikely(!ext4_inode_block_valid(inode, blk, 1))) { ext4_error_inode(inode, function, line, blk, "invalid block"); return -EFSCORRUPTED; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 42f5060f3cdf..523e00d7b392 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -434,10 +434,36 @@ struct flex_groups { #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */ - -/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ +/* User modifiable flags */ +#define EXT4_FL_USER_MODIFIABLE (EXT4_SECRM_FL | \ + EXT4_UNRM_FL | \ + EXT4_COMPR_FL | \ + EXT4_SYNC_FL | \ + EXT4_IMMUTABLE_FL | \ + EXT4_APPEND_FL | \ + EXT4_NODUMP_FL | \ + EXT4_NOATIME_FL | \ + EXT4_JOURNAL_DATA_FL | \ + EXT4_NOTAIL_FL | \ + EXT4_DIRSYNC_FL | \ + EXT4_TOPDIR_FL | \ + EXT4_EXTENTS_FL | \ + 0x00400000 /* EXT4_EOFBLOCKS_FL */ | \ + EXT4_DAX_FL | \ + EXT4_PROJINHERIT_FL | \ + EXT4_CASEFOLD_FL) + +/* User visible flags */ +#define EXT4_FL_USER_VISIBLE (EXT4_FL_USER_MODIFIABLE | \ + EXT4_DIRTY_FL | \ + EXT4_COMPRBLK_FL | \ + EXT4_NOCOMPR_FL | \ + EXT4_ENCRYPT_FL | \ + EXT4_INDEX_FL | \ + EXT4_VERITY_FL | \ + EXT4_INLINE_DATA_FL) + +/* Flags we can manipulate with through FS_IOC_FSSETXATTR */ #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ EXT4_IMMUTABLE_FL | \ EXT4_APPEND_FL | \ @@ -669,8 +695,6 @@ enum { /* * ioctl commands */ -#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS -#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT4_IOC_GETVERSION _IOR('f', 3, long) #define EXT4_IOC_SETVERSION _IOW('f', 4, long) #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION @@ -687,17 +711,11 @@ enum { #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) #define EXT4_IOC_SWAP_BOOT _IO('f', 17) #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) -#define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY -#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT -#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY /* ioctl codes 19--39 are reserved for fscrypt */ #define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40) #define EXT4_IOC_GETSTATE _IOW('f', 41, __u32) #define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap) -#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR -#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR - #define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32) /* @@ -722,8 +740,6 @@ enum { /* * ioctl commands in 32 bit emulation */ -#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS #define EXT4_IOC32_GETVERSION _IOR('f', 3, int) #define EXT4_IOC32_SETVERSION _IOW('f', 4, int) #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) @@ -1054,6 +1070,7 @@ struct ext4_inode_info { struct timespec64 i_crtime; /* mballoc */ + atomic_t i_prealloc_active; struct list_head i_prealloc_list; spinlock_t i_prealloc_lock; @@ -1172,6 +1189,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ +#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ @@ -1501,10 +1519,13 @@ struct ext4_sb_info { unsigned int s_mb_stats; unsigned int s_mb_order2_reqs; unsigned int s_mb_group_prealloc; + unsigned int s_mb_max_inode_prealloc; unsigned int s_max_dir_size_kb; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; unsigned long s_mb_last_start; + unsigned int s_mb_prefetch; + unsigned int s_mb_prefetch_limit; /* stats for buddy allocator */ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ @@ -1572,6 +1593,8 @@ struct ext4_sb_info { struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; + atomic_t s_warning_count; + atomic_t s_msg_count; /* Encryption context for '-o test_dummy_encryption' */ struct fscrypt_dummy_context s_dummy_enc_ctx; @@ -1585,6 +1608,9 @@ struct ext4_sb_info { #ifdef CONFIG_EXT4_DEBUG unsigned long s_simulate_fail; #endif + /* Record the errseq of the backing block device */ + errseq_t s_bdev_wb_err; + spinlock_t s_bdev_wb_lock; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -2313,9 +2339,15 @@ struct ext4_lazy_init { struct mutex li_list_mtx; }; +enum ext4_li_mode { + EXT4_LI_MODE_PREFETCH_BBITMAP, + EXT4_LI_MODE_ITABLE, +}; + struct ext4_li_request { struct super_block *lr_super; - struct ext4_sb_info *lr_sbi; + enum ext4_li_mode lr_mode; + ext4_group_t lr_first_not_zeroed; ext4_group_t lr_next_group; struct list_head lr_request; unsigned long lr_next_sched; @@ -2446,7 +2478,8 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, - ext4_group_t block_group); + ext4_group_t block_group, + bool ignore_locked); extern int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, struct buffer_head *bh); @@ -2651,9 +2684,15 @@ extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); extern int ext4_mb_reserve_blocks(struct super_block *, int); -extern void ext4_discard_preallocations(struct inode *); +extern void ext4_discard_preallocations(struct inode *, unsigned int); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); +extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, + ext4_group_t group, + unsigned int nr, int *cnt); +extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, + unsigned int nr); + extern void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t block, unsigned long count, int flags); @@ -2765,8 +2804,7 @@ extern int ext4_search_dir(struct buffer_head *bh, struct ext4_filename *fname, unsigned int offset, struct ext4_dir_entry_2 **res_dir); -extern int ext4_generic_delete_entry(handle_t *handle, - struct inode *dir, +extern int ext4_generic_delete_entry(struct inode *dir, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh, void *entry_buf, @@ -2924,12 +2962,6 @@ do { \ #endif -extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, - __u32 compat); -extern int ext4_update_rocompat_feature(handle_t *handle, - struct super_block *sb, __u32 rocompat); -extern int ext4_update_incompat_feature(handle_t *handle, - struct super_block *sb, __u32 incompat); extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg); extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, @@ -3145,6 +3177,7 @@ struct ext4_group_info { (1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT) #define EXT4_GROUP_INFO_IBITMAP_CORRUPT \ (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT) +#define EXT4_GROUP_INFO_BBITMAP_READ_BIT 4 #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) @@ -3159,6 +3192,8 @@ struct ext4_group_info { (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) #define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_TEST_AND_SET_READ(grp) \ + (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state))) #define EXT4_MAX_CONTENTION 8 #define EXT4_CONTENTION_THRESHOLD 2 @@ -3363,9 +3398,9 @@ extern void ext4_release_system_zone(struct super_block *sb); extern int ext4_setup_system_zone(struct super_block *sb); extern int __init ext4_init_system_zone(void); extern void ext4_exit_system_zone(void); -extern int ext4_data_block_valid(struct ext4_sb_info *sbi, - ext4_fsblk_t start_blk, - unsigned int count); +extern int ext4_inode_block_valid(struct inode *inode, + ext4_fsblk_t start_blk, + unsigned int count); extern int ext4_check_blockref(const char *, unsigned int, struct inode *, __le32 *, unsigned int); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0c76cdd44d90..760b9ee49dc0 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -195,6 +195,28 @@ static void ext4_journal_abort_handle(const char *caller, unsigned int line, jbd2_journal_abort_handle(handle); } +static void ext4_check_bdev_write_error(struct super_block *sb) +{ + struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; + + /* + * If the block device has write error flag, it may have failed to + * async write out metadata buffers in the background. In this case, + * we could read old data from disk and write it out again, which + * may lead to on-disk filesystem inconsistency. + */ + if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) { + spin_lock(&sbi->s_bdev_wb_lock); + err = errseq_check_and_advance(&mapping->wb_err, &sbi->s_bdev_wb_err); + spin_unlock(&sbi->s_bdev_wb_lock); + if (err) + ext4_error_err(sb, -err, + "Error while async write back metadata"); + } +} + int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh) { @@ -202,6 +224,9 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, might_sleep(); + if (bh->b_bdev->bd_super) + ext4_check_bdev_write_error(bh->b_bdev->bd_super); + if (ext4_handle_valid(handle)) { err = jbd2_journal_get_write_access(handle, bh); if (err) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 221f240eae60..a0481582187a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * i_mutex. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; @@ -340,7 +340,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) */ if (lblock + len <= lblock) return 0; - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); + return ext4_inode_block_valid(inode, block, len); } static int ext4_valid_extent_idx(struct inode *inode, @@ -348,7 +348,7 @@ static int ext4_valid_extent_idx(struct inode *inode, { ext4_fsblk_t block = ext4_idx_pblock(ext_idx); - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); + return ext4_inode_block_valid(inode, block, 1); } static int ext4_valid_extent_entries(struct inode *inode, @@ -507,14 +507,10 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - if (!ext4_has_feature_journal(inode->i_sb) || - (inode->i_ino != - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; - } + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries @@ -693,10 +689,8 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path) return; depth = path->p_depth; for (i = 0; i <= depth; i++, path++) { - if (path->p_bh) { - brelse(path->p_bh); - path->p_bh = NULL; - } + brelse(path->p_bh); + path->p_bh = NULL; } } @@ -1915,7 +1909,7 @@ out: /* * ext4_ext_insert_extent: - * tries to merge requsted extent into the existing extent or + * tries to merge requested extent into the existing extent or * inserts requested extent as new one into the tree, * creating new leaf in the no-space case. */ @@ -3125,7 +3119,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * * * Splits extent [a, b] into two extents [a, @split) and [@split, b], states - * of which are deterimined by split_flag. + * of which are determined by split_flag. * * There are two cases: * a> the extent are splitted into two extent. @@ -3650,7 +3644,7 @@ static int ext4_split_convert_extents(handle_t *handle, eof_block = map->m_lblk + map->m_len; /* * It is safe to convert extent to initialized via explicit - * zeroout only if extent is fully insde i_size or new_size. + * zeroout only if extent is fully inside i_size or new_size. */ depth = ext_depth(inode); ex = path[depth].p_ext; @@ -4272,7 +4266,7 @@ got_allocated_blocks: * not a good idea to call discard here directly, * but otherwise we'd need to call it every free(). */ - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; ext4_free_blocks(handle, inode, NULL, newblock, @@ -4495,7 +4489,7 @@ static long ext4_zero_range(struct file *file, loff_t o |