diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 11:34:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 11:34:19 -0700 |
commit | 9f2e3a53f7ec9ef55e9d01bc29a6285d291c151e (patch) | |
tree | c25b0eb20dac1a39a6b55c521b2658dcceb7d532 | |
parent | 78438ce18f26dbcaa8993bb45d20ffb0cec3bc3e (diff) | |
parent | b1c16ac978fd40ae636e629bb69a652df7eebdc2 (diff) |
Merge tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This time the majority of changes are cleanups, though there's still a
number of changes of user interest.
User visible changes:
- better read time and write checks to catch errors early and before
writing data to disk (to catch potential memory corruption on data
that get checksummed)
- qgroups + metadata relocation: last speed up patch int the series
to address the slowness, there should be no overhead comparing
balance with and without qgroups
- FIEMAP ioctl does not start a transaction unnecessarily, this can
result in a speed up and less blocking due to IO
- LOGICAL_INO (v1, v2) does not start transaction unnecessarily, this
can speed up the mentioned ioctl and scrub as well
- fsync on files with many (but not too many) hardlinks is faster,
finer decision if the links should be fsynced individually or
completely
- send tries harder to find ranges to clone
- trim/discard will skip unallocated chunks that haven't been touched
since the last mount
Fixes:
- send flushes delayed allocation before start, otherwise it could
miss some changes in case of a very recent rw->ro switch of a
subvolume
- fix fallocate with qgroups that could lead to space accounting
underflow, reported as a warning
- trim/discard ioctl honours the requested range
- starting send and dedupe on a subvolume at the same time will let
only one of them succeed, this is to prevent changes that send
could miss due to dedupe; both operations are restartable
Core changes:
- more tree-checker validations, errors reported by fuzzing tools:
- device item
- inode item
- block group profiles
- tracepoints for extent buffer locking
- async cow preallocates memory to avoid errors happening too deep in
the call chain
- metadata reservations for delalloc reworked to better adapt in
many-writers/low-space scenarios
- improved space flushing logic for intense DIO vs buffered workloads
- lots of cleanups
- removed unused struct members
- redundant argument removal
- properties and xattrs
- extent buffer locking
- selftests
- use common file type conversions
- many-argument functions reduction"
* tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (227 commits)
btrfs: Use kvmalloc for allocating compressed path context
btrfs: Factor out common extent locking code in submit_compressed_extents
btrfs: Set io_tree only once in submit_compressed_extents
btrfs: Replace clear_extent_bit with unlock_extent
btrfs: Make compress_file_range take only struct async_chunk
btrfs: Remove fs_info from struct async_chunk
btrfs: Rename async_cow to async_chunk
btrfs: Preallocate chunks in cow_file_range_async
btrfs: reserve delalloc metadata differently
btrfs: track DIO bytes in flight
btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop
btrfs: delete unused function btrfs_set_prop_trans
btrfs: start transaction in xattr_handler_set_prop
btrfs: drop local copy of inode i_mode
btrfs: drop old_fsflags in btrfs_ioctl_setflags
btrfs: modify local copy of btrfs_inode flags
btrfs: drop useless inode i_flags copy and restore
btrfs: start transaction in btrfs_ioctl_setflags()
btrfs: export btrfs_set_prop
btrfs: refactor btrfs_set_props to validate externally
...
64 files changed, 3287 insertions, 2194 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 5810463dc6d2..a0af1b952c4d 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -93,7 +93,11 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_setxattr(trans, inode, name, value, size, 0); + if (trans) + ret = btrfs_setxattr(trans, inode, name, value, size, 0); + else + ret = btrfs_setxattr_trans(inode, name, value, size, 0); + out: kfree(value); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 11459fe84a29..982152d3f920 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -791,7 +791,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, count = node->ref_mod * -1; break; default: - BUG_ON(1); + BUG(); } *total_refs += count; switch (node->type) { @@ -1460,8 +1460,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, * callers (such as fiemap) which want to know whether the extent is * shared but do not need a ref count. * - * This attempts to allocate a transaction in order to account for - * delayed refs, but continues on even when the alloc fails. + * This attempts to attach to the running transaction in order to account for + * delayed refs, but continues on even when no running transaction exists. * * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. */ @@ -1484,13 +1484,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) tmp = ulist_alloc(GFP_NOFS); roots = ulist_alloc(GFP_NOFS); if (!tmp || !roots) { - ulist_free(tmp); - ulist_free(roots); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - trans = btrfs_join_transaction(root); + trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { + ret = PTR_ERR(trans); + goto out; + } trans = NULL; down_read(&fs_info->commit_root_sem); } else { @@ -1523,6 +1526,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) } else { up_read(&fs_info->commit_root_sem); } +out: ulist_free(tmp); ulist_free(roots); return ret; @@ -1747,7 +1751,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, else if (flags & BTRFS_EXTENT_FLAG_DATA) *flags_ret = BTRFS_EXTENT_FLAG_DATA; else - BUG_ON(1); + BUG(); return 0; } @@ -1912,13 +1916,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, extent_item_objectid); if (!search_commit_root) { - trans = btrfs_join_transaction(fs_info->extent_root); - if (IS_ERR(trans)) - return PTR_ERR(trans); + trans = btrfs_attach_transaction(fs_info->extent_root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT && + PTR_ERR(trans) != -EROFS) + return PTR_ERR(trans); + trans = NULL; + } + } + + if (trans) btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); - } else { + else down_read(&fs_info->commit_root_sem); - } ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, tree_mod_seq_elem.seq, &refs, @@ -1951,7 +1961,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, free_leaf_list(refs); out: - if (!search_commit_root) { + if (trans) { btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); btrfs_end_transaction(trans); } else { diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6f5d07415dab..d5b438706b77 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -148,12 +148,6 @@ struct btrfs_inode { u64 last_unlink_trans; /* - * Track the transaction id of the last transaction used to create a - * hard link for the inode. This is used by the log tree (fsync). - */ - u64 last_link_trans; - - /* * Number of bytes outstanding that are going to need csums. This is * used in ENOSPC accounting. */ @@ -203,8 +197,6 @@ struct btrfs_inode { struct inode vfs_inode; }; -extern unsigned char btrfs_filetype_table[]; - static inline struct btrfs_inode *BTRFS_I(const struct inode *inode) { return container_of(inode, struct btrfs_inode, vfs_inode); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 4f2a8ae0aa42..1463e14af2fb 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -251,7 +251,7 @@ static void end_compressed_bio_write(struct bio *bio) cb->compressed_pages[0]->mapping = cb->inode->i_mapping; btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0], cb->start, cb->start + cb->len - 1, - bio->bi_status ? BLK_STS_OK : BLK_STS_NOTSUPP); + bio->bi_status == BLK_STS_OK); cb->compressed_pages[0]->mapping = NULL; end_compressed_writeback(inode, cb); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 324df36d28bf..5df76c17775a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -21,11 +21,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *ins_key, struct btrfs_path *path, int data_size, int extend); static int push_node_left(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, int empty); static int balance_node_right(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, @@ -726,11 +724,11 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) return __tree_mod_log_search(fs_info, start, min_seq, 0); } -static noinline int -tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, +static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, unsigned long src_offset, int nr_items) { + struct btrfs_fs_info *fs_info = dst->fs_info; int ret = 0; struct tree_mod_elem **tm_list = NULL; struct tree_mod_elem **tm_list_add, **tm_list_rem; @@ -950,7 +948,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (new_flags != 0) { int level = btrfs_header_level(buf); - ret = btrfs_set_disk_extent_flags(trans, fs_info, + ret = btrfs_set_disk_extent_flags(trans, buf->start, buf->len, new_flags, level, 0); @@ -970,7 +968,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (ret) return ret; } - clean_tree_block(fs_info, buf); + btrfs_clean_tree_block(buf); *last_ref = 1; } return 0; @@ -1792,9 +1790,8 @@ static void root_sub_used(struct btrfs_root *root, u32 size) /* given a node and slot number, this reads the blocks it points to. The * extent buffer is returned with a reference taken (but unlocked). */ -static noinline struct extent_buffer * -read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, - int slot) +static noinline struct extent_buffer *read_node_slot( + struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); struct extent_buffer *eb; @@ -1806,7 +1803,7 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, BUG_ON(level == 0); btrfs_node_key_to_cpu(parent, &first_key, slot); - eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), + eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), btrfs_node_ptr_generation(parent, slot), level - 1, &first_key); if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { @@ -1863,7 +1860,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, return 0; /* promote the child to a root */ - child = read_node_slot(fs_info, mid, 0); + child = read_node_slot(mid, 0); if (IS_ERR(child)) { ret = PTR_ERR(child); btrfs_handle_fs_error(fs_info, ret, NULL); @@ -1888,7 +1885,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, path->locks[level] = 0; path->nodes[level] = NULL; - clean_tree_block(fs_info, mid); + btrfs_clean_tree_block(mid); btrfs_tree_unlock(mid); /* once for the path */ free_extent_buffer(mid); @@ -1903,7 +1900,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4) return 0; - left = read_node_slot(fs_info, parent, pslot - 1); + left = read_node_slot(parent, pslot - 1); if (IS_ERR(left)) left = NULL; @@ -1918,7 +1915,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, } } - right = read_node_slot(fs_info, parent, pslot + 1); + right = read_node_slot(parent, pslot + 1); if (IS_ERR(right)) right = NULL; @@ -1936,7 +1933,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* first, try to make some room in the middle buffer */ if (left) { orig_slot += btrfs_header_nritems(left); - wret = push_node_left(trans, fs_info, left, mid, 1); + wret = push_node_left(trans, left, mid, 1); if (wret < 0) ret = wret; } @@ -1945,11 +1942,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, * then try to empty the right most buffer into the middle */ if (right) { - wret = push_node_left(trans, fs_info, mid, right, 1); + wret = push_node_left(trans, mid, right, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { - clean_tree_block(fs_info, right); + btrfs_clean_tree_block(right); btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); @@ -1981,20 +1978,20 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_handle_fs_error(fs_info, ret, NULL); goto enospc; } - wret = balance_node_right(trans, fs_info, mid, left); + wret = balance_node_right(trans, mid, left); if (wret < 0) { ret = wret; goto enospc; } if (wret == 1) { - wret = push_node_left(trans, fs_info, left, mid, 1); + wret = push_node_left(trans, left, mid, 1); if (wret < 0) ret = wret; } BUG_ON(wret == 1); } if (btrfs_header_nritems(mid) == 0) { - clean_tree_block(fs_info, mid); + btrfs_clean_tree_block(mid); btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); @@ -2078,7 +2075,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (!parent) return 1; - left = read_node_slot(fs_info, parent, pslot - 1); + left = read_node_slot(parent, pslot - 1); if (IS_ERR(left)) left = NULL; @@ -2098,8 +2095,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (ret) wret = 1; else { - wret = push_node_left(trans, fs_info, - left, mid, 0); + wret = push_node_left(trans, left, mid, 0); } } if (wret < 0) @@ -2131,7 +2127,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, btrfs_tree_unlock(left); free_extent_buffer(left); } - right = read_node_slot(fs_info, parent, pslot + 1); + right = read_node_slot(parent, pslot + 1); if (IS_ERR(right)) right = NULL; @@ -2154,8 +2150,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (ret) wret = 1; else { - wret = balance_node_right(trans, fs_info, - right, mid); + wret = balance_node_right(trans, right, mid); } } if (wret < 0) @@ -2416,6 +2411,16 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (tmp) { /* first we do an atomic uptodate check */ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { + /* + * Do extra check for first_key, eb can be stale due to + * being cached, read from scrub, or have multiple + * parents (shared tree blocks). + */ + if (btrfs_verify_level_key(tmp, + parent_level - 1, &first_key, gen)) { + free_extent_buffer(tmp); + return -EUCLEAN; + } *eb_ret = tmp; return 0; } @@ -2706,7 +2711,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *b; int slot; int ret; @@ -2904,7 +2908,7 @@ cow_done: } else { p->slots[level] = slot; if (ins_len > 0 && - btrfs_leaf_free_space(fs_info, b) < ins_len) { + btrfs_leaf_free_space(b) < ins_len) { if (write_lock_level < 1) { write_lock_level = 1; btrfs_release_path(p); @@ -3181,11 +3185,31 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, slot = path->slots[0]; if (slot > 0) { btrfs_item_key(eb, &disk_key, slot - 1); - BUG_ON(comp_keys(&disk_key, new_key) >= 0); + if (unlikely(comp_keys(&disk_key, new_key) >= 0)) { + btrfs_crit(fs_info, + "slot %u key (%llu %u %llu) new key (%llu %u %llu)", + slot, btrfs_disk_key_objectid(&disk_key), + btrfs_disk_key_type(&disk_key), + btrfs_disk_key_offset(&disk_key), + new_key->objectid, new_key->type, + new_key->offset); + btrfs_print_leaf(eb); + BUG(); + } } if (slot < btrfs_header_nritems(eb) - 1) { btrfs_item_key(eb, &disk_key, slot + 1); - BUG_ON(comp_keys(&disk_key, new_key) <= 0); + if (unlikely(comp_keys(&disk_key, new_key) <= 0)) { + btrfs_crit(fs_info, + "slot %u key (%llu %u %llu) new key (%llu %u %llu)", + slot, btrfs_disk_key_objectid(&disk_key), + btrfs_disk_key_type(&disk_key), + btrfs_disk_key_offset(&disk_key), + new_key->objectid, new_key->type, + new_key->offset); + btrfs_print_leaf(eb); + BUG(); + } } btrfs_cpu_key_to_disk(&disk_key, new_key); @@ -3203,10 +3227,10 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, int empty) { + struct btrfs_fs_info *fs_info = trans->fs_info; int push_items = 0; int src_nritems; int dst_nritems; @@ -3239,8 +3263,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, } else push_items = min(src_nritems - 8, push_items); - ret = tree_mod_log_eb_copy(fs_info, dst, src, dst_nritems, 0, - push_items); + ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items); if (ret) { btrfs_abort_transaction(trans, ret); return ret; @@ -3278,10 +3301,10 @@ static int push_node_left(struct btrfs_trans_handle *trans, * this will only push up to 1/2 the contents of the left node over */ static int balance_node_right(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src) { + struct btrfs_fs_info *fs_info = trans->fs_info; int push_items = 0; int max_push; int src_nritems; @@ -3315,8 +3338,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, (dst_nritems) * sizeof(struct btrfs_key_ptr)); - ret = tree_mod_log_eb_copy(fs_info, dst, src, 0, - src_nritems - push_items, push_items); + ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items, + push_items); if (ret) { btrfs_abort_transaction(trans, ret); return ret; @@ -3404,7 +3427,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, * blocknr is the block the key points to. */ static void insert_ptr(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, int slot, int level) { @@ -3417,7 +3440,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, lower = path->nodes[level]; nritems = btrfs_header_nritems(lower); BUG_ON(slot > nritems); - BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info)); + BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info)); if (slot != nritems) { if (level) { ret = tree_mod_log_insert_move(lower, slot + 1, slot, @@ -3501,7 +3524,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, root_add_used(root, fs_info->nodesize); ASSERT(btrfs_header_level(c) == level); - ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid); + ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); if (ret) { btrfs_abort_transaction(trans, ret); return ret; @@ -3517,7 +3540,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(c); btrfs_mark_buffer_dirty(split); - insert_ptr(trans, fs_info, path, &disk_key, split->start, + insert_ptr(trans, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { @@ -3565,9 +3588,9 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ -noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf) +noinline int btrfs_leaf_free_space(struct extent_buffer *leaf) { + struct btrfs_fs_info *fs_info = leaf->fs_info; int nritems = btrfs_header_nritems(leaf); int ret; @@ -3586,13 +3609,13 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info, * min slot controls the lowest index we're willing to push to the * right. We'll push up to and including min_slot, but no lower */ -static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, +static noinline int __push_leaf_right(struct btrfs_path *path, int data_size, int empty, struct extent_buffer *right, int free_space, u32 left_nritems, u32 min_slot) { + struct btrfs_fs_info *fs_info = right->fs_info; struct extent_buffer *left = path->nodes[0]; struct extent_buffer *upper = path->nodes[1]; |