summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-06-11 09:22:21 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-06-11 09:22:21 -0700
commit 859862ddd2b6b8dee00498c015ab37f02474b442 (patch)
tree b5597dd52b2e596401522bab802ca7993c1c20be /fs/btrfs
parent 412dd3a6daf0cadce1b2d6a34fa3713f40255579 (diff)
parent c7548af69d9ef71512eb52d8009521eba3e768fd (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "The biggest change here is Josef's rework of the btrfs quota accounting, which improves the in-memory tracking of delayed extent operations.

  I had been working on Btrfs stack usage for a while, mostly because it had become impossible to do long stress runs with slab, lockdep and pagealloc debugging turned on without blowing the stack. Even though you upgraded us to a nice king sized stack, I kept most of the patches.

  We also have some very hard to find corruption fixes, an awesome sysfs use after free, and the usual assortment of optimizations, cleanups and other fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (80 commits)
  Btrfs: convert smp_mb__{before,after}_clear_bit
  Btrfs: fix scrub_print_warning to handle skinny metadata extents
  Btrfs: make fsync work after cloning into a file
  Btrfs: use right type to get real comparison
  Btrfs: don't check nodes for extent items
  Btrfs: don't release invalid page in btrfs_page_exists_in_range()
  Btrfs: make sure we retry if page is a retriable exception
  Btrfs: make sure we retry if we couldn't get the page
  btrfs: replace EINVAL with EOPNOTSUPP for dev_replace raid56
  trivial: fs/btrfs/ioctl.c: fix typo s/substract/subtract/
  Btrfs: fix leaf corruption after __btrfs_drop_extents
  Btrfs: ensure btrfs_prev_leaf doesn't miss 1 item
  Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled
  btrfs: free delayed node outside of root->inode_lock
  btrfs: replace EINVAL with ERANGE for resize when ULLONG_MAX
  Btrfs: fix transaction leak during fsync call
  btrfs: Avoid trucating page or punching hole in a already existed hole.
  Btrfs: update commit root on snapshot creation after orphan cleanup
  Btrfs: ioctl, don't re-lock extent range when not necessary
  Btrfs: avoid visiting all extent items when cloning a range
  ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/Makefile 2
-rw-r--r-- fs/btrfs/acl.c 7
-rw-r--r-- fs/btrfs/backref.c 41
-rw-r--r-- fs/btrfs/backref.h 8
-rw-r--r-- fs/btrfs/btrfs_inode.h 2
-rw-r--r-- fs/btrfs/check-integrity.c 5
-rw-r--r-- fs/btrfs/compression.c 6
-rw-r--r-- fs/btrfs/ctree.c 104
-rw-r--r-- fs/btrfs/ctree.h 143
-rw-r--r-- fs/btrfs/delayed-inode.c 7
-rw-r--r-- fs/btrfs/delayed-ref.c 39
-rw-r--r-- fs/btrfs/delayed-ref.h 24
-rw-r--r-- fs/btrfs/dev-replace.c 2
-rw-r--r-- fs/btrfs/disk-io.c 113
-rw-r--r-- fs/btrfs/disk-io.h 1
-rw-r--r-- fs/btrfs/extent-tree.c 470
-rw-r--r-- fs/btrfs/extent_io.c 401
-rw-r--r-- fs/btrfs/extent_io.h 2
-rw-r--r-- fs/btrfs/file-item.c 80
-rw-r--r-- fs/btrfs/file.c 152
-rw-r--r-- fs/btrfs/free-space-cache.c 312
-rw-r--r-- fs/btrfs/inode-map.c 2
-rw-r--r-- fs/btrfs/inode.c 293
-rw-r--r-- fs/btrfs/ioctl.c 396
-rw-r--r-- fs/btrfs/lzo.c 14
-rw-r--r-- fs/btrfs/ordered-data.c 2
-rw-r--r-- fs/btrfs/qgroup.c 937
-rw-r--r-- fs/btrfs/qgroup.h 107
-rw-r--r-- fs/btrfs/relocation.c 21
-rw-r--r-- fs/btrfs/root-tree.c 2
-rw-r--r-- fs/btrfs/scrub.c 9
-rw-r--r-- fs/btrfs/send.c 290
-rw-r--r-- fs/btrfs/super.c 13
-rw-r--r-- fs/btrfs/sysfs.c 50
-rw-r--r-- fs/btrfs/tests/btrfs-tests.c 97
-rw-r--r-- fs/btrfs/tests/btrfs-tests.h 9
-rw-r--r-- fs/btrfs/tests/inode-tests.c 35
-rw-r--r-- fs/btrfs/tests/qgroup-tests.c 468
-rw-r--r-- fs/btrfs/transaction.c 113
-rw-r--r-- fs/btrfs/transaction.h 1
-rw-r--r-- fs/btrfs/tree-defrag.c 2
-rw-r--r-- fs/btrfs/tree-log.c 49
-rw-r--r-- fs/btrfs/tree-log.h 16
-rw-r--r-- fs/btrfs/volumes.c 122
-rw-r--r-- fs/btrfs/volumes.h 1
-rw-r--r-- fs/btrfs/zlib.c 26
46 files changed, 3693 insertions, 1303 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f341a98031d2..6d1d0b93b1aa 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -16,4 +16,4 @@ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
- tests/extent-io-tests.o tests/inode-tests.o
+ tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index ff9b3995d453..9a0124a95851 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -79,13 +79,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
const char *name;
char *value = NULL;
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret < 0)
- return ret;
- ret = 0;
- }
-
switch (type) {
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 10db21fa0926..e25564bfcb46 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -900,7 +900,11 @@ again:
goto out;
BUG_ON(ret == 0);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (trans && likely(trans->type != __TRANS_DUMMY)) {
+#else
if (trans) {
+#endif
/*
* look if there are updates for this ref queued and lock the
* head
@@ -984,11 +988,12 @@ again:
goto out;
}
if (ref->count && ref->parent) {
- if (extent_item_pos && !ref->inode_list) {
+ if (extent_item_pos && !ref->inode_list &&
+ ref->level == 0) {
u32 bsz;
struct extent_buffer *eb;
bsz = btrfs_level_size(fs_info->extent_root,
- info_level);
+ ref->level);
eb = read_tree_block(fs_info->extent_root,
ref->parent, bsz, 0);
if (!eb || !extent_buffer_uptodate(eb)) {
@@ -1404,9 +1409,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
* returns <0 on error
*/
static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- struct btrfs_extent_inline_ref **out_eiref,
- int *out_type)
+ struct btrfs_key *key,
+ struct btrfs_extent_item *ei, u32 item_size,
+ struct btrfs_extent_inline_ref **out_eiref,
+ int *out_type)
{
unsigned long end;
u64 flags;
@@ -1416,19 +1422,26 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
/* first call */
flags = btrfs_extent_flags(eb, ei);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- info = (struct btrfs_tree_block_info *)(ei + 1);
- *out_eiref =
- (struct btrfs_extent_inline_ref *)(info + 1);
+ if (key->type == BTRFS_METADATA_ITEM_KEY) {
+ /* a skinny metadata extent */
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(ei + 1);
+ } else {
+ WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(info + 1);
+ }
} else {
*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
}
*ptr = (unsigned long)*out_eiref;
- if ((void *)*ptr >= (void *)ei + item_size)
+ if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
return -ENOENT;
}
end = (unsigned long)ei + item_size;
- *out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
+ *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -1447,8 +1460,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
* <0 on error.
*/
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level)
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level)
{
int ret;
int type;
@@ -1459,8 +1472,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 1;
while (1) {
- ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
- &eiref, &type);
+ ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+ &eiref, &type);
if (ret < 0)
return ret;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index a910b27a8ad9..86fc20fec282 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -40,8 +40,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
u64 *flags);
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level);
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level);
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots);
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist **roots);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 2256e9cceec5..4794923c410c 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -284,4 +284,6 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
&BTRFS_I(inode)->runtime_flags);
}
+bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
+
#endif
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 0e8388e72d8d..ce92ae30250f 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1093,6 +1093,7 @@ leaf_item_out_of_bounce_error:
next_stack =
btrfsic_stack_frame_alloc();
if (NULL == next_stack) {
+ sf->error = -1;
btrfsic_release_block_ctx(
&sf->
next_block_ctx);
@@ -1190,8 +1191,10 @@ continue_with_current_node_stack_frame:
sf->next_block_ctx.datav[0];
next_stack = btrfsic_stack_frame_alloc();
- if (NULL == next_stack)
+ if (NULL == next_stack) {
+ sf->error = -1;
goto one_stack_frame_backwards;
+ }
next_stack->i = -1;
next_stack->block = sf->next_block;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d43c544d3b68..92371c414228 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -887,7 +887,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
workspace = find_workspace(type);
if (IS_ERR(workspace))
- return -1;
+ return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
start, len, pages,
@@ -923,7 +923,7 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
workspace = find_workspace(type);
if (IS_ERR(workspace))
- return -ENOMEM;
+ return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
disk_start,
@@ -945,7 +945,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
workspace = find_workspace(type);
if (IS_ERR(workspace))
- return -ENOMEM;
+ return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
dest_page, start_byte,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1bcfcdb23cf4..aeab453b8e24 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -224,7 +224,8 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
static void add_root_to_dirty_list(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
- if (root->track_dirty && list_empty(&root->dirty_list)) {
+ if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
+ list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
@@ -246,9 +247,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int level;
struct btrfs_disk_key disk_key;
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
if (level == 0)
@@ -354,44 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}
/*
- * Increment the upper half of tree_mod_seq, set lower half zero.
- *
- * Must be called with fs_info->tree_mod_seq_lock held.
- */
-static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
-{
- u64 seq = atomic64_read(&fs_info->tree_mod_seq);
- seq &= 0xffffffff00000000ull;
- seq += 1ull << 32;
- atomic64_set(&fs_info->tree_mod_seq, seq);
- return seq;
-}
-
-/*
- * Increment the lower half of tree_mod_seq.
- *
- * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
- * are generated should not technically require a spin lock here. (Rationale:
- * incrementing the minor while incrementing the major seq number is between its
- * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
- * just returns a unique sequence number as usual.) We have decided to leave
- * that requirement in here and rethink it once we notice it really imposes a
- * problem on some workload.
+ * Pull a new tree mod seq number for our operation.
*/
-static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic64_inc_return(&fs_info->tree_mod_seq);
}
/*
- * return the last minor in the previous major tree_mod_seq number
- */
-u64 btrfs_tree_mod_seq_prev(u64 seq)
-{
- return (seq & 0xffffffff00000000ull) - 1ull;
-}
-
-/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
* to record tree modifications, it should ensure to set elem->seq to zero
@@ -402,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
- u64 seq;
-
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
- elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
+ elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
- return seq;
+ return elem->seq;
}
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -487,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
BUG_ON(!tm);
- spin_lock(&fs_info->tree_mod_seq_lock);
- tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ tm->seq = btrfs_inc_tree_mod_seq(fs_info);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
@@ -997,14 +964,14 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
* snapshot and the block was not allocated by tree relocation,
* we know the block is not shared.
*/
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
buf != root->node && buf != root->commit_root &&
(btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item) ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
return 1;
#endif
@@ -1146,9 +1113,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(buf);
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@@ -1193,7 +1161,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
- if (root->ref_cows) {
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret)
return ret;
@@ -1538,6 +1506,10 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ return 0;
+#endif
/* ensure we can see the force_cow */
smp_rmb();
@@ -1556,7 +1528,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
!(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
- !root->force_cow)
+ !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return 0;
return 1;
}
@@ -5125,7 +5097,17 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
return ret;
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
- if (ret < 0)
+ /*
+ * We might have had an item with the previous key in the tree right
+ * before we released our path. And after we released our path, that
+ * item might have been pushed to the first slot (0) of the leaf we
+ * were holding due to a tree balance. Alternatively, an item with the
+ * previous key can exist as the only element of a leaf (big fat item).
+ * Therefore account for these 2 cases, so that our callers (like
+ * btrfs_previous_item) don't miss an existing item with a key matching
+ * the previous key we computed above.
+ */
+ if (ret <= 0)
return 0;
return 1;
}
@@ -5736,6 +5718,24 @@ again:
ret = 0;
goto done;
}
+ /*
+ * So the above check misses one case:
+ * - after releasing the path above, someone has removed the item that
+ * used to be at the very end of the block, and balance between leafs
+ * gets another one with bigger key.offset to replace it.
+ *
+ * This one should be returned as well, or we can get leaf corruption
+ * later(esp. in __btrfs_drop_extents()).
+ *
+ * And a bit more explanation about this check,
+ * with ret > 0, the key isn't found, the path points to the slot
+ * where it should be inserted, so the path->slots[0] item must be the
+ * bigger one.
+ */
+ if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
+ ret = 0;
+ goto done;
+ }
while (level < BTRFS_MAX_LEVEL) {
if (!path->nodes[level]) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ba6b88528dc7..b7e2c1c1ef36 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -33,6 +33,7 @@
#include <asm/kmap_types.h>
#include <linux/pagemap.h>
#include <linux/btrfs.h>
+#include <linux/workqueue.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -756,6 +757,12 @@ struct btrfs_dir_item {
#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48)
+
struct btrfs_root_item {
struct btrfs_inode_item inode;
__le64 generation;
@@ -840,7 +847,10 @@ struct btrfs_disk_balance_args {
/* BTRFS_BALANCE_ARGS_* */
__le64 flags;
- __le64 unused[8];
+ /* BTRFS_BALANCE_ARGS_LIMIT value */
+ __le64 limit;
+
+ __le64 unused[7];
} __attribute__ ((__packed__));
/*
@@ -1113,6 +1123,12 @@ struct btrfs_qgroup_limit_item {
__le64 rsv_excl;
} __attribute__ ((__packed__));
+/* For raid type sysfs entries */
+struct raid_kobject {
+ int raid_type;
+ struct kobject kobj;
+};
+
struct btrfs_space_info {
spinlock_t lock;
@@ -1163,7 +1179,7 @@ struct btrfs_space_info {
wait_queue_head_t wait;
struct kobject kobj;
- struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
+ struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
};
#define BTRFS_BLOCK_RSV_GLOBAL 1
@@ -1313,6 +1329,8 @@ struct btrfs_stripe_hash_table {
#define BTRFS_STRIPE_HASH_TABLE_BITS 11
+void btrfs_init_async_reclaim_work(struct work_struct *work);
+
/* fs_info */
struct reloc_control;
struct btrfs_device;
@@ -1534,6 +1552,9 @@ struct btrfs_fs_info {
*/
struct btrfs_workqueue *fixup_workers;
struct btrfs_workqueue *delayed_workers;
+
+ /* the extent workers do delayed refs on the extent allocation tree */
+ struct btrfs_workqueue *extent_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
int thread_pool_size;
@@ -1636,7 +1657,10 @@ struct btrfs_fs_info {
/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
+ struct rb_root qgroup_op_tree;
spinlock_t qgroup_lock;
+ spinlock_t qgroup_op_lock;
+ atomic_t qgroup_op_seq;
/*
* used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1688,6 +1712,9 @@ struct btrfs_fs_info {
struct semaphore uuid_tree_rescan_sem;
unsigned int update_uuid_tree_gen:1;
+
+ /* Used to reclaim the metadata space in the background. */
+ struct work_struct async_reclaim_work;
};
struct btrfs_subvolume_writers {
@@ -1696,6 +1723,26 @@ struct btrfs_subvolume_writers {
};
/*
+ * The state of btrfs root
+ */
+/*
+ * btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code. But the
+ * race is very small, and only the first time the root
+ * is added to each transaction. So IN_TRANS_SETUP
+ * is used to tell us when more checks are required
+ */
+#define BTRFS_ROOT_IN_TRANS_SETUP 0
+#define BTRFS_ROOT_REF_COWS 1
+#define BTRFS_ROOT_TRACK_DIRTY 2
+#define BTRFS_ROOT_IN_RADIX 3
+#define BTRFS_ROOT_DUMMY_ROOT 4
+#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 5
+#define BTRFS_ROOT_DEFRAG_RUNNING 6
+#define BTRFS_ROOT_FORCE_COW 7
+#define BTRFS_ROOT_MULTI_LOG_TASKS 8
+
+/*
* in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root.
*/
@@ -1706,6 +1753,7 @@ struct btrfs_root {
struct btrfs_root *log_root;
struct btrfs_root *reloc_root;
+ unsigned long state;
struct btrfs_root_item root_item;
struct btrfs_key root_key;
struct btrfs_fs_info *fs_info;
@@ -1740,7 +1788,6 @@ struct btrfs_root {
/* Just be updated when the commit succeeds. */
int last_log_commit;
pid_t log_start_pid;
- bool log_multiple_pids;
u64 objectid;
u64 last_trans;
@@ -1760,23 +1807,13 @@ struct btrfs_root {
u64 highest_objectid;
- /* btrfs_record_root_in_trans is a multi-step process,
- * and it can race with the balancing code. But the
- * race is very small, and only the first time the root
- * is added to each transaction. So in_trans_setup
- * is used to tell us when more checks are required
- */
- unsigned long in_trans_setup;
- int ref_cows;
- int track_dirty;
- int in_radix;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- int dummy_root;
+ u64 alloc_bytenr;
#endif
+
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
- int defrag_running;
char *name;
/* the dirty list is only used by non-reference counted roots */
@@ -1790,7 +1827,6 @@ struct btrfs_root {
spinlock_t orphan_lock;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
- int orphan_item_inserted;
int orphan_cleanup_state;
spinlock_t inode_lock;
@@ -1808,8 +1844,6 @@ struct btrfs_root {
*/
dev_t anon_dev;
- int force_cow;
-
spinlock_t root_item_lock;
atomic_t refs;
@@ -2788,6 +2822,11 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root)
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
}
+static inline bool btrfs_root_dead(struct btrfs_root *root)
+{
+ return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
+}
+
/* struct btrfs_root_backup */
BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
tree_root, 64);
@@ -2897,6 +2936,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
cpu->vend = le64_to_cpu(disk->vend);
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
+ cpu->limit = le64_to_cpu(disk->limit);
}
static inline void
@@ -2914,6 +2954,7 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
disk->vend = cpu_to_le64(cpu->vend);
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
+ disk->limit = cpu_to_le64(cpu->limit);
}
/* struct btrfs_super_block */
@@ -3236,6 +3277,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+ unsigned long count, int wait);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
@@ -3275,9 +3318,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow);
+ struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow);
+ struct extent_buffer *buf, int full_backref, int no_quota)