summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorZheng Yan <zheng.yan@oracle.com>2008-09-23 13:14:14 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:07 -0400
commit31840ae1a6b433ca0e6a8d341756ff478bbf959e (patch)
tree9343db596aec175e9640aa2800b80f01496d7047 /fs
parent1c2308f8e7d8491467e0095af2b01500f1b70819 (diff)
Btrfs: Full back reference support
This patch makes the back reference system to explicit record the location of parent node for all types of extents. The location of parent node is placed into the offset field of backref key. Every time a tree block is balanced, the back references for the affected lower level extents are updated. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.c228
-rw-r--r--fs/btrfs/ctree.h62
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/extent-tree.c1143
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/file.c121
-rw-r--r--fs/btrfs/inode.c57
-rw-r--r--fs/btrfs/ioctl.c57
-rw-r--r--fs/btrfs/print-tree.c5
-rw-r--r--fs/btrfs/tree-log.c108
10 files changed, 1066 insertions, 722 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6f467901246f..50aea8cb653a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -125,7 +125,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
u32 nritems;
int ret = 0;
int level;
- struct btrfs_key first_key;
struct btrfs_root *new_root;
new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
@@ -141,18 +140,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
level = btrfs_header_level(buf);
nritems = btrfs_header_nritems(buf);
- if (nritems) {
- if (level == 0)
- btrfs_item_key_to_cpu(buf, &first_key, 0);
- else
- btrfs_node_key_to_cpu(buf, &first_key, 0);
- } else {
- first_key.objectid = 0;
- }
- cow = btrfs_alloc_free_block(trans, new_root, buf->len,
- new_root_objectid,
- trans->transid, first_key.objectid,
- level, buf->start, 0);
+
+ cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
+ new_root_objectid, trans->transid,
+ level, buf->start, 0);
if (IS_ERR(cow)) {
kfree(new_root);
return PTR_ERR(cow);
@@ -165,7 +156,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
WARN_ON(btrfs_header_generation(buf) > trans->transid);
- ret = btrfs_inc_ref(trans, new_root, buf, 0);
+ ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
kfree(new_root);
if (ret)
@@ -184,39 +175,31 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
u64 search_start, u64 empty_size,
u64 prealloc_dest)
{
- u64 root_gen;
+ u64 parent_start;
struct extent_buffer *cow;
u32 nritems;
int ret = 0;
int different_trans = 0;
int level;
int unlock_orig = 0;
- struct btrfs_key first_key;
if (*cow_ret == buf)
unlock_orig = 1;
WARN_ON(!btrfs_tree_locked(buf));
- if (root->ref_cows) {
- root_gen = trans->transid;
- } else {
- root_gen = 0;
- }
+ if (parent)
+ parent_start = parent->start;
+ else
+ parent_start = 0;
+
WARN_ON(root->ref_cows && trans->transid !=
root->fs_info->running_transaction->transid);
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
level = btrfs_header_level(buf);
nritems = btrfs_header_nritems(buf);
- if (nritems) {
- if (level == 0)
- btrfs_item_key_to_cpu(buf, &first_key, 0);
- else
- btrfs_node_key_to_cpu(buf, &first_key, 0);
- } else {
- first_key.objectid = 0;
- }
+
if (prealloc_dest) {
struct btrfs_key ins;
@@ -224,19 +207,19 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
ins.offset = buf->len;
ins.type = BTRFS_EXTENT_ITEM_KEY;
- ret = btrfs_alloc_reserved_extent(trans, root,
+ ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
root->root_key.objectid,
- root_gen, level,
- first_key.objectid,
+ trans->transid, level, 0,
&ins);
BUG_ON(ret);
cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
buf->len);
} else {
cow = btrfs_alloc_free_block(trans, root, buf->len,
+ parent_start,
root->root_key.objectid,
- root_gen, first_key.objectid,
- level, search_start, empty_size);
+ trans->transid, level,
+ search_start, empty_size);
}
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -249,17 +232,23 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (btrfs_header_generation(buf) != trans->transid) {
+ u32 nr_extents;
different_trans = 1;
- ret = btrfs_inc_ref(trans, root, buf, 1);
+ ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
if (ret)
return ret;
+
+ ret = btrfs_cache_ref(trans, root, buf, nr_extents);
+ WARN_ON(ret);
} else {
+ ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
+ if (ret)
+ return ret;
clean_tree_block(trans, root, buf);
}
if (buf == root->node) {
WARN_ON(parent && parent != buf);
- root_gen = btrfs_header_generation(buf);
spin_lock(&root->node_lock);
root->node = cow;
@@ -268,13 +257,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (buf != root->commit_root) {
btrfs_free_extent(trans, root, buf->start,
- buf->len, root->root_key.objectid,
- root_gen, 0, 0, 1);
+ buf->len, buf->start,
+ root->root_key.objectid,
+ btrfs_header_generation(buf),
+ 0, 0, 1);
}
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
- root_gen = btrfs_header_generation(parent);
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
WARN_ON(trans->transid == 0);
@@ -283,8 +273,8 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(parent);
WARN_ON(btrfs_header_generation(parent) != trans->transid);
btrfs_free_extent(trans, root, buf->start, buf->len,
- btrfs_header_owner(parent), root_gen,
- 0, 0, 1);
+ parent_start, btrfs_header_owner(parent),
+ btrfs_header_generation(parent), 0, 0, 1);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -831,6 +821,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
root->node = child;
spin_unlock(&root->node_lock);
+ ret = btrfs_update_extent_ref(trans, root, child->start,
+ mid->start, child->start,
+ root->root_key.objectid,
+ trans->transid, level - 1, 0);
+ BUG_ON(ret);
+
add_root_to_dirty_list(root);
btrfs_tree_unlock(child);
path->locks[level] = 0;
@@ -840,7 +836,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* once for the path */
free_extent_buffer(mid);
ret = btrfs_free_extent(trans, root, mid->start, mid->len,
- root->root_key.objectid,
+ mid->start, root->root_key.objectid,
btrfs_header_generation(mid), 0, 0, 1);
/* once for the root ptr */
free_extent_buffer(mid);
@@ -905,7 +901,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
wret = btrfs_free_extent(trans, root, bytenr,
- blocksize,
+ blocksize, parent->start,
btrfs_header_owner(parent),
generation, 0, 0, 1);
if (wret)
@@ -954,6 +950,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
wret = btrfs_free_extent(trans, root, bytenr, blocksize,
+ parent->start,
btrfs_header_owner(parent),
root_gen, 0, 0, 1);
if (wret)
@@ -1500,6 +1497,41 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
}
/*
+ * update item key.
+ *
+ * This function isn't completely safe. It's the caller's responsibility
+ * that the new key won't break the order
+ */
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key)
+{
+ struct btrfs_disk_key disk_key;
+ struct extent_buffer *eb;
+ int slot;
+
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ if (slot > 0) {
+ btrfs_item_key(eb, &disk_key, slot - 1);
+ if (comp_keys(&disk_key, new_key) >= 0)
+ return -1;
+ }
+ if (slot < btrfs_header_nritems(eb) - 1) {
+ btrfs_item_key(eb, &disk_key, slot + 1);
+ if (comp_keys(&disk_key, new_key) <= 0)
+ return -1;
+ }
+
+ btrfs_cpu_key_to_disk(&disk_key, new_key);
+ btrfs_set_item_key(eb, &disk_key, slot);
+ btrfs_mark_buffer_dirty(eb);
+ if (slot == 0)
+ fixup_low_keys(trans, root, path, &disk_key, 1);
+ return 0;
+}
+
+/*
* try to push data from one node into the next node left in the
* tree.
*
@@ -1558,6 +1590,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(dst, dst_nritems + push_items);
btrfs_mark_buffer_dirty(src);
btrfs_mark_buffer_dirty(dst);
+
+ ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
+ BUG_ON(ret);
+
return ret;
}
@@ -1619,6 +1655,10 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(src);
btrfs_mark_buffer_dirty(dst);
+
+ ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
+ BUG_ON(ret);
+
return ret;
}
@@ -1633,30 +1673,24 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int level)
{
- u64 root_gen;
u64 lower_gen;
struct extent_buffer *lower;
struct extent_buffer *c;
struct extent_buffer *old;
struct btrfs_disk_key lower_key;
+ int ret;
BUG_ON(path->nodes[level]);
BUG_ON(path->nodes[level-1] != root->node);
- if (root->ref_cows)
- root_gen = trans->transid;
- else
- root_gen = 0;
-
lower = path->nodes[level-1];
if (level == 1)
btrfs_item_key(lower, &lower_key, 0);
else
btrfs_node_key(lower, &lower_key, 0);
- c = btrfs_alloc_free_block(trans, root, root->nodesize,
- root->root_key.objectid,
- root_gen, le64_to_cpu(lower_key.objectid),
+ c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
+ root->root_key.objectid, trans->transid,
level, root->node->start, 0);
if (IS_ERR(c))
return PTR_ERR(c);
@@ -1679,7 +1713,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
btrfs_set_node_key(c, &lower_key, 0);
btrfs_set_node_blockptr(c, 0, lower->start);
lower_gen = btrfs_header_generation(lower);
- WARN_ON(lower_gen == 0);
+ WARN_ON(lower_gen != trans->transid);
btrfs_set_node_ptr_generation(c, 0, lower_gen);
@@ -1690,6 +1724,12 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
root->node = c;
spin_unlock(&root->node_lock);
+ ret = btrfs_update_extent_ref(trans, root, lower->start,
+ lower->start, c->start,
+ root->root_key.objectid,
+ trans->transid, level - 1, 0);
+ BUG_ON(ret);
+
/* the super has an extra ref to root->node */
free_extent_buffer(old);
@@ -1698,20 +1738,6 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
path->nodes[level] = c;
path->locks[level] = 1;
path->slots[level] = 0;
-
- if (root->ref_cows && lower_gen != trans->transid) {
- struct btrfs_path *back_path = btrfs_alloc_path();
- int ret;
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = btrfs_insert_extent_backref(trans,
- root->fs_info->extent_root,
- path, lower->start,
- root->root_key.objectid,
- trans->transid, 0, 0);
- BUG_ON(ret);
- mutex_unlock(&root->fs_info->alloc_mutex);
- btrfs_free_path(back_path);
- }
return 0;
}
@@ -1766,7 +1792,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int level)
{
- u64 root_gen;
struct extent_buffer *c;
struct extent_buffer *split;
struct btrfs_disk_key disk_key;
@@ -1793,17 +1818,11 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
}
c_nritems = btrfs_header_nritems(c);
- if (root->ref_cows)
- root_gen = trans->transid;
- else
- root_gen = 0;
- btrfs_node_key(c, &disk_key, 0);
split = btrfs_alloc_free_block(trans, root, root->nodesize,
- root->root_key.objectid,
- root_gen,
- btrfs_disk_key_objectid(&disk_key),
- level, c->start, 0);
+ path->nodes[level + 1]->start,
+ root->root_key.objectid,
+ trans->transid, level, c->start, 0);
if (IS_ERR(split))
return PTR_ERR(split);
@@ -1840,6 +1859,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
+ ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
+ BUG_ON(ret);
+
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
btrfs_tree_unlock(c);
@@ -1955,10 +1977,23 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
else
nr = 1;
+ if (path->slots[0] >= left_nritems)
+ push_space += data_size + sizeof(*item);
+
i = left_nritems - 1;
while (i >= nr) {
item = btrfs_item_nr(left, i);
+ if (!empty && push_items > 0) {
+ if (path->slots[0] > i)
+ break;
+ if (path->slots[0] == i) {
+ int space = btrfs_leaf_free_space(root, left);
+ if (space + push_space * 2 > free_space)
+ break;
+ }
+ }
+
if (path->slots[0] == i)
push_space += data_size + sizeof(*item);
@@ -1973,6 +2008,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
this_item_size = btrfs_item_size(left, item);
if (this_item_size + sizeof(*item) + push_space > free_space)
break;
+
push_items++;
push_space += this_item_size + sizeof(*item);
if (i == 0)
@@ -2046,6 +2082,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_mark_buffer_dirty(left);
btrfs_mark_buffer_dirty(right);
+ ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
+ BUG_ON(ret);
+
btrfs_item_key(right, &disk_key, 0);
btrfs_set_node_key(upper, &disk_key, slot + 1);
btrfs_mark_buffer_dirty(upper);
@@ -2147,6 +2186,16 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
KM_USER1);
}
+ if (!empty && push_items > 0) {
+ if (path->slots[0] < i)
+ break;
+ if (path->slots[0] == i) {
+ int space = btrfs_leaf_free_space(root, right);
+ if (space + push_space * 2 > free_space)
+ break;
+ }
+ }
+
if (path->slots[0] == i)
push_space += data_size + sizeof(*item);
@@ -2255,6 +2304,10 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (right_nritems)
btrfs_mark_buffer_dirty(right);
+ ret = btrfs_update_ref(trans, root, right, left,
+ old_left_nritems, push_items);
+ BUG_ON(ret);
+
btrfs_item_key(right, &disk_key, 0);
wret = fixup_low_keys(trans, root, path, &disk_key, 1);
if (wret)
@@ -2294,7 +2347,6 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
struct btrfs_path *path, int data_size,
int extend)
{
- u64 root_gen;
struct extent_buffer *l;
u32 nritems;
int mid;
@@ -2313,11 +2365,6 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
if (extend)
space_needed = data_size;
- if (root->ref_cows)
- root_gen = trans->transid;
- else
- root_gen = 0;
-
/* first try to make some room by pushing left and right */
if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
wret = push_leaf_right(trans, root, path, data_size, 0);
@@ -2348,13 +2395,10 @@ again:
nritems = btrfs_header_nritems(l);
mid = (nritems + 1)/ 2;
- btrfs_item_key(l, &disk_key, 0);
-
right = btrfs_alloc_free_block(trans, root, root->leafsize,
- root->root_key.objectid,
- root_gen,
- le64_to_cpu(disk_key.objectid),
- 0, l->start, 0);
+ path->nodes[1]->start,
+ root->root_key.objectid,
+ trans->transid, 0, l->start, 0);
if (IS_ERR(right)) {
BUG_ON(1);
return PTR_ERR(right);
@@ -2485,6 +2529,9 @@ again:
btrfs_mark_buffer_dirty(l);
BUG_ON(path->slots[0] != slot);
+ ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+ BUG_ON(ret);
+
if (mid <= slot) {
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
@@ -2956,6 +3003,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
ret = wret;
wret = btrfs_free_extent(trans, root,
leaf->start, leaf->len,
+ path->nodes[1]->start,
btrfs_header_owner(path->nodes[1]),
root_gen, 0, 0, 1);
if (wret)
@@ -3007,7 +3055,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
free_extent_buffer(leaf);
wret = btrfs_free_extent(trans, root, bytenr,
- blocksize,
+ blocksize, path->nodes[1]->start,
btrfs_header_owner(path->nodes[1]),
root_gen, 0, 0, 1);
if (wret)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 730aae3bc181..138c157bbc45 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -40,7 +40,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
struct btrfs_ordered_sum;
-#define BTRFS_MAGIC "_B8RfS_M"
+#define BTRFS_MAGIC "_B9RfS_M"
#define BTRFS_ACL_NOT_CACHED ((void *)-1)
@@ -81,6 +81,9 @@ struct btrfs_ordered_sum;
#define BTRFS_TREE_LOG_OBJECTID -6ULL
#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+/* dummy objectid represents multiple objectids */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
/*
* All files have objectids in this range.
*/
@@ -369,6 +372,7 @@ struct btrfs_extent_ref {
__le64 generation;
__le64 objectid;
__le64 offset;
+ __le32 num_refs;
} __attribute__ ((__packed__));
/* dev extents record free space on individual devices. The owner
@@ -1047,9 +1051,6 @@ btrfs_inode_otime(struct btrfs_inode_item *inode_item)
BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
-/* struct btrfs_extent_item */
-BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
-
/* struct btrfs_dev_extent */
BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
chunk_tree, 64);
@@ -1070,14 +1071,20 @@ BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64);
BTRFS_SETGET_FUNCS(ref_offset, struct btrfs_extent_ref, offset, 64);
+BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64);
BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref,
generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref,
objectid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_offset, struct btrfs_extent_ref,
+ offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref,
+ num_refs, 32);
+/* struct btrfs_extent_item */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item,
refs, 32);
@@ -1474,8 +1481,7 @@ static inline struct dentry *fdentry(struct file *file) {
}
/* extent-tree.c */
-int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path,
- u64 start, u64 len);
+int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_update_pinned_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int pin);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
@@ -1495,10 +1501,9 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
int data, int owner);
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u32 blocksize,
+ u32 blocksize, u64 parent,
u64 root_objectid,
u64 ref_generation,
- u64 first_objectid,
int level,
u64 hint,
u64 empty_size);
@@ -1508,23 +1513,24 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size);
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_path *path, u64 bytenr,
+ struct btrfs_path *path,
+ u64 bytenr, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset);
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 num_bytes, u64 min_bytes,
+ u64 num_bytes, u64 parent, u64 min_bytes,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins, u64 data);
int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_root *root, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset,
struct btrfs_key *ins);
int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+ struct btrfs_root *root, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset,
struct btrfs_key *ins);
@@ -1535,9 +1541,16 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
u64 search_end, struct btrfs_key *ins,
u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int cache_ref);
-int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, u64 bytenr, u64 num_bytes,
+ struct extent_buffer *orig_buf, struct extent_buffer *buf,
+ u32 *nr_extents);
+int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct extent_buffer *buf, u32 nr_extents);
+int btrfs_update_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *orig_buf,
+ struct extent_buffer *buf, int start_slot, int nr);
+int btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 ref_generation,
u64 owner_objectid, u64 owner_offset, int pin);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
@@ -1545,10 +1558,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_io_tree *unpin);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes,
- u64 root_objectid, u64 ref_generation,
- u64 owner, u64 owner_offset);
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset);
+int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 orig_parent, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
@@ -1561,7 +1579,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
-
+int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 25be96946a2f..d35ca6a3f513 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -882,8 +882,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
root->ref_cows = 0;
root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
- BTRFS_TREE_LOG_OBJECTID,
- 0, 0, 0, 0, 0);
+ 0, BTRFS_TREE_LOG_OBJECTID,
+ trans->transid, 0, 0, 0);
btrfs_set_header_nritems(root->node, 0);
btrfs_set_header_level(root->node, 0);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 558fbe407368..5258923d621f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -29,6 +29,21 @@
#include "locking.h"
#include "ref-cache.h"
+#define PENDING_EXTENT_INSERT 0
+#define PENDING_EXTENT_DELETE 1
+#define PENDING_BACKREF_UPDATE 2
+
+struct pending_extent_op {
+ int type;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 parent;
+ u64 orig_parent;
+ u64 generation;
+ u64 orig_generation;
+ int level;
+};
+
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
@@ -487,48 +502,15 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
return ret;
}
-static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
- u64 owner, u64 owner_offset)
-{
- u32 high_crc = ~(u32)0;
- u32 low_crc = ~(u32)0;
- __le64 lenum;
- lenum = cpu_to_le64(root_objectid);
- high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
- lenum = cpu_to_le64(ref_generation);
- low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
- if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
- lenum = cpu_to_le64(owner);
- low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
- lenum = cpu_to_le64(owner_offset);
- low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
- }
- return ((u64)high_crc << 32) | (u64)low_crc;
-}
-
-static int match_extent_ref(struct extent_buffer *leaf,
- struct btrfs_extent_ref *disk_ref,
- struct btrfs_extent_ref *cpu_ref)
-{
- int ret;
- int len;
-
- if (cpu_ref->objectid)
- len = sizeof(*cpu_ref);
- else
- len = 2 * sizeof(u64);
- ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
- len);
- return ret == 0;
-}
-
/* simple helper to search for an existing extent at a given offset */
-int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path,
- u64 start, u64 len)
+int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
int ret;
struct btrfs_key key;
+ struct btrfs_path *path;
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
maybe_lock_mutex(root);
key.objectid = start;
key.offset = len;
@@ -536,72 +518,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path,
ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
0, 0);
maybe_unlock_mutex(root);
- return ret;
-}
-
-static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 bytenr,
- u64 root_objectid,
- u64 ref_generation, u64 owner,
- u64 owner_offset, int del)
-{
- u64 hash;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_extent_ref ref;
- struct extent_buffer *leaf;
- struct btrfs_extent_ref *disk_ref;
- int ret;
- int ret2;
-
- btrfs_set_stack_ref_root(&ref, root_objectid);
- btrfs_set_stack_ref_generation(&ref, ref_generation);
- btrfs_set_stack_ref_objectid(&ref, owner);
- btrfs_set_stack_ref_offset(&ref, owner_offset);
-
- hash = hash_extent_ref(root_objectid, ref_generation, owner,
- owner_offset);
- key.offset = hash;
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_REF_KEY;
-
- while (1) {
- ret = btrfs_search_slot(trans, root, &key, path,
- del ? -1 : 0, del);
- if (ret < 0)
- goto out;
- leaf = path->nodes[0];
- if (ret != 0) {
- u32 nritems = btrfs_header_nritems(leaf);
- if (path->slots[0] >= nritems) {
- ret2 = btrfs_next_leaf(root, path);
- if (ret2)
- goto out;
- leaf = path->nodes[0];
- }
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != bytenr ||
- found_key.type != BTRFS_EXTENT_REF_KEY)
- goto out;
- key.offset = found_key.offset;
- if (del) {
- btrfs_release_path(root, path);
- continue;
- }
- }
- disk_ref = btrfs_item_ptr(path->nodes[0],
- path->slots[0],
- struct btrfs_extent_ref);
- if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
- ret = 0;
- goto out;
- }
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- key.offset = found_key.offset + 1;
- btrfs_release_path(root, path);
- }
-out:
+ btrfs_free_path(path);
return ret;
}
@@ -622,7 +539,7 @@ out:
* File extents can be referenced by:
*
* - multiple snapshots, subvolumes, or different generations in one subvol
- * - different files inside a single subvolume (in theory, not implemented yet)
+ * - different files inside a single subvolume
* - different offsets inside a file (bookend extents in file.c)
*
* The extent ref structure has fields for:
@@ -631,119 +548,284 @@ out:
* - Generation number of the tree holding the reference
* - objectid of the file holding the reference
* - offset in the file corresponding to the key holding the reference
+ * - number of references holding by parent node (alway 1 for tree blocks)
+ *
+ * Btree leaf may hold multiple references to a file extent. In most cases,
+ * these references are from same file and the corresponding offsets inside
+ * the file are close together. So inode objectid and offset in file are
+ * just hints, they provide hints about where in the btree the references
+ * can be found and when we can stop searching.
*
* When a file extent is allocated the fields are filled in:
- * (root_key.objectid, trans->transid, inode objectid, offset in file)
+ * (root_key.objectid, trans->transid, inode objectid, offset in file, 1)
*
* When a leaf is cow'd new references are added for every file extent found
- * in the leaf. It looks the same as the create case, but trans->transid
- * will be different when the block is cow'd.
+ * in the leaf. It looks similar to the create case, but trans->transid will
+ * be different when the block is cow'd.
*
- * (root_key.objectid, trans->transid, inode objectid, offset in file)
+ * (root_key.objectid, trans->transid, inode objectid, offset in file,
+ * number of references in the leaf)
*
- * When a file extent is removed either during snapshot deletion or file
- * truncation, the corresponding back reference is found
- * by searching for:
+ * Because inode objectid and offset in file are just hints, they are not
+ * used when backrefs are deleted. When a file extent is removed either
+ * during snapshot deletion or file truncation, we find the corresponding
+ * back back reference and check the following fields.
*
- * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
- * inode objectid, offset in file)
+ * (btrfs_header_owner(leaf), btrfs_header_generation(leaf))
*
* Btree extents can be referenced by:
*
* - Different subvolumes
* - Different generations of the same subvolume
*
- * Storing sufficient information for a full reverse mapping of a btree
- * block would require storing the lowest key of the block in the backref,
- * and it would require updating that lowest key either before write out or
- * every time it changed. Instead, the objectid of the lowest key is stored
- * along with the level of the tree block. This provides a hint
- * about where in the btree the block can be found. Searches through the
- * btree only need to look for a pointer to that block, so they stop one
- * level higher than the level recorded in the backref.
- *
- * Some btrees do not do reference counting on their extents. These
- * include the extent tree and the tree of tree roots. Backrefs for these
- * trees always have a generation of zero.
- *
* When a tree block is created, back references are inserted:
*
- * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
+ * (root->root_key.objectid, trans->transid, level, 0, 1)
*
- * When a tree block is cow'd in a reference counted root,
- * new back references are added for all the blocks it points to.
- * These are of the form (trans->transid will have increased since creation):
+ * When a tree block is cow'd, new back references are added for all the
+ * blocks it points to. If the tree block isn't in reference counted root,
+ * the old back references are removed. These new back references are of
+ * the form (trans->transid will have increased since creation):
*
- * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
+ * (root->root_key.objectid, trans->transid, level, 0, 1)
*
- * Because the lowest_key_objectid and the level are just hints
- * they are not used when backrefs are deleted. When a backref is deleted:
+ * When a backref is in deleting, the following fields are checked:
*
* if backref was for a tree root:
- * root_objectid = root->root_key.objectid
+ * (btrfs_header_owner(itself), btrfs_header_generation(itself))
* else
- * root_objectid = btrfs_header_owner(parent)
+ * (btrfs_header_owner(parent), btrfs_header_generation(parent))
*
- * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
+ * Back Reference Key composing:
*
- * Back Reference Key hashing:
- *
- * Back references have four fields, each 64 bits long. Unfortunately,
- * This is hashed into a single 64 bit number and placed into the key offset.
- * The key objectid corresponds to the first byte in the extent, and the
- * key type is set to BTRFS_EXTENT_REF_KEY
+ * The key objectid corresponds to the first byte in the extent, the key
+ * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
+ * byte of parent extent. If a extent is tree root, the key offset is se