summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorChris Mason <clm@fb.com>2015-12-18 11:11:10 -0800
committerChris Mason <clm@fb.com>2015-12-18 11:11:10 -0800
commitf7d3d2f99eeaa9f5c111965b1516972f4fc5e449 (patch)
tree2ad3d4d98af789fd807795a9683ce4904b3c4182 /fs/btrfs
parent9f9499ae8e6415cefc4fe0a96ad0e27864353c89 (diff)
parent70f6d82ec73c3ae2d3adc6853c5bebcd73610097 (diff)
Merge branch 'freespace-tree' into for-linus-4.5
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Makefile5
-rw-r--r--fs/btrfs/ctree.h157
-rw-r--r--fs/btrfs/disk-io.c38
-rw-r--r--fs/btrfs/extent-tree.c101
-rw-r--r--fs/btrfs/extent_io.c183
-rw-r--r--fs/btrfs/extent_io.h10
-rw-r--r--fs/btrfs/free-space-tree.c1584
-rw-r--r--fs/btrfs/free-space-tree.h72
-rw-r--r--fs/btrfs/super.c56
-rw-r--r--fs/btrfs/tests/btrfs-tests.c58
-rw-r--r--fs/btrfs/tests/btrfs-tests.h10
-rw-r--r--fs/btrfs/tests/extent-io-tests.c138
-rw-r--r--fs/btrfs/tests/free-space-tests.c41
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c571
-rw-r--r--fs/btrfs/tests/qgroup-tests.c20
15 files changed, 2932 insertions, 112 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 6d1d0b93b1aa..128ce17a80b0 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,11 +9,12 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o hash.o
+ uuid-tree.o props.o hash.o free-space-tree.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
- tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o
+ tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
+ tests/free-space-tree-tests.o
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 35489e7129a7..cf87979a153e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -96,6 +96,9 @@ struct btrfs_ordered_sum;
/* for storing items that use the BTRFS_UUID_KEY* types */
#define BTRFS_UUID_TREE_OBJECTID 9ULL
+/* tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
@@ -500,6 +503,8 @@ struct btrfs_super_block {
* Compat flags that we support. If any incompat flags are set other than the
* ones specified below then we will fail to mount
*/
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
+
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
@@ -526,7 +531,10 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
-#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
+
+#define BTRFS_FEATURE_COMPAT_RO_SUPP \
+ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
@@ -1088,6 +1096,13 @@ struct btrfs_block_group_item {
__le64 flags;
} __attribute__ ((__packed__));
+struct btrfs_free_space_info {
+ __le32 extent_count;
+ __le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
#define BTRFS_QGROUP_LEVEL_SHIFT 48
static inline u64 btrfs_qgroup_level(u64 qgroupid)
{
@@ -1296,6 +1311,9 @@ struct btrfs_caching_control {
atomic_t count;
};
+/* Once caching_thread() finds this much free space, it will wake up waiters. */
+#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
+
struct btrfs_io_ctl {
void *cur, *orig;
struct page *page;
@@ -1321,8 +1339,20 @@ struct btrfs_block_group_cache {
u64 delalloc_bytes;
u64 bytes_super;
u64 flags;
- u64 sectorsize;
u64 cache_generation;
+ u32 sectorsize;
+
+ /*
+ * If the free space extent count exceeds this number, convert the block
+ * group to bitmaps.
+ */
+ u32 bitmap_high_thresh;
+
+ /*
+ * If the free space extent count drops below this number, convert the
+ * block group back to extents.
+ */
+ u32 bitmap_low_thresh;
/*
* It is just used for the delayed data space allocation because
@@ -1378,6 +1408,15 @@ struct btrfs_block_group_cache {
struct list_head io_list;
struct btrfs_io_ctl io_ctl;
+
+ /* Lock for free space tree operations. */
+ struct mutex free_space_lock;
+
+ /*
+ * Does the block group need to be added to the free space tree?
+ * Protected by free_space_lock.
+ */
+ int needs_free_space;
};
/* delayed seq elem */
@@ -1429,6 +1468,7 @@ struct btrfs_fs_info {
struct btrfs_root *csum_root;
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
+ struct btrfs_root *free_space_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@@ -2092,6 +2132,27 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents. A free space bitmap is keyed on
+ * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
+ * (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
#define BTRFS_DEV_EXTENT_KEY 204
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
@@ -2184,6 +2245,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
+#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (8192)
@@ -2506,6 +2568,11 @@ BTRFS_SETGET_FUNCS(disk_block_group_flags,
BTRFS_SETGET_STACK_FUNCS(block_group_flags,
struct btrfs_block_group_item, flags, 64);
+/* struct btrfs_free_space_info */
+BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+ extent_count, 32);
+BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+
/* struct btrfs_inode_ref */
BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
@@ -3573,6 +3640,9 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const u64 type);
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_fs_info *info, u64 start, u64 end);
+
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -3737,6 +3807,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->csum_root);
kfree(fs_info->quota_root);
kfree(fs_info->uuid_root);
+ kfree(fs_info->free_space_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
security_free_mnt_opts(&fs_info->security_opts);
@@ -4247,6 +4318,30 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
}
}
+#define btrfs_clear_fs_incompat(__fs_info, opt) \
+ __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
+ u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+
+ disk_super = fs_info->super_copy;
+ features = btrfs_super_incompat_flags(disk_super);
+ if (features & flag) {
+ spin_lock(&fs_info->super_lock);
+ features = btrfs_super_incompat_flags(disk_super);
+ if (features & flag) {
+ features &= ~flag;
+ btrfs_set_super_incompat_flags(disk_super, features);
+ btrfs_info(fs_info, "clearing %llu feature flag",
+ flag);
+ }
+ spin_unlock(&fs_info->super_lock);
+ }
+}
+
#define btrfs_fs_incompat(fs_info, opt) \
__btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
@@ -4257,6 +4352,64 @@ static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
return !!(btrfs_super_incompat_flags(disk_super) & flag);
}
+#define btrfs_set_fs_compat_ro(__fs_info, opt) \
+ __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
+ u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+
+ disk_super = fs_info->super_copy;
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (!(features & flag)) {
+ spin_lock(&fs_info->super_lock);
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (!(features & flag)) {
+ features |= flag;
+ btrfs_set_super_compat_ro_flags(disk_super, features);
+ btrfs_info(fs_info, "setting %llu ro feature flag",
+ flag);
+ }
+ spin_unlock(&fs_info->super_lock);
+ }
+}
+
+#define btrfs_clear_fs_compat_ro(__fs_info, opt) \
+ __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
+ u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+
+ disk_super = fs_info->super_copy;
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (features & flag) {
+ spin_lock(&fs_info->super_lock);
+ features = btrfs_super_compat_ro_flags(disk_super);
+ if (features & flag) {
+ features &= ~flag;
+ btrfs_set_super_compat_ro_flags(disk_super, features);
+ btrfs_info(fs_info, "clearing %llu ro feature flag",
+ flag);
+ }
+ spin_unlock(&fs_info->super_lock);
+ }
+}
+
+#define btrfs_fs_compat_ro(fs_info, opt) \
+ __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ disk_super = fs_info->super_copy;
+ return !!(btrfs_super_compat_ro_flags(disk_super) & flag);
+}
+
/*
* Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported.
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 974be09e7556..52e623f59848 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -42,6 +42,7 @@
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
+#include "free-space-tree.h"
#include "inode-map.h"
#include "check-integrity.h"
#include "rcu-string.h"
@@ -1650,6 +1651,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
return fs_info->uuid_root ? fs_info->uuid_root :
ERR_PTR(-ENOENT);
+ if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+ return fs_info->free_space_root ? fs_info->free_space_root :
+ ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, location->objectid);
if (root) {
@@ -2148,6 +2152,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_root_extent_buffers(info->uuid_root);
if (chunk_root)
free_root_extent_buffers(info->chunk_root);
+ free_root_extent_buffers(info->free_space_root);
}
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2448,6 +2453,15 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
fs_info->uuid_root = root;
}
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->free_space_root = root;
+ }
+
return 0;
}
@@ -3076,6 +3090,30 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
+ if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+ btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ pr_info("BTRFS: clearing free space tree\n");
+ ret = btrfs_clear_free_space_tree(fs_info);
+ if (ret) {
+ pr_warn("BTRFS: failed to clear free space tree %d\n",
+ ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ }
+
+ if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+ !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ pr_info("BTRFS: creating free space tree\n");
+ ret = btrfs_create_free_space_tree(fs_info);
+ if (ret) {
+ pr_warn("BTRFS: failed to create free space tree %d\n",
+ ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ }
+
if (!fs_info->uuid_root) {
pr_info("BTRFS: creating UUID tree\n");
ret = btrfs_create_uuid_tree(fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b89680a1923..8abb344e3dcb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,7 @@
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
+#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"
@@ -357,8 +358,8 @@ static void fragment_free_space(struct btrfs_root *root,
* we need to check the pinned_extents for any extents that can't be used yet
* since their free space will be released as soon as the transaction commits.
*/
-static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_fs_info *info, u64 start, u64 end)
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_fs_info *info, u64 start, u64 end)
{
u64 extent_start, extent_end, size, total_added = 0;
int ret;
@@ -395,11 +396,10 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
return total_added;
}
-static noinline void caching_thread(struct btrfs_work *work)
+static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_fs_info *fs_info;
- struct btrfs_caching_control *caching_ctl;
struct btrfs_root *extent_root;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -407,17 +407,16 @@ static noinline void caching_thread(struct btrfs_work *work)
u64 total_found = 0;
u64 last = 0;
u32 nritems;
- int ret = -ENOMEM;
+ int ret;
bool wakeup = true;
- caching_ctl = container_of(work, struct btrfs_caching_control, work);
block_group = caching_ctl->block_group;
fs_info = block_group->fs_info;
extent_root = fs_info->extent_root;
path = btrfs_alloc_path();
if (!path)
- goto out;
+ return -ENOMEM;
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -443,15 +442,11 @@ static noinline void caching_thread(struct btrfs_work *work)
key.objectid = last;
key.offset = 0;
key.type = BTRFS_EXTENT_ITEM_KEY;
-again:
- mutex_lock(&caching_ctl->mutex);
- /* need to make sure the commit_root doesn't disappear */
- down_read(&fs_info->commit_root_sem);
next:
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
- goto err;
+ goto out;
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -477,12 +472,14 @@ next:
up_read(&fs_info->commit_root_sem);
mutex_unlock(&caching_ctl->mutex);
cond_resched();
- goto again;
+ mutex_lock(&caching_ctl->mutex);
+ down_read(&fs_info->commit_root_sem);
+ goto next;
}
ret = btrfs_next_leaf(extent_root, path);
if (ret < 0)
- goto err;
+ goto out;
if (ret)
break;
leaf = path->nodes[0];
@@ -521,7 +518,7 @@ next:
else
last = key.objectid + key.offset;
- if (total_found > (1024 * 1024 * 2)) {
+ if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
if (wakeup)
wake_up(&caching_ctl->wait);
@@ -534,9 +531,35 @@ next:
total_found += add_new_free_space(block_group, fs_info, last,
block_group->key.objectid +
block_group->key.offset);
+ caching_ctl->progress = (u64)-1;
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static noinline void caching_thread(struct btrfs_work *work)
+{
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_caching_control *caching_ctl;
+ int ret;
+
+ caching_ctl = container_of(work, struct btrfs_caching_control, work);
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+
+ mutex_lock(&caching_ctl->mutex);
+ down_read(&fs_info->commit_root_sem);
+
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ ret = load_free_space_tree(caching_ctl);
+ else
+ ret = load_extent_tree_free(caching_ctl);
+
spin_lock(&block_group->lock);
block_group->caching_ctl = NULL;
- block_group->cached = BTRFS_CACHE_FINISHED;
+ block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
spin_unlock(&block_group->lock);
#ifdef CONFIG_BTRFS_DEBUG
@@ -555,20 +578,11 @@ next:
#endif
caching_ctl->progress = (u64)-1;
-err:
- btrfs_free_path(path);
- up_read(&fs_info->commit_root_sem);
-
- free_excluded_extents(extent_root, block_group);
+ up_read(&fs_info->commit_root_sem);
+ free_excluded_extents(fs_info->extent_root, block_group);
mutex_unlock(&caching_ctl->mutex);
-out:
- if (ret) {
- spin_lock(&block_group->lock);
- block_group->caching_ctl = NULL;
- block_group->cached = BTRFS_CACHE_ERROR;
- spin_unlock(&block_group->lock);
- }
+
wake_up(&caching_ctl->wait);
put_caching_control(caching_ctl);
@@ -680,8 +694,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
}
} else {
/*
- * We are not going to do the fast caching, set cached to the
- * appropriate value and wakeup any waiters.
+ * We're either using the free space tree or no caching at all.
+ * Set cached to the appropriate value and wakeup any waiters.
*/
spin_lock(&cache->lock);
if (load_cache_only) {
@@ -6661,6 +6675,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
+ ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
+ num_bytes);
+ if (ret) {
+ btrfs_abort_transaction(trans, extent_root, ret);
+ goto out;
+ }
+
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
@@ -7672,6 +7693,11 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
+ ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+ ins->offset);
+ if (ret)
+ return ret;
+
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -7752,6 +7778,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
+ ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+ num_bytes);
+ if (ret)
+ return ret;
+
ret = update_block_group(trans, root, ins->objectid, root->nodesize,
1);
if (ret) { /* -ENOENT, logic error */
@@ -9656,6 +9687,8 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
cache->full_stripe_len = btrfs_full_stripe_len(root,
&root->fs_info->mapping_tree,
start);
+ set_free_space_tree_thresholds(cache);
+
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);
@@ -9667,6 +9700,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
atomic_set(&cache->trimming, 0);
+ mutex_init(&cache->free_space_lock);
return cache;
}
@@ -9877,6 +9911,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
key.objectid, key.offset);
if (ret)
btrfs_abort_transaction(trans, extent_root, ret);
+ add_block_group_free_space(trans, root->fs_info, block_group);
+ /* already aborted the transaction if it failed. */
next:
list_del_init(&block_group->bg_list);
}
@@ -9907,6 +9943,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
+ cache->needs_free_space = 1;
ret = exclude_super_stripes(root, cache);
if (ret) {
/*
@@ -10277,6 +10314,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
unlock_chunks(root);
+ ret = remove_block_group_free_space(trans, root->fs_info, block_group);
+ if (ret)
+ goto out;
+
btrfs_put_block_group(block_group);
btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9abe18763a7f..2b3f26326565 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4797,24 +4797,14 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
return new;
}
-struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start)
+struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start, unsigned long len)
{
struct extent_buffer *eb;
- unsigned long len;
unsigned long num_pages;
unsigned long i;
- if (!fs_info) {
- /*
- * Called only from tests that don't always have a fs_info
- * available, but we know that nodesize is 4096
- */
- len = 4096;
- } else {
- len = fs_info->tree_root->nodesize;
- }
- num_pages = num_extent_pages(0, len);
+ num_pages = num_extent_pages(start, len);
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
@@ -4837,6 +4827,24 @@ err:
return NULL;
}
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start)
+{
+ unsigned long len;
+
+ if (!fs_info) {
+ /*
+ * Called only from tests that don't always have a fs_info
+ * available, but we know that nodesize is 4096
+ */
+ len = 4096;
+ } else {
+ len = fs_info->tree_root->nodesize;
+ }
+
+ return __alloc_dummy_extent_buffer(fs_info, start, len);
+}
+
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
@@ -5594,6 +5602,155 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
+/*
+ * The extent buffer bitmap operations are done with byte granularity because
+ * bitmap items are not guaranteed to be aligned to a word and therefore a
+ * single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+ ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+/*
+ * eb_bitmap_offset() - calculate the page and offset of the byte containing the
+ * given bit number
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @nr: bit number
+ * @page_index: return index of the page in the extent buffer that contains the
+ * given bit number
+ * @page_offset: return offset into the page given by page_index
+ *
+ * This helper hides the ugliness of finding the byte in an extent buffer which
+ * contains a given bit.
+ */
+static inline void eb_bitmap_offset(struct extent_buffer *eb,
+ unsigned long start, unsigned long nr,
+ unsigned long *page_index,
+ size_t *page_offset)
+{
+ size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+ size_t byte_offset = BIT_BYTE(nr);
+ size_t offset;
+
+ /*
+ * The byte we want is the offset of the extent buffer + the offset of
+ * the bitmap item in the extent buffer + the offset of the byte in the
+ * bitmap item.
+ */
+ offset = start_offset + start + byte_offset;
+
+ *page_index = offset >> PAGE_CACHE_SHIFT;
+ *page_offset = offset & (PAGE_CACHE_SIZE - 1);
+}
+
+/**
+ * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @nr: bit number to test
+ */
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long nr)
+{
+ char *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+
+ eb_bitmap_offset(eb, start, nr, &i, &offset);
+ page = eb->pages[i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+ return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
+}
+
+/**
+ * extent_buffer_bitmap_set - set an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to set
+ */
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+{
+ char *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+ const unsigned int size = pos + len;
+ int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+ unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+
+ eb_bitmap_offset(eb, start, pos, &i, &offset);
+ page = eb->pages[i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+
+ while (len >= bits_to_set) {
+ kaddr[offset] |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+ mask_to_set = ~0U;
+ if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+ offset = 0;
+ page = eb->pages[++i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+ }
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+ kaddr[offset] |= mask_to_set;
+ }
+}
+
+
+/**
+ * extent_buffer_bitmap_clear - clear an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to clear
+ */
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+{
+ char *kaddr;
+ struct page *page;
+ unsigned long i;
+ size_t offset;
+ const unsigned int size = pos + len;
+ int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+ unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+
+ eb_bitmap_offset(eb, start, pos, &i, &offset);
+ page = eb->pages[i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+
+ while (len >= bits_to_clear) {
+ kaddr[offset] &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_BYTE;
+ mask_to_clear = ~0U;
+ if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+ offset = 0;
+ page = eb->pages[++i];
+ WARN_ON(!PageUptodate(page));
+ kaddr = page_address(page);
+ }
+ }
+ if (len) {
+ mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+ kaddr[offset] &= ~mask_to_clear;
+ }
+}
+
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
unsigned long distance = (src > dst) ? src - dst : dst - src;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f4c1ae11855f..350c8b0a8582 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -282,8 +282,10 @@ void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
+struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start);
+ u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
@@ -328,6 +330,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos);
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len);
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len);
void clear_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_uptodate(struct extent_buffer *eb);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
new file mode 100644
index 000000000000..cbe36dd0d97b
--- /dev/null
+++ b/fs/btrfs/free-space-tree.c
@@ -0,0 +1,1584 @@
+/*
+ * Copyright (C) 2015 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "locking.h"
+#include "free-space-tree.h"
+#include "transaction.h"
+
+static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+
+void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
+{
+ u32 bitmap_range;
+ size_t bitmap_size;
+ u64 num_bitmaps, total_bitmap_size;
+
+ /*
+ * We convert to bitmaps when the disk space required for using extents
+ * exceeds that required for using bitmaps.
+ */
+ bitmap_range = cache->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+ num_bitmaps = div_u64(cache->key.offset + bitmap_range - 1,
+ bitmap_range);
+ bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
+ total_bitmap_size = num_bitmaps * bitmap_size;
+ cache->bitmap_high_thresh = div_u64(total_bitmap_size,
+ sizeof(struct btrfs_item));
+
+ /*
+ * We allow for a small buffer between the high threshold and low
+ * threshold to avoid thrashing back and forth between the two formats.
+ */
+ if (cache->bitmap_high_thresh > 100)
+ cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
+ else
+ cache->bitmap_low_thresh = 0;
+}
+
+static int add_new_free_space_info(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = fs_info->free_space_root;
+ struct btrfs_free_space_info *info;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = block_group->key.objectid;
+ key.type = BTRFS_FREE_SPACE_INFO_KEY;
+ key.offset = block_group->key.offset;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ info = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_info);
+ btrfs_set_free_space_extent_count(leaf, info, 0);