Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--   fs/btrfs/extent-tree.c | 1254
1 file changed, 801 insertions(+), 453 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a1febf155747..b15afeae16df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -51,6 +51,24 @@ enum {
CHUNK_ALLOC_FORCE = 2,
};
+/*
+ * Declare a helper function to detect underflow of various space info members
+ */
+#define DECLARE_SPACE_INFO_UPDATE(name) \
+static inline void update_##name(struct btrfs_space_info *sinfo, \
+ s64 bytes) \
+{ \
+ if (bytes < 0 && sinfo->name < -bytes) { \
+ WARN_ON(1); \
+ sinfo->name = 0; \
+ return; \
+ } \
+ sinfo->name += bytes; \
+}
+
+DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
+DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
+
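
For reference, DECLARE_SPACE_INFO_UPDATE(bytes_may_use) above expands to roughly the following inline helper (illustrative expansion only, not a hunk of this patch); update_bytes_pinned() is generated the same way for the bytes_pinned member:

static inline void update_bytes_may_use(struct btrfs_space_info *sinfo,
					s64 bytes)
{
	/* A negative delta larger than the current value would underflow. */
	if (bytes < 0 && sinfo->bytes_may_use < -bytes) {
		WARN_ON(1);
		sinfo->bytes_may_use = 0;
		return;
	}
	sinfo->bytes_may_use += bytes;
}
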
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
@@ -1037,7 +1055,7 @@ out_free:
/*
* is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
- * is_data == BTRFS_REF_TYPE_DATA, data type is requried,
+ * is_data == BTRFS_REF_TYPE_DATA, data type is required,
* is_data == BTRFS_REF_TYPE_ANY, either type is OK.
*/
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
@@ -2406,25 +2424,82 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref
btrfs_delayed_ref_unlock(head);
}
-static int cleanup_extent_op(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head)
+static struct btrfs_delayed_extent_op *cleanup_extent_op(
+ struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
- int ret;
if (!extent_op)
- return 0;
- head->extent_op = NULL;
+ return NULL;
+
if (head->must_insert_reserved) {
+ head->extent_op = NULL;
btrfs_free_delayed_extent_op(extent_op);
- return 0;
+ return NULL;
}
+ return extent_op;
+}
+
+static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head)
+{
+ struct btrfs_delayed_extent_op *extent_op;
+ int ret;
+
+ extent_op = cleanup_extent_op(head);
+ if (!extent_op)
+ return 0;
+ head->extent_op = NULL;
spin_unlock(&head->lock);
ret = run_delayed_extent_op(trans, head, extent_op);
btrfs_free_delayed_extent_op(extent_op);
return ret ? ret : 1;
}
+static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_delayed_ref_root *delayed_refs =
+ &trans->transaction->delayed_refs;
+ int nr_items = 1; /* Dropping this ref head update. */
+
+ if (head->total_ref_mod < 0) {
+ struct btrfs_space_info *space_info;
+ u64 flags;
+
+ if (head->is_data)
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ else if (head->is_system)
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ flags = BTRFS_BLOCK_GROUP_METADATA;
+ space_info = __find_space_info(fs_info, flags);
+ ASSERT(space_info);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned,
+ -head->num_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
+
+ /*
+ * We had csum deletions accounted for in our delayed refs rsv, so
+ * we need to drop the csum leaves for this update from our
+ * delayed_refs_rsv.
+ */
+ if (head->is_data) {
+ spin_lock(&delayed_refs->lock);
+ delayed_refs->pending_csums -= head->num_bytes;
+ spin_unlock(&delayed_refs->lock);
+ nr_items += btrfs_csum_bytes_to_leaves(fs_info,
+ head->num_bytes);
+ }
+ }
+
+ /* Also free its reserved qgroup space */
+ btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
+ head->qgroup_reserved);
+ btrfs_delayed_refs_rsv_release(fs_info, nr_items);
+}
+
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head)
{
@@ -2435,7 +2510,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
- ret = cleanup_extent_op(trans, head);
+ ret = run_and_cleanup_extent_op(trans, head);
if (ret < 0) {
unselect_delayed_ref_head(delayed_refs, head);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
@@ -2456,37 +2531,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
spin_unlock(&delayed_refs->lock);
return 1;
}
- delayed_refs->num_heads--;
- rb_erase_cached(&head->href_node, &delayed_refs->href_root);
- RB_CLEAR_NODE(&head->href_node);
+ btrfs_delete_ref_head(delayed_refs, head);
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
- atomic_dec(&delayed_refs->num_entries);
-
- trace_run_delayed_ref_head(fs_info, head, 0);
-
- if (head->total_ref_mod < 0) {
- struct btrfs_space_info *space_info;
- u64 flags;
-
- if (head->is_data)
- flags = BTRFS_BLOCK_GROUP_DATA;
- else if (head->is_system)
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- else
- flags = BTRFS_BLOCK_GROUP_METADATA;
- space_info = __find_space_info(fs_info, flags);
- ASSERT(space_info);
- percpu_counter_add_batch(&space_info->total_bytes_pinned,
- -head->num_bytes,
- BTRFS_TOTAL_BYTES_PINNED_BATCH);
-
- if (head->is_data) {
- spin_lock(&delayed_refs->lock);
- delayed_refs->pending_csums -= head->num_bytes;
- spin_unlock(&delayed_refs->lock);
- }
- }
if (head->must_insert_reserved) {
btrfs_pin_extent(fs_info, head->bytenr,
@@ -2497,9 +2544,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
}
}
- /* Also free its reserved qgroup space */
- btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
- head->qgroup_reserved);
+ cleanup_ref_head_accounting(trans, head);
+
+ trace_run_delayed_ref_head(fs_info, head, 0);
btrfs_delayed_ref_unlock(head);
btrfs_put_delayed_ref_head(head);
return 0;
@@ -2792,40 +2839,28 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
return num_csums;
}
-int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans)
+bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_rsv *global_rsv;
- u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
- u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
- unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs;
- u64 num_bytes, num_dirty_bgs_bytes;
- int ret = 0;
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ bool ret = false;
+ u64 reserved;
- num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- num_heads = heads_to_leaves(fs_info, num_heads);
- if (num_heads > 1)
- num_bytes += (num_heads - 1) * fs_info->nodesize;
- num_bytes <<= 1;
- num_bytes += btrfs_csum_bytes_to_leaves(fs_info, csum_bytes) *
- fs_info->nodesize;
- num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(fs_info,
- num_dirty_bgs);
- global_rsv = &fs_info->global_block_rsv;
+ spin_lock(&global_rsv->lock);
+ reserved = global_rsv->reserved;
+ spin_unlock(&global_rsv->lock);
/*
- * If we can't allocate any more chunks lets make sure we have _lots_ of
- * wiggle room since running delayed refs can create more delayed refs.
+ * Since the global reserve is just kind of magic we don't really want
+ * to rely on it to save our bacon, so if our size is more than the
+ * delayed_refs_rsv and the global rsv then it's time to think about
+ * bailing.
*/
- if (global_rsv->space_info->full) {
- num_dirty_bgs_bytes <<= 1;
- num_bytes <<= 1;
- }
-
- spin_lock(&global_rsv->lock);
- if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
- ret = 1;
- spin_unlock(&global_rsv->lock);
+ spin_lock(&delayed_refs_rsv->lock);
+ reserved += delayed_refs_rsv->reserved;
+ if (delayed_refs_rsv->size >= reserved)
+ ret = true;
+ spin_unlock(&delayed_refs_rsv->lock);
return ret;
}
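
A worked example of the new check (numbers invented for illustration): if global_rsv->reserved is 6 MiB and delayed_refs_rsv->reserved is 2 MiB, the combined 8 MiB of reservations is compared against delayed_refs_rsv->size; a size of 12 MiB means the outstanding delayed ref work is not fully paid for, so the function returns true and callers such as btrfs_should_throttle_delayed_refs() know it is time to throttle.
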
@@ -2844,7 +2879,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
if (val >= NSEC_PER_SEC / 2)
return 2;
- return btrfs_check_space_for_delayed_refs(trans);
+ return btrfs_check_space_for_delayed_refs(trans->fs_info);
}
struct async_delayed_refs {
@@ -3588,6 +3623,8 @@ again:
*/
mutex_lock(&trans->transaction->cache_write_mutex);
while (!list_empty(&dirty)) {
+ bool drop_reserve = true;
+
cache = list_first_entry(&dirty,
struct btrfs_block_group_cache,
dirty_list);
@@ -3660,6 +3697,7 @@ again:
list_add_tail(&cache->dirty_list,
&cur_trans->dirty_bgs);
btrfs_get_block_group(cache);
+ drop_reserve = false;
}
spin_unlock(&cur_trans->dirty_bgs_lock);
} else if (ret) {
@@ -3667,9 +3705,11 @@ again:
}
}
- /* if its not on the io list, we need to put the block group */
+ /* if it's not on the io list, we need to put the block group */
if (should_put)
btrfs_put_block_group(cache);
+ if (drop_reserve)
+ btrfs_delayed_refs_rsv_release(fs_info, 1);
if (ret)
break;
@@ -3818,6 +3858,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
/* if its not on the io list, we need to put the block group */
if (should_put)
btrfs_put_block_group(cache);
+ btrfs_delayed_refs_rsv_release(fs_info, 1);
spin_lock(&cur_trans->dirty_bgs_lock);
}
spin_unlock(&cur_trans->dirty_bgs_lock);
@@ -4256,7 +4297,7 @@ commit_trans:
data_sinfo->flags, bytes, 1);
return -ENOSPC;
}
- data_sinfo->bytes_may_use += bytes;
+ update_bytes_may_use(data_sinfo, bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
data_sinfo->flags, bytes, 1);
spin_unlock(&data_sinfo->lock);
@@ -4309,10 +4350,7 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
data_sinfo = fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
- if (WARN_ON(data_sinfo->bytes_may_use < len))
- data_sinfo->bytes_may_use = 0;
- else
- data_sinfo->bytes_may_use -= len;
+ update_bytes_may_use(data_sinfo, -len);
trace_btrfs_space_reservation(fs_info, "space_info",
data_sinfo->flags, len, 0);
spin_unlock(&data_sinfo->lock);
@@ -4637,7 +4675,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
/*
* If we have dup, raid1 or raid10 then only half of the free
- * space is actually useable. For raid56, the space info used
+ * space is actually usable. For raid56, the space info used
* doesn't include the parity drive, so we don't have to
* change the math
*/
@@ -4793,8 +4831,10 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
{
struct reserve_ticket *ticket = NULL;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
struct btrfs_trans_handle *trans;
- u64 bytes;
+ u64 bytes_needed;
+ u64 reclaim_bytes = 0;
trans = (struct btrfs_trans_handle *)current->journal_info;
if (trans)
@@ -4807,15 +4847,15 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
else if (!list_empty(&space_info->tickets))
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
- bytes = (ticket) ? ticket->bytes : 0;
+ bytes_needed = (ticket) ? ticket->bytes : 0;
spin_unlock(&space_info->lock);
- if (!bytes)
+ if (!bytes_needed)
return 0;
/* See if there is enough pinned space to make this reservation */
if (__percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes,
+ bytes_needed,
BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
goto commit;
@@ -4827,14 +4867,18 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
return -ENOSPC;
spin_lock(&delayed_rsv->lock);
- if (delayed_rsv->size > bytes)
- bytes = 0;
- else
- bytes -= delayed_rsv->size;
+ reclaim_bytes += delayed_rsv->reserved;
spin_unlock(&delayed_rsv->lock);
+ spin_lock(&delayed_refs_rsv->lock);
+ reclaim_bytes += delayed_refs_rsv->reserved;
+ spin_unlock(&delayed_refs_rsv->lock);
+ if (reclaim_bytes >= bytes_needed)
+ goto commit;
+ bytes_needed -= reclaim_bytes;
+
if (__percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes,
+ bytes_needed,
BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
return -ENOSPC;
}
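
A worked example of the reworked reclaim logic (values invented): suppose the ticket needs bytes_needed = 8 MiB. If delayed_rsv->reserved plus delayed_refs_rsv->reserved comes to 10 MiB, reclaim_bytes covers the request and we jump straight to commit; if they only come to 3 MiB, bytes_needed shrinks to 5 MiB, which must then be covered by total_bytes_pinned or the function returns -ENOSPC.
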
@@ -4882,6 +4926,20 @@ static void flush_space(struct btrfs_fs_info *fs_info,
shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
state == FLUSH_DELALLOC_WAIT);
break;
+ case FLUSH_DELAYED_REFS_NR:
+ case FLUSH_DELAYED_REFS:
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ if (state == FLUSH_DELAYED_REFS_NR)
+ nr = calc_reclaim_items_nr(fs_info, num_bytes);
+ else
+ nr = 0;
+ btrfs_run_delayed_refs(trans, nr);
+ btrfs_end_transaction(trans);
+ break;
case ALLOC_CHUNK:
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
@@ -5108,7 +5166,7 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
list_del_init(&ticket->list);
if (ticket->bytes && ticket->bytes < orig_bytes) {
u64 num_bytes = orig_bytes - ticket->bytes;
- space_info->bytes_may_use -= num_bytes;
+ update_bytes_may_use(space_info, -num_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
}
@@ -5154,13 +5212,13 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* If not things get more complicated.
*/
if (used + orig_bytes <= space_info->total_bytes) {
- space_info->bytes_may_use += orig_bytes;
+ update_bytes_may_use(space_info, orig_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1);
ret = 0;
} else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
system_chunk)) {
- space_info->bytes_may_use += orig_bytes;
+ update_bytes_may_use(space_info, orig_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1);
ret = 0;
@@ -5223,7 +5281,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
if (ticket.bytes) {
if (ticket.bytes < orig_bytes) {
u64 num_bytes = orig_bytes - ticket.bytes;
- space_info->bytes_may_use -= num_bytes;
+ update_bytes_may_use(space_info, -num_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags,
num_bytes, 0);
@@ -5244,7 +5302,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* @orig_bytes - the number of bytes we want
* @flush - whether or not we can flush to make our reservation
*
- * This will reserve orgi_bytes number of bytes from the space info associated
+ * This will reserve orig_bytes number of bytes from the space info associated
* with the block_rsv. If there is not enough space it will make an attempt to
* flush out space to make room. It will do this by flushing delalloc if
* possible or committing the transaction. If flush is 0 then no attempts to
@@ -5354,6 +5412,90 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
return 0;
}
+/**
+ * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
+ * @fs_info - the fs info for our fs.
+ * @src - the source block rsv to transfer from.
+ * @num_bytes - the number of bytes to transfer.
+ *
+ * This transfers up to the num_bytes amount from the src rsv to the
+ * delayed_refs_rsv. Any extra bytes are returned to the space info.
+ */
+void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *src,
+ u64 num_bytes)
+{
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ u64 to_free = 0;
+
+ spin_lock(&src->lock);
+ src->reserved -= num_bytes;
+ src->size -= num_bytes;
+ spin_unlock(&src->lock);
+
+ spin_lock(&delayed_refs_rsv->lock);
+ if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
+ u64 delta = delayed_refs_rsv->size -
+ delayed_refs_rsv->reserved;
+ if (num_bytes > delta) {
+ to_free = num_bytes - delta;
+ num_bytes = delta;
+ }
+ } else {
+ to_free = num_bytes;
+ num_bytes = 0;
+ }
+
+ if (num_bytes)
+ delayed_refs_rsv->reserved += num_bytes;
+ if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
+ delayed_refs_rsv->full = 1;
+ spin_unlock(&delayed_refs_rsv->lock);
+
+ if (num_bytes)
+ trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+ 0, num_bytes, 1);
+ if (to_free)
+ space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
+ to_free);
+}
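
A worked example of the transfer math above (values invented): with delayed_refs_rsv->size at 10 MiB and delayed_refs_rsv->reserved at 8 MiB, the deficit (delta) is 2 MiB. Migrating num_bytes = 5 MiB debits the source rsv by the full 5 MiB up front, moves 2 MiB into the delayed refs rsv (which is then marked full), and hands the remaining 3 MiB back to the space info via space_info_add_old_bytes().
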
+
+/**
+ * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
+ * @fs_info - the fs_info for our fs.
+ * @flush - control how we can flush for this reservation.
+ *
+ * This will refill the delayed refs rsv up to one item's worth of space and
+ * will return -ENOSPC if we can't make the reservation.
+ */
+int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
+ enum btrfs_reserve_flush_enum flush)
+{
+ struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
+ u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
+ u64 num_bytes = 0;
+ int ret = -ENOSPC;
+
+ spin_lock(&block_rsv->lock);
+ if (block_rsv->reserved < block_rsv->size) {
+ num_bytes = block_rsv->size - block_rsv->reserved;
+ num_bytes = min(num_bytes, limit);
+ }
+ spin_unlock(&block_rsv->lock);
+
+ if (!num_bytes)
+ return 0;
+
+ ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
+ num_bytes, flush);
+ if (ret)
+ return ret;
+ block_rsv_add_bytes(block_rsv, num_bytes, 0);
+ trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+ 0, num_bytes, 1);
+ return 0;
+}
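
A hypothetical caller sketch of the refill helper (the wrapper function below is invented for illustration; btrfs_delayed_refs_rsv_refill() and the existing BTRFS_RESERVE_FLUSH_LIMIT flush mode are the pieces it relies on):

/*
 * Hypothetical caller: top the delayed refs rsv back up before queueing
 * more work, using the limited flush mode so we don't block for too long.
 */
static int example_refill_delayed_refs_rsv(struct btrfs_fs_info *fs_info)
{
	int ret;

	ret = btrfs_delayed_refs_rsv_refill(fs_info, BTRFS_RESERVE_FLUSH_LIMIT);
	if (ret == -ENOSPC)
		return ret;	/* Caller decides whether to commit instead. */
	return ret;
}
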
+
/*
* This is for space we already have accounted in space_info->bytes_may_use, so
* basically when we're returning space from block_rsv's.
@@ -5407,7 +5549,7 @@ again:
flush = BTRFS_RESERVE_FLUSH_ALL;
goto again;
}
- space_info->bytes_may_use -= num_bytes;
+ update_bytes_may_use(space_info, -num_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
spin_unlock(&space_info->lock);
@@ -5435,7 +5577,7 @@ again:
ticket->bytes, 1);
list_del_init(&ticket->list);
num_bytes -= ticket->bytes;
- space_info->bytes_may_use += ticket->bytes;
+ update_bytes_may_use(space_info, ticket->bytes);
ticket->bytes = 0;
space_info->tickets_id++;
wake_up(&ticket->wait);
@@ -5443,7 +5585,7 @@ again:
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags,
num_bytes, 1);
- space_info->bytes_may_use += num_bytes;
+ update_bytes_may_use(space_info, num_bytes);
ticket->bytes -= num_bytes;
num_bytes = 0;
}
@@ -5629,11 +5771,11 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
/**
* btrfs_inode_rsv_refill - refill the inode block rsv.
* @inode - the inode we are refilling.
- * @flush - the flusing restriction.
+ * @flush - the flushing restriction.
*
* Essentially the same as btrfs_block_rsv_refill, except it uses the
* block_rsv->size as the minimum size. We'll either refill the missing amount
- * or return if we already have enough space. This will also handle the resreve
+ * or return if we already have enough space. This will also handle the reserve
* tracepoint for the reserved amount.
*/
static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
@@ -5674,6 +5816,31 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
return ret;
}
+static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes, u64 *qgroup_to_release)
+{
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *target = delayed_rsv;
+
+ if (target->full || target == block_rsv)
+ target = global_rsv;
+
+ if (block_rsv->space_info != target->space_info)
+ target = NULL;
+
+ return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
+ qgroup_to_release);
+}
+
+void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+ u64 num_bytes)
+{
+ __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
+}
+
/**
* btrfs_inode_rsv_release - release any excessive reservation.
* @inode - the inode we need to release from.
@@ -5688,7 +5855,6 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 released = 0;
u64 qgroup_to_release = 0;
@@ -5698,8 +5864,8 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
* are releasing 0 bytes, and then we'll just get the reservation over
* the size free'd.
*/
- released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
- &qgroup_to_release);
+ released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
+ &qgroup_to_release);
if (released > 0)
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), released, 0);
@@ -5710,16 +5876,26 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
qgroup_to_release);
}
-void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
+/**
+ * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
+ * @fs_info - the fs_info for our fs.
+ * @nr - the number of items to drop.
+ *
+ * This drops the delayed ref head's count from the delayed refs rsv and frees
+ * any excess reservation we had.
+ */
+void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
{
+ struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
+ u64 released = 0;
- if (global_rsv == block_rsv ||
- block_rsv->space_info != global_rsv->space_info)
- global_rsv = NULL;
- block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
+ released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
+ num_bytes, NULL);
+ if (released)
+ trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+ 0, released, 0);
}
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5750,14 +5926,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = min(num_bytes,
block_rsv->size - block_rsv->reserved);
block_rsv->reserved += num_bytes;
- sinfo->bytes_may_use += num_bytes;
+ update_bytes_may_use(sinfo, num_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes,
1);
}
} else if (block_rsv->reserved > block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
- sinfo->bytes_may_use -= num_bytes;
+ update_bytes_may_use(sinfo, -num_bytes);
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes, 0);
block_rsv->reserved = block_rsv->size;
@@ -5784,9 +5960,10 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
fs_info->delayed_block_rsv.space_info = space_info;
+ fs_info->delayed_refs_rsv.space_info = space_info;
- fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
+ fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
+ fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
if (fs_info->quota_root)
@@ -5806,8 +5983,34 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
WARN_ON(fs_info->delayed_block_rsv.size > 0);
WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
+ WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
+ WARN_ON(fs_info->delayed_refs_rsv.size > 0);
}
+/*
+ * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
+ * @trans - the trans that may have generated delayed refs
+ *
+ * This is to be called anytime we may have adjusted trans->delayed_ref_updates;
+ * it will calculate the additional size and add it to the delayed_refs_rsv.
+ */
+void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+ u64 num_bytes;
+
+ if (!trans->delayed_ref_updates)
+ return;
+
+ num_bytes = btrfs_calc_trans_metadata_size(fs_info,
+ trans->delayed_ref_updates);
+ spin_lock(&delayed_rsv->lock);
+ delayed_rsv->size += num_bytes;
+ delayed_rsv->full = 0;
+ spin_unlock(&delayed_rsv->lock);
+ trans->delayed_ref_updates = 0;
+}
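
The calling convention described in the comment is the one used later in this patch by update_block_group(); distilled into a minimal sketch (the wrapper function is invented for illustration):

/*
 * Sketch of the expected pattern: record how many delayed-refs-rsv items
 * the current operation added, then fold them into the rsv's size.
 */
static void example_account_one_item(struct btrfs_trans_handle *trans)
{
	/* e.g. a block group was newly added to the dirty list */
	trans->delayed_ref_updates++;

	/* Grows delayed_refs_rsv->size and zeroes delayed_ref_updates. */
	btrfs_update_delayed_refs_rsv(trans);
}
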
/*
* To be called after all the new block groups attached to the transaction
@@ -6100,6 +6303,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
u64 old_val;
u64 byte_in_group;
int factor;
+ int ret = 0;
/* block accounting for super block */
spin_lock(&info->delalloc_root_lock);
@@ -6113,8 +6317,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
while (total) {
cache = btrfs_lookup_block_group(info, bytenr);
- if (!cache)
- return -ENOENT;
+ if (!cache) {
+ ret = -ENOENT;
+ break;
+ }
factor = btrfs_bg_type_to_factor(cache->flags);
/*
@@ -6151,7 +6357,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
old_val -= num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
cache->pinned += num_bytes;
- cache->space_info->bytes_pinned += num_bytes;
+ update_bytes_pinned(cache->space_info, num_bytes);
cache->space_info->bytes_used -= num_bytes;
cache->space_info->disk_used -= num_bytes * factor;
spin_unlock(&cache->lock);
@@ -6173,6 +6379,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
list_add_tail(&cache->dirty_list,
&trans->transaction->dirty_bgs);
trans->transaction->num_dirty_bgs++;
+ trans->delayed_ref_updates++;
btrfs_get_block_group(cache);
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
@@ -6190,7 +6397,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
total -= num_bytes;
bytenr += num_bytes;
}
- return 0;
+
+ /* Modified block groups are accounted for in the delayed_refs_rsv. */
+ btrfs_update_delayed_refs_rsv(trans);
+ return ret;
}
static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
@@ -6222,7 +6432,7 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info,
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
- cache->space_info->bytes_pinned += num_bytes;
+ update_bytes_pinned(cache->space_info, num_bytes);
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
@@ -6431,7 +6641,7 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
} else {
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
- space_info->bytes_may_use -= ram_bytes;
+ update_bytes_may_use(space_info, -ram_bytes);
if (delalloc)
cache->delalloc_bytes += num_bytes;
}
@@ -6587,7 +6797,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
cache->pinned -= len;
- space_info->bytes_pinned -= len;
+ update_bytes_pinned(space_info, -len);
trace_btrfs_space_reservation(fs_info, "pinned",
space_info->flags, len, 0);
@@ -6608,7 +6818,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
to_add = min(len, global_rsv->size -
global_rsv->reserved);
global_rsv->reserved += to_add;
- space_info->bytes_may_use += to_add;
+ update_bytes_may_use(space_info, to_add);
if (global_rsv->reserved >= global_rsv->size)
global_rsv->full = 1;
trace_btrfs_space_reservation(fs_info,
@@ -6647,9 +6857,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
unpin = &fs_info->freed_extents[0];
while (!trans->aborted) {
+ struct extent_state *cached_state = NULL;
+
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, NULL);
+ EXTENT_DIRTY, &cached_state);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
@@ -6659,9 +6871,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
ret = btrfs_discard_extent(fs_info, start,
end + 1 - start, NULL);
- clear_extent_dirty(unpin, start, end);
+ clear_extent_dirty(unpin, start, end, &cached_state);
unpin_extent_range(fs_info, start, end, true);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
+ free_extent_state(cached_state);
cond_resched();
}
@@ -6955,12 +7168,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
goto out;
- if (head->extent_op) {
- if (!head->must_insert_reserved)
- goto out;
- btrfs_free_delayed_extent_op(head->extent_op);
- head->extent_op = NULL;
- }
+ if (cleanup_extent_op(head) != NULL)
+ goto out;
/*
* waiting for the lock here would deadlock. If someone else has it
@@ -6969,22 +7178,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (!mutex_trylock(&head->mutex))
goto out;
- /*
- * at this point we have a head with no other entries. Go
- * ahead and process it.
- */
- rb_erase_cached(&head->href_node, &delayed_refs->href_root);
- RB_CLEAR_NODE(&head->href_node);
- atomic_dec(&delayed_refs->num_entries);
-
- /*
- * we don't take a ref on the node because we're removing it from the
- * tree, so we just steal the ref the tree was holding.
- */
- delayed_refs->num_heads--;
- if (head->processing == 0)
- delayed_refs->num_heads_ready--;
+ btrfs_delete_ref_head(delayed_refs, head);
head->processing = 0;
+
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
@@ -6992,6 +7188,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (head->must_insert_reserved)
ret = 1;
+ cleanup_ref_head_accounting(trans, head);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
return ret;
@@ -7239,6 +7436,345 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
}
/*
+ * Structure used internally by find_free_extent(). Wraps the needed
+ * parameters.
+ */
+struct find_free_extent_ctl {
+ /* Basic allocation info */
+ u64 ram_bytes;
+ u64 num_bytes;
+ u64 empty_size;
+ u64 flags;
+ int delalloc;
+
+ /* Where to start the search inside the bg */
+ u64 search_start;
+
+ /* For clustered allocation */
+ u64 empty_cluster;
+
+ bool have_caching_bg;
+ bool orig_have_caching_bg;
+
+ /* RAID index, converted from flags */
+ int index;
+
+ /*
+ * Current loop number, check find_free_extent_update_loop() for details
+ */
+ int loop;
+
+ /*
+ * Whether we're refilling a cluster; if true we need to re-search the
+ * current block group but shouldn't try to refill the cluster again.
+ */
+ bool retry_clustered;
+
+ /*
+ * Whether we're updating the free space cache; if true we need to re-search
+ * the current block group but shouldn't try updating the free space cache
+ * again.
+ */
+ bool retry_unclustered;
+
+ /* If current block group is cached */
+ int cached;
+
+ /* Max contiguous hole found */
+ u64 max_extent_size;
+
+ /* Total free space from free space cache, not always contiguous */
+ u64 total_free_space;
+
+ /* Found result */
+ u64 found_offset;
+};
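
A sketch of how the refactored find_free_extent() might seed this context before its search loop (illustrative only; the initializer values are placeholders taken from the allocator's usual parameters, and btrfs_bg_flags_to_raid_index() is assumed to be the existing flags-to-RAID-index helper):

/* Sketch: seeding the allocation context at the top of a search. */
struct find_free_extent_ctl ffe_ctl = {
	.ram_bytes	= ram_bytes,	/* caller-visible allocation size */
	.num_bytes	= num_bytes,	/* size actually reserved on disk */
	.empty_size	= empty_size,
	.flags		= flags,	/* block group profile/type bits */
	.delalloc	= delalloc,
	.index		= btrfs_bg_flags_to_raid_index(flags),
	.have_caching_bg = false,
	.orig_have_caching_bg = false,
	.found_offset	= 0,
};
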
+
+
+/*
+ * Helper function for find_free_extent().
+ *
+ * Return -ENOENT to inform caller that we need to fall back to unclustered mode.
+ * Return -EAGAIN to inform caller that we need to re-search this block group.
+ * Return >0 to inform caller that we found nothing.
+ * Return 0 means we have found a location and set ffe_ctl->found_offset.
+ */
+static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
+ struct btrfs_free_cluster *last_ptr,
+ struct find_free_extent_ctl *ffe_ctl,
+ struct btrfs_block_group_cache **cluster_bg_ret)
+{
+ struct btrfs_fs_info *fs_info = bg->fs_info;
+ struct btrfs_block_group_cache *cluster_bg;
+ u64 aligned_cluster;
+ u64 offset;
+ int ret;
+
+ cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
+ if (!cluster_bg)
+ goto refill_cluster;
+ if (cluster_bg != bg && (cluster_bg->ro ||
+ !block_group_bits(cluster_bg, ffe_ctl->flags)))
+ goto release_cluster;
+
+ offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
+ ffe_ctl->num_bytes, cluster_bg->key.objectid,
+ &ffe_ctl->max_extent_size);
+ if (offset) {
+ /* We have a block, we're done */
+ spin_unlock(&last_ptr->refill_lock);
+ trace_btrfs_reserve_extent_cluster(cluster_bg,
+ ffe_ctl->search_start, ffe_ctl->num_bytes);
+ *cluster_bg_ret = cluster_bg;
+ ffe_ctl->found_offset = offset;
+ return 0;
+ }
+ WARN_ON(last_ptr->block_group != cluster_bg);
+
+release_cluster:
+ /*
+ * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new cluster, so
+ * let's just skip it and let the allocator find whatever block it can
+ * find. If we reach this point, we will have tried the cluster
+ * allocator plenty of times and not have found anything, so we are
+ * likely way too fragmented for the clustering stuff to find anything.
+ *
+ * However, if the cluster is taken from the current block group,
+ * release the cluster first, so that we stand a better chance of
+ * succeeding in the unclustered allocation.
+ */
+ if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
+ spin_unlock(&last_ptr->refill_lock);
+ btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
+ return -ENOENT;
+ }
+
+ /* This cluster didn't work out, free it and start over */
+ btrfs_return_cluster_to_free_space(NULL, last_ptr);
+
+ if (cluster_bg != bg)
+ btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
+
+refill_cluster:
+ if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
+ spin_unlock(&last_ptr->refill_lock);
+ return -ENOENT;
+ }
+
+ aligned_cluster = max_t(u64,
+ ffe_ctl->empty_cluster + ffe_ctl->empty_size,
+ bg->full_stripe_len);
+ ret = btrfs_find_space_cluster(fs_info, bg, last_ptr,
+ ffe_ctl->search_start, ffe_ctl->num_bytes,
+ aligned_cluster);
+ if (ret == 0) {
+ /* Now pull our allocation out of this cluster */
+ offset = btrfs_alloc_from_cluster(bg, last_ptr,
+ ffe_ctl->num_bytes, ffe_ctl->search_start,
+ &ffe