summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-19 14:36:00 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-19 14:36:00 -0800
commit: 2b9fb532d4168e8974fe49709e2c4c8d5352a64c (patch)
tree: 610cbe2d1bb32e28db135a767f158ade31452e2e /fs/btrfs
parent: 4533f6e27a366ecc3da4876074ebfe0cc0ea4f0f (diff)
parent: a742994aa2e271eb8cd8e043d276515ec858ed73 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fixup some long standing warts in the code and add improvements on top of the scrubbing support from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and Filipe continues hammering on corner cases in fsync and others

  This all was held up a little trying to track down a use-after-free in btrfs raid5/6. It's not clear yet if this is just made easier to trigger with this pull or if it's a new bug from the raid5/6 cleanups. Dave Sterba is the only one to trigger it so far, but he has a consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/backref.c 28
-rw-r--r-- fs/btrfs/backref.h 3
-rw-r--r-- fs/btrfs/btrfs_inode.h 3
-rw-r--r-- fs/btrfs/ctree.c 55
-rw-r--r-- fs/btrfs/ctree.h 39
-rw-r--r-- fs/btrfs/delayed-inode.c 38
-rw-r--r-- fs/btrfs/dev-replace.c 25
-rw-r--r-- fs/btrfs/disk-io.c 102
-rw-r--r-- fs/btrfs/disk-io.h 6
-rw-r--r-- fs/btrfs/extent-tree.c 250
-rw-r--r-- fs/btrfs/extent_io.c 87
-rw-r--r-- fs/btrfs/extent_io.h 65
-rw-r--r-- fs/btrfs/free-space-cache.c 13
-rw-r--r-- fs/btrfs/inode-item.c 9
-rw-r--r-- fs/btrfs/inode.c 156
-rw-r--r-- fs/btrfs/qgroup.c 3
-rw-r--r-- fs/btrfs/raid56.c 103
-rw-r--r-- fs/btrfs/raid56.h 11
-rw-r--r-- fs/btrfs/reada.c 19
-rw-r--r-- fs/btrfs/relocation.c 12
-rw-r--r-- fs/btrfs/scrub.c 309
-rw-r--r-- fs/btrfs/send.c 9
-rw-r--r-- fs/btrfs/super.c 6
-rw-r--r-- fs/btrfs/sysfs.c 10
-rw-r--r-- fs/btrfs/tests/extent-buffer-tests.c 2
-rw-r--r-- fs/btrfs/tests/extent-io-tests.c 3
-rw-r--r-- fs/btrfs/tests/inode-tests.c 4
-rw-r--r-- fs/btrfs/tests/qgroup-tests.c 23
-rw-r--r-- fs/btrfs/transaction.c 27
-rw-r--r-- fs/btrfs/transaction.h 7
-rw-r--r-- fs/btrfs/tree-log.c 234
-rw-r--r-- fs/btrfs/volumes.c 242
-rw-r--r-- fs/btrfs/volumes.h 18
33 files changed, 1062 insertions, 859 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8729cf68d2fe..f55721ff9385 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1246,25 +1246,6 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
return ret;
}
-/*
- * this makes the path point to (inum INODE_ITEM ioff)
- */
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
- struct btrfs_path *path)
-{
- struct btrfs_key key;
- return btrfs_find_item(fs_root, path, inum, ioff,
- BTRFS_INODE_ITEM_KEY, &key);
-}
-
-static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
- struct btrfs_path *path,
- struct btrfs_key *found_key)
-{
- return btrfs_find_item(fs_root, path, inum, ioff,
- BTRFS_INODE_REF_KEY, found_key);
-}
-
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
u64 start_off, struct btrfs_path *path,
struct btrfs_inode_extref **ret_extref,
@@ -1374,7 +1355,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
- ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+ ret = btrfs_find_item(fs_root, path, parent, 0,
+ BTRFS_INODE_REF_KEY, &found_key);
if (ret > 0)
ret = -ENOENT;
if (ret)
@@ -1727,8 +1709,10 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_key found_key;
while (!ret) {
- ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
- &found_key);
+ ret = btrfs_find_item(fs_root, path, inum,
+ parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY,
+ &found_key);
+
if (ret < 0)
break;
if (ret) {
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 2a1ac6bfc724..9c41fbac3009 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -32,9 +32,6 @@ struct inode_fs_paths {
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
void *ctx);
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
- struct btrfs_path *path);
-
int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
struct btrfs_path *path, struct btrfs_key *found_key,
u64 *flags);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4aadadcfab20..de5e4f2adfea 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -185,6 +185,9 @@ struct btrfs_inode {
struct btrfs_delayed_node *delayed_node;
+ /* File creation time. */
+ struct timespec i_otime;
+
struct inode vfs_inode;
};
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 14a72ed14ef7..993642199326 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -213,11 +213,19 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
*/
static void add_root_to_dirty_list(struct btrfs_root *root)
{
+ if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
+ !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
+ return;
+
spin_lock(&root->fs_info->trans_lock);
- if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
- list_empty(&root->dirty_list)) {
- list_add(&root->dirty_list,
- &root->fs_info->dirty_cowonly_roots);
+ if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
+ /* Want the extent tree to be the last on the list */
+ if (root->objectid == BTRFS_EXTENT_TREE_OBJECTID)
+ list_move_tail(&root->dirty_list,
+ &root->fs_info->dirty_cowonly_roots);
+ else
+ list_move(&root->dirty_list,
+ &root->fs_info->dirty_cowonly_roots);
}
spin_unlock(&root->fs_info->trans_lock);
}
@@ -1363,8 +1371,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
BUG_ON(tm->slot != 0);
- eb_rewin = alloc_dummy_extent_buffer(eb->start,
- fs_info->tree_root->nodesize);
+ eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
if (!eb_rewin) {
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
@@ -1444,7 +1451,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
} else if (old_root) {
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
- eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+ eb = alloc_dummy_extent_buffer(root->fs_info, logical);
} else {
btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
eb = btrfs_clone_extent_buffer(eb_root);
@@ -2282,7 +2289,7 @@ static void reada_for_search(struct btrfs_root *root,
if ((search <= target && target - search <= 65536) ||
(search > target && search - target <= 65536)) {
gen = btrfs_node_ptr_generation(node, nr);
- readahead_tree_block(root, search, blocksize);
+ readahead_tree_block(root, search);
nread += blocksize;
}
nscan++;
@@ -2301,7 +2308,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
u64 gen;
u64 block1 = 0;
u64 block2 = 0;
- int blocksize;
parent = path->nodes[level + 1];
if (!parent)
@@ -2309,7 +2315,6 @@ static noinline void reada_for_balance(struct btrfs_root *root,
nritems = btrfs_header_nritems(parent);
slot = path->slots[level + 1];
- blocksize = root->nodesize;
if (slot > 0) {
block1 = btrfs_node_blockptr(parent, slot - 1);
@@ -2334,9 +2339,9 @@ static noinline void reada_for_balance(struct btrfs_root *root,
}
if (block1)
- readahead_tree_block(root, block1, blocksize);
+ readahead_tree_block(root, block1);
if (block2)
- readahead_tree_block(root, block2, blocksize);
+ readahead_tree_block(root, block2);
}
@@ -2609,32 +2614,24 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
return 0;
}
-int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
u64 iobjectid, u64 ioff, u8 key_type,
struct btrfs_key *found_key)
{
int ret;
struct btrfs_key key;
struct extent_buffer *eb;
- struct btrfs_path *path;
+
+ ASSERT(path);
+ ASSERT(found_key);
key.type = key_type;
key.objectid = iobjectid;
key.offset = ioff;
- if (found_path == NULL) {
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- } else
- path = found_path;
-
ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
- if ((ret < 0) || (found_key == NULL)) {
- if (path != found_path)
- btrfs_free_path(path);
+ if (ret < 0)
return ret;
- }
eb = path->nodes[0];
if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
@@ -3383,7 +3380,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
extent_buffer_get(c);
path->nodes[level] = c;
- path->locks[level] = BTRFS_WRITE_LOCK;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->slots[level] = 0;
return 0;
}
@@ -4356,13 +4353,15 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
path->search_for_split = 1;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
path->search_for_split = 0;
+ if (ret > 0)
+ ret = -EAGAIN;
if (ret < 0)
goto err;
ret = -EAGAIN;
leaf = path->nodes[0];
- /* if our item isn't there or got smaller, return now */
- if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
+ /* if our item isn't there, return now */
+ if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
goto err;
/* the leaf has changed, it now has room. return now */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0b180708bf79..84c3b00f3de8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024)
+#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+
/*
* The key defines the order in the tree, and so it also defines (optimal)
* block layout.
@@ -1020,6 +1022,9 @@ enum btrfs_raid_types {
BTRFS_BLOCK_GROUP_RAID6 | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
+ BTRFS_BLOCK_GROUP_RAID6)
+
/*
* We need a bit for restriper to be able to tell when chunks of type
* SINGLE are available. This "extended" profile format is used in
@@ -1239,7 +1244,6 @@ enum btrfs_disk_cache_state {
BTRFS_DC_ERROR = 1,
BTRFS_DC_CLEAR = 2,
BTRFS_DC_SETUP = 3,
- BTRFS_DC_NEED_WRITE = 4,
};
struct btrfs_caching_control {
@@ -1277,7 +1281,6 @@ struct btrfs_block_group_cache {
unsigned long full_stripe_len;
unsigned int ro:1;
- unsigned int dirty:1;
unsigned int iref:1;
unsigned int has_caching_ctl:1;
unsigned int removed:1;
@@ -1315,6 +1318,9 @@ struct btrfs_block_group_cache {
struct list_head ro_list;
atomic_t trimming;
+
+ /* For dirty block groups */
+ struct list_head dirty_list;
};
/* delayed seq elem */
@@ -1741,6 +1747,7 @@ struct btrfs_fs_info {
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;
+ struct mutex unused_bg_unpin_mutex;
/* For btrfs to record security options */
struct security_mnt_opts security_opts;
@@ -1776,6 +1783,7 @@ struct btrfs_subvolume_writers {
#define BTRFS_ROOT_DEFRAG_RUNNING 6
#define BTRFS_ROOT_FORCE_COW 7
#define BTRFS_ROOT_MULTI_LOG_TASKS 8
+#define BTRFS_ROOT_DIRTY 9
/*
* in ram representation of the tree. extent_root is used for all allocations
@@ -1794,8 +1802,6 @@ struct btrfs_root {
struct btrfs_fs_info *fs_info;
struct extent_io_tree dirty_log_pages;
- struct kobject root_kobj;
- struct completion kobj_unregister;
struct mutex objectid_mutex;
spinlock_t accounting_lock;
@@ -2465,31 +2471,6 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
-
-static inline struct btrfs_timespec *
-btrfs_inode_atime(struct btrfs_inode_item *inode_item)
-{
- unsigned long ptr = (unsigned long)inode_item;
- ptr += offsetof(struct btrfs_inode_item, atime);
- return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
-{
- unsigned long ptr = (unsigned long)inode_item;
- ptr += offsetof(struct btrfs_inode_item, mtime);
- return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
-{
- unsigned long ptr = (unsigned long)inode_item;
- ptr += offsetof(struct btrfs_inode_item, ctime);
- return (struct btrfs_timespec *)ptr;
-}
-
BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index de4e70fb3cbb..82f0c7c95474 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1755,27 +1755,31 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
btrfs_set_stack_inode_block_group(inode_item, 0);
- btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
+ btrfs_set_stack_timespec_sec(&inode_item->atime,
inode->i_atime.tv_sec);
- btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
+ btrfs_set_stack_timespec_nsec(&inode_item->atime,
inode->i_atime.tv_nsec);
- btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
+ btrfs_set_stack_timespec_sec(&inode_item->mtime,
inode->i_mtime.tv_sec);
- btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
+ btrfs_set_stack_timespec_nsec(&inode_item->mtime,
inode->i_mtime.tv_nsec);
- btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
+ btrfs_set_stack_timespec_sec(&inode_item->ctime,
inode->i_ctime.tv_sec);
- btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
+ btrfs_set_stack_timespec_nsec(&inode_item->ctime,
inode->i_ctime.tv_nsec);
+
+ btrfs_set_stack_timespec_sec(&inode_item->otime,
+ BTRFS_I(inode)->i_otime.tv_sec);
+ btrfs_set_stack_timespec_nsec(&inode_item->otime,
+ BTRFS_I(inode)->i_otime.tv_nsec);
}
int btrfs_fill_inode(struct inode *inode, u32 *rdev)
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;
- struct btrfs_timespec *tspec;
delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
@@ -1802,17 +1806,19 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
*rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
- tspec = btrfs_inode_atime(inode_item);
- inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
- inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+ inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
+ inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
+
+ inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
+ inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
- tspec = btrfs_inode_mtime(inode_item);
- inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
- inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+ inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
+ inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
- tspec = btrfs_inode_ctime(inode_item);
- inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
- inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+ BTRFS_I(inode)->i_otime.tv_sec =
+ btrfs_stack_timespec_sec(&inode_item->otime);
+ BTRFS_I(inode)->i_otime.tv_nsec =
+ btrfs_stack_timespec_nsec(&inode_item->otime);
inode->i_generation = BTRFS_I(inode)->generation;
BTRFS_I(inode)->index_cnt = (u64)-1;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ca6a3a3b6b6c..5ec03d999c37 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -440,18 +440,9 @@ leave:
*/
static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
{
- s64 writers;
- DEFINE_WAIT(wait);
-
set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
- do {
- prepare_to_wait(&fs_info->replace_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- writers = percpu_counter_sum(&fs_info->bio_counter);
- if (writers)
- schedule();
- finish_wait(&fs_info->replace_wait, &wait);
- } while (writers);
+ wait_event(fs_info->replace_wait, !percpu_counter_sum(
+ &fs_info->bio_counter));
}
/*
@@ -932,15 +923,15 @@ void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
{
- DEFINE_WAIT(wait);
-again:
- percpu_counter_inc(&fs_info->bio_counter);
- if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
+ while (1) {
+ percpu_counter_inc(&fs_info->bio_counter);
+ if (likely(!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
+ &fs_info->fs_state)))
+ break;
+
btrfs_bio_counter_dec(fs_info);
wait_event(fs_info->replace_wait,
!test_bit(BTRFS_FS_STATE_DEV_REPLACING,
&fs_info->fs_state));
- goto again;
}
-
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1afb18226da8..f79f38542a73 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
- printk_ratelimited(KERN_INFO
+ printk_ratelimited(KERN_WARNING
"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
"level %d\n",
root->fs_info->sb->s_id, buf->start,
@@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
ret = 0;
goto out;
}
- printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
+ printk_ratelimited(KERN_ERR
+ "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
eb->fs_info->sb->s_id, eb->start,
parent_transid, btrfs_header_generation(eb));
ret = 1;
@@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
+ printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
"%llu %llu\n",
eb->fs_info->sb->s_id, found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root, eb)) {
- printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
+ printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
eb->fs_info->sb->s_id, eb->start);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
- btrfs_info(root->fs_info, "bad tree block level %d",
+ btrfs_err(root->fs_info, "bad tree block level %d",
(int)btrfs_header_level(eb));
ret = -EIO;
goto err;
@@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = {
.set_page_dirty = btree_set_page_dirty,
};
-void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
+void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = root->fs_info->btree_inode;
- buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
@@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
free_extent_buffer(buf);
}
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
int mirror_num, struct extent_buffer **eb)
{
struct extent_buffer *buf = NULL;
@@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
int ret;
- buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return 0;
@@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
}
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize)
+ u64 bytenr)
{
if (btrfs_test_is_dummy_root(root))
- return alloc_test_extent_buffer(root->fs_info, bytenr,
- blocksize);
- return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
+ return alloc_test_extent_buffer(root->fs_info, bytenr);
+ return alloc_extent_buffer(root->fs_info, bytenr);
}
@@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
struct extent_buffer *buf = NULL;
int ret;
- buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
+ buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return NULL;
@@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
- memset(&root->root_kobj, 0, sizeof(root->root_kobj));
if (fs_info)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
- init_completion(&root->kobj_unregister);
root->root_key.objectid = objectid;
root->anon_dev = 0;
@@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
bool check_ref)
{
struct btrfs_root *root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
int ret;
if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
@@ -1669,8 +1669,17 @@ again:
if (ret)
goto fail;
- ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
- location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ key.objectid = BTRFS_ORPHAN_OBJECTID;
+ key.type = BTRFS_ORPHAN_ITEM_KEY;
+ key.offset = location->objectid;
+
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
+ btrfs_free_path(path);
if (ret < 0)
goto fail;
if (ret == 0)
@@ -2232,6 +2241,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
+ mutex_init(&fs_info->unused_bg_unpin_mutex);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
@@ -2496,7 +2506,7 @@ int open_ctree(struct super_block *sb,
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
- printk(KERN_ERR "BTRFS: has skinny extents\n");
+ printk(KERN_INFO "BTRFS: has skinny extents\n");
/*
* flag our filesystem as having big metadata blocks if
@@ -2520,7 +2530,7 @@ int open_ctree(struct super_block *sb,
*/
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(sectorsize != nodesize)) {
- printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
+ printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
"are not allowed for mixed block groups on %s\n",
sb->s_id);
goto fail_alloc;
@@ -2628,12 +2638,12 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize_bits = blksize_bits(sectorsize);
if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
+ printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
goto fail_sb_buffer;
}
if (sectorsize != PAGE_SIZE) {
- printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
+ printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
"found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer;
}
@@ -2642,7 +2652,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
- printk(KERN_WARNING "BTRFS: failed to read the system "
+ printk(KERN_ERR "BTRFS: failed to read the system "
"array on %s\n", sb->s_id);
goto fail_sb_buffer;
}
@@ -2657,7 +2667,7 @@ int open_ctree(struct super_block *sb,
generation);
if (!chunk_root->node ||
!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
- printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
+ printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@@ -2669,7 +2679,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_chunk_tree(chunk_root);
if (ret) {
- printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
+ printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@@ -2681,7 +2691,7 @@ int open_ctree(struct super_block *sb,
btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) {
- printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
+ printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@@ -2765,7 +2775,7 @@ retry_root_backup:
ret = btrfs_recover_balance(fs_info);
if (ret) {
- printk(KERN_WARNING "BTRFS: failed to recover balance\n");
+ printk(KERN_ERR "BTRFS: failed to recover balance\n");
goto fail_block_groups;
}
@@ -3860,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
btrfs_super_log_root(sb));
+ /*
+ * Check the lower bound, the alignment and other constraints are
+ * checked later.
+ */
+ if (btrfs_super_nodesize(sb) < 4096) {
+ printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
+ btrfs_super_nodesize(sb));
+ ret = -EINVAL;
+ }
+ if (btrfs_super_sectorsize(sb) < 4096) {
+ printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
+ btrfs_super_sectorsize(sb));
+ ret = -EINVAL;
+ }
+
if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
fs_info->fsid, sb->dev_item.fsid);
@@ -3873,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
if (btrfs_super_num_devices(sb) > (1UL << 31))
printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
btrfs_super_num_devices(sb));
+ if (btrfs_super_num_devices(sb) == 0) {
+ printk(KERN_ERR "BTRFS: number of devices is 0\n");
+ ret = -EINVAL;
+ }
if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
@@ -3881,6 +3910,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
}
/*
+ * Obvious sys_chunk_array corruptions, it must hold at least one key
+ * and one chunk
+ */
+ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+ printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
+ btrfs_super_sys_array_size(sb),
+ BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+ ret = -EINVAL;
+ }
+ if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+ + sizeof(struct btrfs_chunk)) {
+ printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
+ btrfs_super_sys_array_size(sb),
+ sizeof(struct btrfs_disk_k