summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-03-24 15:01:56 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:00 -0400
commit0b86a832a1f38abec695864ec2eaedc9d2383f1b (patch)
tree8ec0db0b55f77eb6a23fe7db3c1064d298db55c1
parent7f93bf8d27653726e3721c01fefc523487ecf2af (diff)
Btrfs: Add support for multiple devices per filesystem
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c38
-rw-r--r--fs/btrfs/ctree.h313
-rw-r--r--fs/btrfs/disk-io.c97
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c429
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/inode.c26
-rw-r--r--fs/btrfs/print-tree.c48
-rw-r--r--fs/btrfs/transaction.c50
-rw-r--r--fs/btrfs/volumes.c852
-rw-r--r--fs/btrfs/volumes.h78
13 files changed, 1563 insertions, 380 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9c61609f3714..5ac6f2946c21 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o bit-radix.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
- extent_io.o
+ extent_io.o volumes.o
ifeq ($(CONFIG_FS_POSIX_ACL),y)
btrfs-y += acl.o
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ef8809c1e69c..70b6ddfe15a1 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -70,6 +70,14 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
memset(p, 0, sizeof(*p));
}
+/*
+ * Put @root on fs_info->dirty_cowonly_roots.  Roots that do not have
+ * track_dirty set are ignored, and a root whose dirty_list is already
+ * linked somewhere is left alone so it is never added twice.
+ */
+static void add_root_to_dirty_list(struct btrfs_root *root)
+{
+	if (!root->track_dirty)
+		return;
+	if (!list_empty(&root->dirty_list))
+		return;
+	list_add(&root->dirty_list, &root->fs_info->dirty_cowonly_roots);
+}
+
int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
@@ -196,6 +204,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
root_gen, 0, 0, 1);
}
free_extent_buffer(buf);
+ add_root_to_dirty_list(root);
} else {
root_gen = btrfs_header_generation(parent);
btrfs_set_node_blockptr(parent, parent_slot,
@@ -241,7 +250,7 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
return 0;
}
- search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
+ search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
ret = __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0);
return ret;
@@ -724,6 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(ret);
root->node = child;
+ add_root_to_dirty_list(root);
path->nodes[level] = NULL;
clean_tree_block(trans, root, mid);
wait_on_tree_block_writeback(root, mid);
@@ -1369,6 +1379,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
/* the super has an extra ref to root->node */
free_extent_buffer(root->node);
root->node = c;
+ add_root_to_dirty_list(root);
extent_buffer_get(c);
path->nodes[level] = c;
path->slots[level] = 0;
@@ -2777,3 +2788,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
}
return 0;
}
+
+/*
+ * Step @path backwards one item at a time until it rests on an item whose
+ * key type equals @type.
+ *
+ * The walk gives up as soon as it reaches a key whose objectid is below
+ * @min_objectid, which lets the caller bound how far back it looks.
+ *
+ * Returns 0 with @path positioned on the matching item, 1 when the walk
+ * hit the @min_objectid bound without a match, or the non-zero value
+ * btrfs_prev_leaf() returned when it could not supply an earlier leaf.
+ */
+int btrfs_previous_item(struct btrfs_root *root,
+			struct btrfs_path *path, u64 min_objectid,
+			int type)
+{
+	struct btrfs_key found_key;
+	struct extent_buffer *leaf;
+	int ret;
+
+	while (1) {
+		if (path->slots[0] == 0) {
+			/* already at the first slot: move to the previous leaf */
+			ret = btrfs_prev_leaf(root, path);
+			if (ret != 0)
+				return ret;
+		} else {
+			path->slots[0]--;
+		}
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		/* honor the lower bound before accepting a type match */
+		if (found_key.objectid < min_objectid)
+			break;
+		if (found_key.type == type)
+			return 0;
+	}
+	return 1;
+}
+
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 92d892f92075..1453d995fef6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -40,12 +40,44 @@ extern struct kmem_cache *btrfs_path_cachep;
#define BTRFS_MAGIC "_B4RfS_M"
#define BTRFS_MAX_LEVEL 8
+
+/* holds pointers to all of the tree roots */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/* stores information about which extents are in use, and reference counts */
#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/* one per subvolume, storing files and directories */
#define BTRFS_FS_TREE_OBJECTID 3ULL
+
+/* directory objectid inside the root tree */
#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL
+
+
+/*
+ * chunk tree stores translations from logical -> physical block numbering
+ * the super block points to the chunk tree
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 5ULL
+
+/*
+ * stores information about which areas of a given device are in use.
+ * one per device. The tree of tree roots points to the device tree
+ */
+#define BTRFS_DEV_TREE_OBJECTID 6ULL
+
+/*
+ * All files have objectids higher than this.
+ */
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+
+/*
+ * the device items go into the chunk tree. The key is in the form
+ * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
/*
* we can actually store much bigger names, but lets not confuse the rest
* of linux
@@ -95,6 +127,81 @@ struct btrfs_key {
u64 offset;
} __attribute__ ((__packed__));
+struct btrfs_mapping_tree {
+ struct extent_map_tree map_tree;
+};
+
+#define BTRFS_DEV_UUID_SIZE 16
+struct btrfs_dev_item {
+ /* the internal btrfs device id */
+ __le64 devid;
+
+ /* size of the device */
+ __le64 total_bytes;
+
+ /* bytes used */
+ __le64 bytes_used;
+
+ /* optimal io alignment for this device */
+ __le32 io_align;
+
+ /* optimal io width for this device */
+ __le32 io_width;
+
+ /* minimal io size for this device */
+ __le32 sector_size;
+
+ /* the kernel device number */
+ __le64 rdev;
+
+ /* type and info about this device */
+ __le64 type;
+
+ /* partition number, 0 for whole dev */
+ __le32 partition;
+
+ /* length of the name data at the end of the item */
+ __le16 name_len;
+
+ /* physical drive uuid (or lvm uuid) */
+ u8 uuid[BTRFS_DEV_UUID_SIZE];
+ /* name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+ __le64 devid;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+ __le64 owner;
+ __le64 stripe_len;
+ __le64 type;
+
+ /* optimal io alignment for this chunk */
+ __le32 io_align;
+
+ /* optimal io width for this chunk */
+ __le32 io_width;
+
+ /* minimal io size for this chunk */
+ __le32 sector_size;
+
+ /* 2^16 stripes is quite a lot, a second limit is the size of a single
+ * item in the btree
+ */
+ __le16 num_stripes;
+ struct btrfs_stripe stripe;
+ /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+/*
+ * Size in bytes of a chunk item that describes @num_stripes stripes.
+ * struct btrfs_chunk already embeds one struct btrfs_stripe, so only the
+ * remaining num_stripes - 1 stripes are added on top of it.  A stripe
+ * count of zero trips the BUG_ON.
+ */
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+	unsigned long extra_stripes;
+
+	BUG_ON(num_stripes == 0);
+	extra_stripes = num_stripes - 1;
+	return sizeof(struct btrfs_chunk) +
+		extra_stripes * sizeof(struct btrfs_stripe);
+}
+
#define BTRFS_FSID_SIZE 16
/*
* every tree block (leaf or node) starts with this header.
@@ -119,6 +226,13 @@ struct btrfs_header {
sizeof(struct btrfs_item) - \
sizeof(struct btrfs_file_extent_item))
+
+/*
+ * this is a very generous portion of the super block, giving us
+ * room to translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+
/*
* the super block basically lists the main trees of the FS
* it currently lacks any block count etc etc
@@ -131,6 +245,7 @@ struct btrfs_super_block {
__le64 magic;
__le64 generation;
__le64 root;
+ __le64 chunk_root;
__le64 total_bytes;
__le64 bytes_used;
__le64 root_dir_objectid;
@@ -138,7 +253,10 @@ struct btrfs_super_block {
__le32 nodesize;
__le32 leafsize;
__le32 stripesize;
+ __le32 sys_chunk_array_size;
u8 root_level;
+ u8 chunk_root_level;
+ u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
} __attribute__ ((__packed__));
/*
@@ -208,12 +326,22 @@ struct btrfs_extent_ref {
__le64 offset;
} __attribute__ ((__packed__));
+/* dev extents record free space on individual devices. The owner
+ * field points back to the chunk allocation mapping tree that allocated
+ * the extent
+ */
+struct btrfs_dev_extent {
+ __le64 owner;
+ __le64 length;
+} __attribute__ ((__packed__));
+
+
struct btrfs_inode_ref {
__le16 name_len;
/* name goes here */
} __attribute__ ((__packed__));
-struct btrfs_inode_timespec {
+struct btrfs_timespec {
__le64 sec;
__le32 nsec;
} __attribute__ ((__packed__));
@@ -231,13 +359,13 @@ struct btrfs_inode_item {
__le32 uid;
__le32 gid;
__le32 mode;
- __le32 rdev;
+ __le64 rdev;
__le16 flags;
__le16 compat_flags;
- struct btrfs_inode_timespec atime;
- struct btrfs_inode_timespec ctime;
- struct btrfs_inode_timespec mtime;
- struct btrfs_inode_timespec otime;
+ struct btrfs_timespec atime;
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec mtime;
+ struct btrfs_timespec otime;
} __attribute__ ((__packed__));
struct btrfs_dir_item {
@@ -290,29 +418,34 @@ struct btrfs_csum_item {
u8 csum;
} __attribute__ ((__packed__));
-/* tag for the radix tree of block groups in ram */
-#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024)
-
+/* different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
-#define BTRFS_BLOCK_GROUP_DATA 1
-#define BTRFS_BLOCK_GROUP_MIXED 2
struct btrfs_block_group_item {
__le64 used;
- u8 flags;
+ __le64 chunk_tree;
+ __le64 chunk_objectid;
+ __le64 flags;
} __attribute__ ((__packed__));
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
- int data;
- int cached;
u64 pinned;
+ u64 flags;
+ int cached;
};
+
+struct btrfs_device;
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
struct btrfs_root *extent_root;
struct btrfs_root *tree_root;
+ struct btrfs_root *chunk_root;
+ struct btrfs_root *dev_root;
struct radix_tree_root fs_roots_radix;
struct extent_io_tree free_space_cache;
@@ -321,6 +454,9 @@ struct btrfs_fs_info {
struct extent_io_tree pending_del;
struct extent_io_tree extent_ins;
+ /* logical->physical extent mapping */
+ struct btrfs_mapping_tree mapping_tree;
+
u64 generation;
u64 last_trans_committed;
unsigned long mount_opt;
@@ -330,6 +466,7 @@ struct btrfs_fs_info {
struct btrfs_transaction *running_transaction;
struct btrfs_super_block super_copy;
struct extent_buffer *sb_buffer;
+ struct block_device *__bdev;
struct super_block *sb;
struct inode *btree_inode;
spinlock_t hash_lock;
@@ -350,12 +487,17 @@ struct btrfs_fs_info {
unsigned long throttles;
u64 total_pinned;
+ struct list_head dirty_cowonly_roots;
+
+ struct list_head devices;
+ struct list_head *last_device;
spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;
u64 last_alloc;
u64 last_data_alloc;
};
+
/*
* in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root.
@@ -387,14 +529,19 @@ struct btrfs_root {
u64 highest_inode;
u64 last_inode_alloc;
int ref_cows;
+ int track_dirty;
struct btrfs_key defrag_progress;
int defrag_running;
int defrag_level;
char *name;
int in_sysfs;
+
+ /* the dirty list is only used by non-reference counted roots */
+ struct list_head dirty_list;
};
/*
+
* inode items have the data typically returned from stat and store other
* info about object characteristics. There is one for every file and dir in
* the FS
@@ -439,6 +586,10 @@ struct btrfs_root {
*/
#define BTRFS_BLOCK_GROUP_ITEM_KEY 50
+#define BTRFS_DEV_EXTENT_KEY 75
+#define BTRFS_DEV_ITEM_KEY 76
+#define BTRFS_CHUNK_ITEM_KEY 77
+
/*
* string items are for debugging. They just store a short string of
* data in the FS
@@ -518,13 +669,104 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
s->member = cpu_to_le##bits(val); \
}
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64);
+BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32);
+BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16);
+
+/* Address of the uuid[] field inside the device item @d. */
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+	unsigned long ptr = (unsigned long)d;
+
+	ptr += offsetof(struct btrfs_dev_item, uuid);
+	return (char *)ptr;
+}
+
+/*
+ * Address of the name data, which is stored immediately after the fixed
+ * part of the device item @d.
+ */
+static inline char *btrfs_device_name(struct btrfs_dev_item *d)
+{
+	unsigned long ptr = (unsigned long)d;
+
+	ptr += sizeof(struct btrfs_dev_item);
+	return (char *)ptr;
+}
+
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+ stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+ io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+ io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+ sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+ num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+/*
+ * Locate stripe number @nr of chunk @c.  Stripe 0 is the 'stripe' member
+ * embedded in struct btrfs_chunk; later stripes follow it back to back.
+ */
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+						   int nr)
+{
+	unsigned long first_stripe = (unsigned long)c +
+				     offsetof(struct btrfs_chunk, stripe);
+
+	return (struct btrfs_stripe *)(first_stripe +
+				       nr * sizeof(struct btrfs_stripe));
+}
+
+/* Read the offset field of stripe @nr of chunk @c through @eb. */
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+					 struct btrfs_chunk *c, int nr)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	return btrfs_stripe_offset(eb, stripe);
+}
+
+/* Store @val into the offset field of stripe @nr of chunk @c through @eb. */
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+					      struct btrfs_chunk *c, int nr,
+					      u64 val)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	btrfs_set_stripe_offset(eb, stripe, val);
+}
+
+/* Read the devid field of stripe @nr of chunk @c through @eb. */
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+					struct btrfs_chunk *c, int nr)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	return btrfs_stripe_devid(eb, stripe);
+}
+
+/* Store @val into the devid field of stripe @nr of chunk @c through @eb. */
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+					     struct btrfs_chunk *c, int nr,
+					     u64 val)
+{
+	struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+	btrfs_set_stripe_devid(eb, stripe, val);
+}
+
/* struct btrfs_block_group_item */
BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
used, 64);
BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
used, 64);
-BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item,
- flags, 8);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item,
+ chunk_tree, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item,
+ chunk_tree, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
/* struct btrfs_inode_ref */
BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
@@ -538,49 +780,53 @@ BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
-BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16);
BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item,
compat_flags, 16);
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
btrfs_inode_atime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, atime);
- return (struct btrfs_inode_timespec *)ptr;
+ return (struct btrfs_timespec *)ptr;
}
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, mtime);
- return (struct btrfs_inode_timespec *)ptr;
+ return (struct btrfs_timespec *)ptr;
}
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, ctime);
- return (struct btrfs_inode_timespec *)ptr;
+ return (struct btrfs_timespec *)ptr;
}
-static inline struct btrfs_inode_timespec *
+static inline struct btrfs_timespec *
btrfs_inode_otime(struct btrfs_inode_item *inode_item)
{
unsigned long ptr = (unsigned long)inode_item;
ptr += offsetof(struct btrfs_inode_item, otime);
- return (struct btrfs_inode_timespec *)ptr;
+ return (struct btrfs_timespec *)ptr;
}
-BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64);
-BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32);
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
/* struct btrfs_extent_item */
BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32);
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
/* struct btrfs_extent_ref */
BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64);
BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64);
@@ -846,8 +1092,14 @@ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+ struct btrfs_super_block, sys_chunk_array_size, 32);
BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+ chunk_root, 64);
+/*
+ * chunk_root_level is a u8 member of struct btrfs_super_block, so the
+ * accessor width is 8 bits, the same as super_root_level.  A 64-bit
+ * accessor would byte-swap a one-byte field on big-endian hosts.
+ */
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+			 chunk_root_level, 8);
BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
total_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
@@ -1009,7 +1261,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_make_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytes_used,
+ u64 type, u64 chunk_tree, u64 chunk_objectid,
+ u64 size);
/* ctree.c */
+int btrfs_previous_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid,
+ int type);
int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *parent, int parent_slot,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 88e21bdbc478..8e37fa120cc8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
+#include "volumes.h"
#include "print-tree.h"
#if 0
@@ -234,6 +235,19 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
return 0;
}
+/*
+ * submit_bio hook for the btree inode.
+ *
+ * A bio whose starting byte offset equals BTRFS_SUPER_INFO_OFFSET is sent
+ * straight to the block device of the mounted super block; every other bio
+ * is handed to btrfs_map_bio().
+ *
+ * Returns 0 for the super block case, otherwise the result of
+ * btrfs_map_bio().
+ */
+static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	/* widen before shifting: sector_t can be narrower than 64 bits */
+	u64 offset = (u64)bio->bi_sector << 9;
+
+	if (offset == BTRFS_SUPER_INFO_OFFSET) {
+		bio->bi_bdev = root->fs_info->sb->s_bdev;
+		submit_bio(rw, bio);
+		return 0;
+	}
+	return btrfs_map_bio(root, rw, bio);
+}
+
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
struct extent_io_tree *tree;
@@ -345,6 +359,23 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
return ret;
}
+/*
+ * Empty fs_info->devices: each btrfs_device on the list is unlinked and
+ * freed together with its name string.  Always returns 0.
+ */
+static int close_all_devices(struct btrfs_fs_info *fs_info)
+{
+	struct list_head *head = &fs_info->devices;
+	struct btrfs_device *dev;
+
+	while (!list_empty(head)) {
+		dev = list_entry(head->next, struct btrfs_device, dev_list);
+		list_del(&dev->dev_list);
+		kfree(dev->name);
+		kfree(dev);
+	}
+	return 0;
+}
+
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize)
{
@@ -420,6 +451,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->leafsize = leafsize;
root->stripesize = stripesize;
root->ref_cows = 0;
+ root->track_dirty = 0;
+
root->fs_info = fs_info;
root->objectid = objectid;
root->last_trans = 0;
@@ -427,6 +460,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->last_inode_alloc = 0;
root->name = NULL;
root->in_sysfs = 0;
+
+ INIT_LIST_HEAD(&root->dirty_list);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -634,6 +669,10 @@ struct btrfs_root *open_ctree(struct super_block *sb)
GFP_NOFS);
struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
GFP_NOFS);
+ struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root),
+ GFP_NOFS);
+ struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root),
+ GFP_NOFS);
int ret;
int err = -EIO;
struct btrfs_super_block *disk_super;
@@ -657,6 +696,12 @@ struct btrfs_root *open_ctree(struct super_block *sb)
fs_info->last_trans_committed = 0;
fs_info->tree_root = tree_root;
fs_info->extent_root = extent_root;
+ fs_info->chunk_root = chunk_root;
+ fs_info->dev_root = dev_root;
+ INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
+ INIT_LIST_HEAD(&fs_info->devices);
+ btrfs_mapping_init(&fs_info->mapping_tree);
+ fs_info->last_device = &fs_info->devices;
fs_info->sb = sb;
fs_info->throttles = 0;
fs_info->mount_opt = 0;
@@ -714,12 +759,12 @@ struct btrfs_root *open_ctree(struct super_block *sb)
goto fail_iput;
}
#endif
- __setup_root(512, 512, 512, 512, tree_root,
+ __setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
fs_info->sb_buffer = read_tree_block(tree_root,
BTRFS_SUPER_INFO_OFFSET,
- 512);
+ 4096);
if (!fs_info->sb_buffer)
goto fail_iput;
@@ -730,6 +775,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
(unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
BTRFS_FSID_SIZE);
+
disk_super = &fs_info->super_copy;
if (!btrfs_super_root(disk_super))
goto fail_sb_buffer;
@@ -753,23 +799,47 @@ struct btrfs_root *open_ctree(struct super_block *sb)
goto fail_sb_buffer;
}
+ mutex_lock(&fs_info->fs_mutex);
+ ret = btrfs_read_sys_array(tree_root);
+ BUG_ON(ret);
+
+ blocksize = btrfs_level_size(tree_root,
+ btrfs_super_chunk_root_level(disk_super));
+
+ __setup_root(nodesize, leafsize, sectorsize, stripesize,
+ chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+
+ chunk_root->node = read_tree_block(chunk_root,
+ btrfs_super_chunk_root(disk_super),
+ blocksize);
+ BUG_ON(!chunk_root->node);
+
+ ret = btrfs_read_chunk_tree(chunk_root);
+ BUG_ON(ret);
+
blocksize = btrfs_level_size(tree_root,
btrfs_super_root_level(disk_super));
+
tree_root->node = read_tree_block(tree_root,
btrfs_super_root(disk_super),
blocksize);
if (!tree_root->node)
goto fail_sb_buffer;
- mutex_lock(&fs_info->fs_mutex);
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_EXTENT_TREE_OBJECTID, extent_root);
- if (ret) {
- mutex_unlock(&fs_info->fs_mutex);
+ if (ret)
goto fail_tree_root;
- }
+ extent_root->track_dirty = 1;
+
+ ret = find_and_setup_root(tree_root, fs_info,
+ BTRFS_DEV_TREE_OBJECTID, dev_root);
+ dev_root->track_dirty = 1;
+
+ if (ret)
+ goto fail_extent_root;
btrfs_read_block_groups(extent_root);
@@ -777,7 +847,10 @@ struct btrfs_root *open_ctree(struct super_block *sb)
mutex_unlock(&fs_info->fs_mutex);
return tree_root;
+fail_extent_root:
+ free_extent_buffer(extent_root->node);
fail_tree_root:
+ mutex_unlock(&fs_info->fs_mutex);
free_extent_buffer(tree_root->node);
fail_sb_buffer:
free_extent_buffer(fs_info->sb_buffer);
@@ -874,6 +947,12 @@ int close_ctree(struct btrfs_root *root)
if (fs_info->tree_root->node)
free_extent_buffer(fs_info->tree_root->node);
+	/* release the chunk and dev root nodes only if they are set */
+	if (root->fs_info->chunk_root->node)
+		free_extent_buffer(root->fs_info->chunk_root->node);
+
+	if (root->fs_info->dev_root->node)
+		free_extent_buffer(root->fs_info->dev_root->node);
+
free_extent_buffer(fs_info->sb_buffer);
btrfs_free_block_groups(root->fs_info);
@@ -901,8 +980,13 @@ int close_ctree(struct btrfs_root *root)
kfree(hasher);
}
#endif
+ close_all_devices(fs_info);
+ btrfs_mapping_tree_free(&fs_info->mapping_tree);
+
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
+ kfree(fs_info->chunk_root);
+ kfree(fs_info->dev_root);
return 0;
}
@@ -1016,4 +1100,5 @@ int btrfs_read_buffer(struct extent_buffer *buf)
static struct extent_io_ops btree_extent_io_ops = {
.writepage_io_hook = btree_writepage_io_hook,
+ .submit_bio_hook = btree_submit_bio_hook,
};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 828f3a2081b6..206cb48638f7 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -20,6 +20,7 @@
#define __DISKIO__
#define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
+struct btrfs_device;
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize);
@@ -65,4 +66,5 @@ int btrfs_read_buffer(struct extent_buffer *buf);
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, char *result);
void btrfs_throttle(struct btrfs_root *root);
+int btrfs_open_device(struct btrfs_device *dev);
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ebfd304138ca..2cd957d6e8d8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -24,37 +24,19 @@
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
+#include "volumes.h"
-#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
+#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
+#define BLOCK_GROUP_SYSTEM EXTENT_NEW
+
#define BLOCK_GROUP_DIRTY EXTENT_DIRTY
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
btrfs_root *extent_root);
-static int find_previous_extent(struct btrfs_root *root,
- struct btrfs_path *path)
-{
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
- int ret;
- while(1) {
- if (path->slots[0] == 0) {
- ret = btrfs_prev_leaf(root, path);
- if (ret != 0)
- return ret;
- } else {
- path->slots[0]--;
- }
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
- return 0;
- }
- return 1;
-}
static int cache_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group)
@@ -91,7 +73,7 @@ static int cache_block_group(struct btrfs_root *root,
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
return ret;
- ret = find_previous_extent(root, path);
+ ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
if (ret < 0)
return ret;
if (ret == 0) {
@@ -168,7 +150,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
block_group_cache = &info->block_group_cache;
ret = find_first_extent_bit(block_group_cache,
bytenr, &start, &end,
- BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
+ BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
+ BLOCK_GROUP_SYSTEM);
if (ret) {
return NULL;
}
@@ -182,23 +165,38 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
return block_group;
return NULL;
}
-static u64 noinline find_search_start(struct btrfs_root *root,
+
+/*
+ * Report whether @cache belongs to one of the block group classes selected
+ * by @bits.  @bits is a mask of BLOCK_GROUP_DATA, BLOCK_GROUP_METADATA and
+ * BLOCK_GROUP_SYSTEM; each one is checked against the corresponding
+ * BTRFS_BLOCK_GROUP_* bit in cache->flags.
+ *
+ * Returns 1 if any requested class is present in the cache, 0 otherwise.
+ */
+static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+{
+	u64 flags = cache->flags;
+	int data_hit = (bits & BLOCK_GROUP_DATA) &&
+		       (flags & BTRFS_BLOCK_GROUP_DATA);
+	int meta_hit = (bits & BLOCK_GROUP_METADATA) &&
+		       (flags & BTRFS_BLOCK_GROUP_METADATA);
+	int sys_hit = (bits & BLOCK_GROUP_SYSTEM) &&
+		      (flags & BTRFS_BLOCK_GROUP_SYSTEM);
+
+	return data_hit || meta_hit || sys_hit;
+}
+
+static int noinline find_search_start(struct btrfs_root *root,
struct btrfs_block_group_cache **cache_ret,
- u64 search_start, int num, int data)
+ u64 *start_ret, int num, int data)
{
int ret;
struct btrfs_block_group_cache *cache = *cache_ret;
struct extent_io_tree *free_space_cache;
- struct extent_state *state;
u64 last;
u64 start = 0;
+ u64 end = 0;
u64 cache_miss = 0;
u64 total_fs_bytes;
+ u64 search_start = *start_ret;
int wrapped = 0;
- if (!cache) {
+ if (!cache)
goto out;
- }
total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
free_space_cache = &root->fs_info->free_space_cache;
@@ -208,6 +206,9 @@ again:
goto out;
last = max(search_start, cache->key.objectid);
+ if (!block_group_bits(cache, data)) {
+ goto new_group;
+ }
while(1) {
ret = find_first_extent_bit(&root->fs_info->free_space_cache,
@@ -225,22 +226,20 @@ again:
cache_miss = start;
continue;
}
- if (data != BTRFS_BLOCK_GROUP_MIXED &&
- start + num > cache->key.objectid + cache->key.offset)
+ if (start + num > cache->key.objectid + cache->key.offset)
goto new_group;
if (start + num > total_fs_bytes)
goto new_group;
- return start;
+ *start_ret = start;
+ return 0;
}
out:
cache = btrfs_lookup_block_group(root->fs_info, search_start);
if (!cache) {
- printk("Unable to find block group for %Lu\n",
- search_start);
+ printk("Unable to find block group for %Lu\n", search_start);
WARN_ON(1);
- return search_start;
}
- return search_start;
+ return -ENOSPC;
new_group:
last = cache->key.objectid + cache->key.offset;
@@ -251,7 +250,6 @@ no_cache:
if (!wrapped) {
wrapped = 1;
last = search_start;
- data = BTRFS_BLOCK_GROUP_MIXED;
goto wrapped;
}
goto out;
@@ -299,7 +297,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
int ret;
int full_search = 0;
int factor = 8;
- int data_swap = 0;
block_group_cache = &info->block_gro