summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/btrfs.txt261
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/check-integrity.c12
-rw-r--r--fs/btrfs/compression.h9
-rw-r--r--fs/btrfs/ctree.c36
-rw-r--r--fs/btrfs/ctree.h87
-rw-r--r--fs/btrfs/delayed-inode.c10
-rw-r--r--fs/btrfs/delayed-ref.c12
-rw-r--r--fs/btrfs/dev-replace.c134
-rw-r--r--fs/btrfs/dev-replace.h7
-rw-r--r--fs/btrfs/disk-io.c71
-rw-r--r--fs/btrfs/extent-tree.c40
-rw-r--r--fs/btrfs/extent_io.c40
-rw-r--r--fs/btrfs/extent_io.h5
-rw-r--r--fs/btrfs/extent_map.c8
-rw-r--r--fs/btrfs/file-item.c103
-rw-r--r--fs/btrfs/file.c158
-rw-r--r--fs/btrfs/inode-map.c3
-rw-r--r--fs/btrfs/inode.c326
-rw-r--r--fs/btrfs/ioctl.c35
-rw-r--r--fs/btrfs/ordered-data.c6
-rw-r--r--fs/btrfs/print-tree.c23
-rw-r--r--fs/btrfs/props.c1
-rw-r--r--fs/btrfs/reada.c268
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/scrub.c32
-rw-r--r--fs/btrfs/send.c37
-rw-r--r--fs/btrfs/super.c52
-rw-r--r--fs/btrfs/tests/btrfs-tests.c6
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c1
-rw-r--r--fs/btrfs/tests/inode-tests.c1
-rw-r--r--fs/btrfs/transaction.c13
-rw-r--r--fs/btrfs/tree-log.c102
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/volumes.c51
-rw-r--r--fs/btrfs/xattr.c67
36 files changed, 1102 insertions, 931 deletions
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
index c772b47e7ef0..f9dad22d95ce 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -1,20 +1,10 @@
-
BTRFS
=====
-Btrfs is a copy on write filesystem for Linux aimed at
-implementing advanced features while focusing on fault tolerance,
-repair and easy administration. Initially developed by Oracle, Btrfs
-is licensed under the GPL and open for contribution from anyone.
-
-Linux has a wealth of filesystems to choose from, but we are facing a
-number of challenges with scaling to the large storage subsystems that
-are becoming common in today's data centers. Filesystems need to scale
-in their ability to address and manage large storage, and also in
-their ability to detect, repair and tolerate errors in the data stored
-on disk. Btrfs is under heavy development, and is not suitable for
-any uses other than benchmarking and review. The Btrfs disk format is
-not yet finalized.
+Btrfs is a copy on write filesystem for Linux aimed at implementing advanced
+features while focusing on fault tolerance, repair and easy administration.
+Jointly developed by several companies, licensed under the GPL and open for
+contribution from anyone.
The main Btrfs features include:
@@ -28,243 +18,14 @@ The main Btrfs features include:
* Checksums on data and metadata (multiple algorithms available)
* Compression
* Integrated multiple device support, with several raid algorithms
- * Online filesystem check (not yet implemented)
- * Very fast offline filesystem check
- * Efficient incremental backup and FS mirroring (not yet implemented)
+ * Offline filesystem check
+ * Efficient incremental backup and FS mirroring
* Online filesystem defragmentation
+For more information please refer to the wiki
-Mount Options
-=============
-
-When mounting a btrfs filesystem, the following option are accepted.
-Options with (*) are default options and will not show in the mount options.
-
- alloc_start=<bytes>
- Debugging option to force all block allocations above a certain
- byte threshold on each block device. The value is specified in
- bytes, optionally with a K, M, or G suffix, case insensitive.
- Default is 1MB.
-
- noautodefrag(*)
- autodefrag
- Disable/enable auto defragmentation.
- Auto defragmentation detects small random writes into files and queue
- them up for the defrag process. Works best for small files;
- Not well suited for large database workloads.
-
- check_int
- check_int_data
- check_int_print_mask=<value>
- These debugging options control the behavior of the integrity checking
- module (the BTRFS_FS_CHECK_INTEGRITY config option required).
-
- check_int enables the integrity checker module, which examines all
- block write requests to ensure on-disk consistency, at a large
- memory and CPU cost.
-
- check_int_data includes extent data in the integrity checks, and
- implies the check_int option.
-
- check_int_print_mask takes a bitmask of BTRFSIC_PRINT_MASK_* values
- as defined in fs/btrfs/check-integrity.c, to control the integrity
- checker module behavior.
-
- See comments at the top of fs/btrfs/check-integrity.c for more info.
-
- commit=<seconds>
- Set the interval of periodic commit, 30 seconds by default. Higher
- values defer data being synced to permanent storage with obvious
- consequences when the system crashes. The upper bound is not forced,
- but a warning is printed if it's more than 300 seconds (5 minutes).
-
- compress
- compress=<type>
- compress-force
- compress-force=<type>
- Control BTRFS file data compression. Type may be specified as "zlib"
- "lzo" or "no" (for no compression, used for remounting). If no type
- is specified, zlib is used. If compress-force is specified,
- all files will be compressed, whether or not they compress well.
- If compression is enabled, nodatacow and nodatasum are disabled.
-
- degraded
- Allow mounts to continue with missing devices. A read-write mount may
- fail with too many devices missing, for example if a stripe member
- is completely missing.
-
- device=<devicepath>
- Specify a device during mount so that ioctls on the control device
- can be avoided. Especially useful when trying to mount a multi-device
- setup as root. May be specified multiple times for multiple devices.
-
- nodiscard(*)
- discard
- Disable/enable discard mount option.
- Discard issues frequent commands to let the block device reclaim space
- freed by the filesystem.
- This is useful for SSD devices, thinly provisioned
- LUNs and virtual machine images, but may have a significant
- performance impact. (The fstrim command is also available to
- initiate batch trims from userspace).
-
- noenospc_debug(*)
- enospc_debug
- Disable/enable debugging option to be more verbose in some ENOSPC conditions.
-
- fatal_errors=<action>
- Action to take when encountering a fatal error:
- "bug" - BUG() on a fatal error. This is the default.
- "panic" - panic() on a fatal error.
-
- noflushoncommit(*)
- flushoncommit
- The 'flushoncommit' mount option forces any data dirtied by a write in a
- prior transaction to commit as part of the current commit. This makes
- the committed state a fully consistent view of the file system from the
- application's perspective (i.e., it includes all completed file system
- operations). This was previously the behavior only when a snapshot is
- created.
-
- inode_cache
- Enable free inode number caching. Defaults to off due to an overflow
- problem when the free space crcs don't fit inside a single page.
-
- max_inline=<bytes>
- Specify the maximum amount of space, in bytes, that can be inlined in
- a metadata B-tree leaf. The value is specified in bytes, optionally
- with a K, M, or G suffix, case insensitive. In practice, this value
- is limited by the root sector size, with some space unavailable due
- to leaf headers. For a 4k sector size, max inline data is ~3900 bytes.
-
- metadata_ratio=<value>
- Specify that 1 metadata chunk should be allocated after every <value>
- data chunks. Off by default.
-
- acl(*)
- noacl
- Enable/disable support for Posix Access Control Lists (ACLs). See the
- acl(5) manual page for more information about ACLs.
-
- barrier(*)
- nobarrier
- Enable/disable the use of block layer write barriers. Write barriers
- ensure that certain IOs make it through the device cache and are on
- persistent storage. If disabled on a device with a volatile
- (non-battery-backed) write-back cache, nobarrier option will lead to
- filesystem corruption on a system crash or power loss.
-
- datacow(*)
- nodatacow
- Enable/disable data copy-on-write for newly created files.
- Nodatacow implies nodatasum, and disables all compression.
-
- datasum(*)
- nodatasum
- Enable/disable data checksumming for newly created files.
- Datasum implies datacow.
-
- treelog(*)
- notreelog
- Enable/disable the tree logging used for fsync and O_SYNC writes.
-
- recovery
- Enable autorecovery attempts if a bad tree root is found at mount time.
- Currently this scans a list of several previous tree roots and tries to
- use the first readable.
-
- rescan_uuid_tree
- Force check and rebuild procedure of the UUID tree. This should not
- normally be needed.
-
- skip_balance
- Skip automatic resume of interrupted balance operation after mount.
- May be resumed with "btrfs balance resume."
-
- space_cache (*)
- Enable the on-disk freespace cache.
- nospace_cache
- Disable freespace cache loading without clearing the cache.
- clear_cache
- Force clearing and rebuilding of the disk space cache if something
- has gone wrong.
-
- ssd
- nossd
- ssd_spread
- Options to control ssd allocation schemes. By default, BTRFS will
- enable or disable ssd allocation heuristics depending on whether a
- rotational or non-rotational disk is in use. The ssd and nossd options
- can override this autodetection.
-
- The ssd_spread mount option attempts to allocate into big chunks
- of unused space, and may perform better on low-end ssds. ssd_spread
- implies ssd, enabling all other ssd heuristics as well.
-
- subvol=<path>
- Mount subvolume at <path> rather than the root subvolume. <path> is
- relative to the top level subvolume.
-
- subvolid=<ID>
- Mount subvolume specified by an ID number rather than the root subvolume.
- This allows mounting of subvolumes which are not in the root of the mounted
- filesystem.
- You can use "btrfs subvolume list" to see subvolume ID numbers.
-
- subvolrootid=<objectid> (deprecated)
- Mount subvolume specified by <objectid> rather than the root subvolume.
- This allows mounting of subvolumes which are not in the root of the mounted
- filesystem.
- You can use "btrfs subvolume show " to see the object ID for a subvolume.
-
- thread_pool=<number>
- The number of worker threads to allocate. The default number is equal
- to the number of CPUs + 2, or 8, whichever is smaller.
-
- user_subvol_rm_allowed
- Allow subvolumes to be deleted by a non-root user. Use with caution.
-
-MAILING LIST
-============
-
-There is a Btrfs mailing list hosted on vger.kernel.org. You can
-find details on how to subscribe here:
-
-http://vger.kernel.org/vger-lists.html#linux-btrfs
-
-Mailing list archives are available from gmane:
-
-http://dir.gmane.org/gmane.comp.file-systems.btrfs
-
-
-
-IRC
-===
-
-Discussion of Btrfs also occurs on the #btrfs channel of the Freenode
-IRC network.
-
-
-
- UTILITIES
- =========
-
-Userspace tools for creating and manipulating Btrfs file systems are
-available from the git repository at the following location:
-
- http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs.git
- git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git
-
-These include the following tools:
-
-* mkfs.btrfs: create a filesystem
-
-* btrfs: a single tool to manage the filesystems, refer to the manpage for more details
-
-* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem
-
-Other tools for specific tasks:
-
-* btrfs-convert: in-place conversion from ext2/3/4 filesystems
+ https://btrfs.wiki.kernel.org
-* btrfs-image: dump filesystem metadata for debugging
+that maintains information about administration tasks, frequently asked
+questions, use cases, mount options, comprehensible changelogs, features,
+manual pages, source code repositories, contacts etc.
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f6dac40f87ff..80e8472d618b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -148,8 +148,7 @@ int __init btrfs_prelim_ref_init(void)
void btrfs_prelim_ref_exit(void)
{
- if (btrfs_prelim_ref_cache)
- kmem_cache_destroy(btrfs_prelim_ref_cache);
+ kmem_cache_destroy(btrfs_prelim_ref_cache);
}
/*
@@ -566,17 +565,14 @@ static void __merge_refs(struct list_head *head, int mode)
struct __prelim_ref *pos2 = pos1, *tmp;
list_for_each_entry_safe_continue(pos2, tmp, head, list) {
- struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
+ struct __prelim_ref *ref1 = pos1, *ref2 = pos2;
struct extent_inode_elem *eie;
if (!ref_for_same_block(ref1, ref2))
continue;
if (mode == 1) {
- if (!ref1->parent && ref2->parent) {
- xchg = ref1;
- ref1 = ref2;
- ref2 = xchg;
- }
+ if (!ref1->parent && ref2->parent)
+ swap(ref1, ref2);
} else {
if (ref1->parent != ref2->parent)
continue;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 861d472564c1..e34a71b3e225 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -95,6 +95,7 @@
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
+#include <linux/string.h>
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
@@ -105,6 +106,7 @@
#include "locking.h"
#include "check-integrity.h"
#include "rcu-string.h"
+#include "compression.h"
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -176,7 +178,7 @@ struct btrfsic_block {
* Elements of this type are allocated dynamically and required because
* each block object can refer to and can be ref from multiple blocks.
* The key to lookup them in the hashtable is the dev_bytenr of
- * the block ref to plus the one from the block refered from.
+ * the block ref to plus the one from the block referred from.
* The fact that they are searchable via a hashtable and that a
* ref_cnt is maintained is not required for the btrfs integrity
* check algorithm itself, it is only used to make the output more
@@ -3076,7 +3078,7 @@ int btrfsic_mount(struct btrfs_root *root,
list_for_each_entry(device, dev_head, dev_list) {
struct btrfsic_dev_state *ds;
- char *p;
+ const char *p;
if (!device->bdev || !device->name)
continue;
@@ -3092,11 +3094,7 @@ int btrfsic_mount(struct btrfs_root *root,
ds->state = state;
bdevname(ds->bdev, ds->name);
ds->name[BDEVNAME_SIZE - 1] = '\0';
- for (p = ds->name; *p != '\0'; p++);
- while (p > ds->name && *p != '/')
- p--;
- if (*p == '/')
- p++;
+ p = kbasename(ds->name);
strlcpy(ds->name, p, sizeof(ds->name));
btrfsic_dev_state_hashtable_add(ds,
&btrfsic_dev_state_hashtable);
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 13a4dc0436c9..f49d8b8c0f00 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,6 +48,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
unsigned long pg_index,
unsigned long pg_offset);
+
+enum btrfs_compression_type {
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_TYPES = 2,
+ BTRFS_COMPRESS_LAST = 3,
+};
+
struct btrfs_compress_op {
struct list_head *(*alloc_workspace)(void);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 769e0ff1b4ce..77592931ab4f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -311,7 +311,7 @@ struct tree_mod_root {
struct tree_mod_elem {
struct rb_node node;
- u64 index; /* shifted logical */
+ u64 logical;
u64 seq;
enum mod_log_op op;
@@ -435,11 +435,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
/*
* key order of the log:
- * index -> sequence
+ * node/leaf start address -> sequence
*
- * the index is the shifted logical of the *new* root node for root replace
- * operations, or the shifted logical of the affected block for all other
- * operations.
+ * The 'start address' is the logical address of the *new* root node
+ * for root replace operations, or the logical address of the affected
+ * block for all other operations.
*
* Note: must be called with write lock (tree_mod_log_write_lock).
*/
@@ -460,9 +460,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
while (*new) {
cur = container_of(*new, struct tree_mod_elem, node);
parent = *new;
- if (cur->index < tm->index)
+ if (cur->logical < tm->logical)
new = &((*new)->rb_left);
- else if (cur->index > tm->index)
+ else if (cur->logical > tm->logical)
new = &((*new)->rb_right);
else if (cur->seq < tm->seq)
new = &((*new)->rb_left);
@@ -523,7 +523,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
if (!tm)
return NULL;
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->logical = eb->start;
if (op != MOD_LOG_KEY_ADD) {
btrfs_node_key(eb, &tm->key, slot);
tm->blockptr = btrfs_node_blockptr(eb, slot);
@@ -588,7 +588,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
goto free_tms;
}
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->logical = eb->start;
tm->slot = src_slot;
tm->move.dst_slot = dst_slot;
tm->move.nr_items = nr_items;
@@ -699,7 +699,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
goto free_tms;
}
- tm->index = new_root->start >> PAGE_CACHE_SHIFT;
+ tm->logical = new_root->start;
tm->old_root.logical = old_root->start;
tm->old_root.level = btrfs_header_level(old_root);
tm->generation = btrfs_header_generation(old_root);
@@ -739,16 +739,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
struct rb_node *node;
struct tree_mod_elem *cur = NULL;
struct tree_mod_elem *found = NULL;
- u64 index = start >> PAGE_CACHE_SHIFT;
tree_mod_log_read_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
cur = container_of(node, struct tree_mod_elem, node);
- if (cur->index < index) {
+ if (cur->logical < start) {
node = node->rb_left;
- } else if (cur->index > index) {
+ } else if (cur->logical > start) {
node = node->rb_right;
} else if (cur->seq < min_seq) {
node = node->rb_left;
@@ -1230,9 +1229,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
return NULL;
/*
- * the very last operation that's logged for a root is the replacement
- * operation (if it is replaced at all). this has the index of the *new*
- * root, making it the very first operation that's logged for this root.
+ * the very last operation that's logged for a root is the
+ * replacement operation (if it is replaced at all). this has
+ * the logical address of the *new* root, making it the very
+ * first operation that's logged for this root.
*/
while (1) {
tm = tree_mod_log_search_oldest(fs_info, root_logical,
@@ -1336,7 +1336,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
if (!next)
break;
tm = container_of(next, struct tree_mod_elem, node);
- if (tm->index != first_tm->index)
+ if (tm->logical != first_tm->logical)
break;
}
tree_mod_log_read_unlock(fs_info);
@@ -5361,7 +5361,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
}
- tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS);
+ tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL);
if (!tmp_buf) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bfe4a337fb4d..84a6a5b3384a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -100,6 +100,9 @@ struct btrfs_ordered_sum;
/* tracks free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+/* device stats in the device tree */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
@@ -715,14 +718,6 @@ struct btrfs_timespec {
__le32 nsec;
} __attribute__ ((__packed__));
-enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LZO = 2,
- BTRFS_COMPRESS_TYPES = 2,
- BTRFS_COMPRESS_LAST = 3,
-};
-
struct btrfs_inode_item {
/* nfs style generation number */
__le64 generation;
@@ -793,7 +788,7 @@ struct btrfs_root_item {
/*
* This generation number is used to test if the new fields are valid
- * and up to date while reading the root item. Everytime the root item
+ * and up to date while reading the root item. Every time the root item
* is written out, the "generation" field is copied into this field. If
* anyone ever mounted the fs with an older kernel, we will have
* mismatching generation values here and thus must invalidate the
@@ -1002,8 +997,10 @@ struct btrfs_dev_replace {
pid_t lock_owner;
atomic_t nesting_level;
struct mutex lock_finishing_cancel_unmount;
- struct mutex lock_management_lock;
- struct mutex lock;
+ rwlock_t lock;
+ atomic_t read_locks;
+ atomic_t blocking_readers;
+ wait_queue_head_t read_lock_wq;
struct btrfs_scrub_progress scrub_progress;
};
@@ -1222,10 +1219,10 @@ struct btrfs_space_info {
* we've called update_block_group and dropped the bytes_used counter
* and increased the bytes_pinned counter. However this means that
* bytes_pinned does not reflect the bytes that will be pinned once the
- * delayed refs are flushed, so this counter is inc'ed everytime we call
- * btrfs_free_extent so it is a realtime count of what will be freed
- * once the transaction is committed. It will be zero'ed everytime the
- * transaction commits.
+ * delayed refs are flushed, so this counter is inc'ed every time we
+ * call btrfs_free_extent so it is a realtime count of what will be
+ * freed once the transaction is committed. It will be zero'ed every
+ * time the transaction commits.
*/
struct percpu_counter total_bytes_pinned;
@@ -1822,6 +1819,9 @@ struct btrfs_fs_info {
spinlock_t reada_lock;
struct radix_tree_root reada_tree;
+ /* readahead works cnt */
+ atomic_t reada_works_cnt;
+
/* Extent buffer radix tree */
spinlock_t buffer_lock;
struct radix_tree_root buffer_radix;
@@ -2185,13 +2185,43 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_QGROUP_RELATION_KEY 246
+/*
+ * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
+ */
#define BTRFS_BALANCE_ITEM_KEY 248
/*
- * Persistantly stores the io stats in the device tree.
- * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
+ * The key type for tree items that are stored persistently, but do not need to
+ * exist for extended period of time. The items can exist in any tree.
+ *
+ * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - balance status item
+ * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
*/
-#define BTRFS_DEV_STATS_KEY 249
+#define BTRFS_TEMPORARY_ITEM_KEY 248
+
+/*
+ * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
+ */
+#define BTRFS_DEV_STATS_KEY 249
+
+/*
+ * The key type for tree items that are stored persistently and usually exist
+ * for a long period, eg. filesystem lifetime. The item kinds can be status
+ * information, stats or preference values. The item can exist in any tree.
+ *
+ * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - device statistics, store IO stats in the device tree, one key for all
+ * stats
+ * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
+ */
+#define BTRFS_PERSISTENT_ITEM_KEY 249
/*
* Persistantly stores the device replace state in the device tree.
@@ -2241,7 +2271,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
-#define BTRFS_MOUNT_RECOVERY (1 << 18)
+#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
@@ -2250,9 +2280,10 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
+#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
-#define BTRFS_DEFAULT_MAX_INLINE (8192)
+#define BTRFS_DEFAULT_MAX_INLINE (2048)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2353,6 +2384,9 @@ struct btrfs_map_token {
unsigned long offset;
};
+#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
+ ((bytes) >> (fs_info)->sb->s_blocksize_bits)
+
static inline void btrfs_init_map_token (struct btrfs_map_token *token)
{
token->kaddr = NULL;
@@ -3448,8 +3482,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
- return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- 2 * num_items;
+ return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
@@ -4027,7 +4060,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir, u64 objectid,
const char *name, int name_len);
-int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -4089,6 +4122,7 @@ void btrfs_test_inode_set_ops(struct inode *inode);
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_update_iflags(struct inode *inode);
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
int btrfs_is_empty_uuid(u8 *uuid);
@@ -4151,7 +4185,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
-int btrfs_parse_options(struct btrfs_root *root, char *options);
+int btrfs_parse_options(struct btrfs_root *root, char *options,
+ unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
#ifdef CONFIG_PRINTK
@@ -4525,8 +4560,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct btrfs_key *start, struct btrfs_key *end);
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err);
+int btree_readahead_hook(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, u64 start, int err);
static inline int is_fstree(u64 rootid)
{
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b57daa895cea..6cef0062f929 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -43,8 +43,7 @@ int __init btrfs_delayed_inode_init(void)
void btrfs_delayed_inode_exit(void)
{
- if (delayed_node_cache)
- kmem_cache_destroy(delayed_node_cache);
+ kmem_cache_destroy(delayed_node_cache);
}
static inline void btrfs_init_delayed_node(
@@ -651,9 +650,14 @@ static int btrfs_delayed_inode_reserve_metadata(
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!WARN_ON(ret))
+ if (!ret)
goto out;
+ if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ btrfs_debug(root->fs_info,
+ "block rsv migrate returned %d", ret);
+ WARN_ON(1);
+ }
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 914ac13bd92f..430b3689b112 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -929,14 +929,10 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
void btrfs_delayed_ref_exit(void)
{
- if (btrfs_delayed_ref_head_cachep)
- kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
- if (btrfs_delayed_tree_ref_cachep)
- kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
- if (btrfs_delayed_data_ref_cachep)
- kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
- if (btrfs_delayed_extent_op_cachep)
- kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
+ kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+ kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
+ kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
+ kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
int btrfs_delayed_ref_init(void)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index cbb7dbfb3fff..a1d6652e0c47 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_dev_replace_item *ptr;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
if (!dev_replace->is_valid ||
!dev_replace->item_needs_writeback) {
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
return 0;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);