diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 18:12:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 18:12:42 -0700 |
commit | 968f3e374faf41e5e6049399eb7302777a09a1e8 (patch) | |
tree | 613c5aa9a005cfbe3fada77fcb0ab24deda126d9 | |
parent | e531cdf50a8a0fb7a4d51c06e52097bd01e9bf7c (diff) | |
parent | 389f239c53420802ad5085e51e88c37e2df5e003 (diff) |
Merge branch 'for-linus-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"We have a good sized cleanup of our internal read ahead code, and the
first series of commits from Chandan to enable PAGE_SIZE > sectorsize
Otherwise, it's a normal series of cleanups and fixes, with many
thanks to Dave Sterba for doing most of the patch wrangling this time"
* 'for-linus-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (82 commits)
btrfs: make sure we stay inside the bvec during __btrfs_lookup_bio_sums
btrfs: Fix misspellings in comments.
btrfs: Print Warning only if ENOSPC_DEBUG is enabled
btrfs: scrub: silence an uninitialized variable warning
btrfs: move btrfs_compression_type to compression.h
btrfs: rename btrfs_print_info to btrfs_print_mod_info
Btrfs: Show a warning message if one of objectid reaches its highest value
Documentation: btrfs: remove usage specific information
btrfs: use kbasename in btrfsic_mount
Btrfs: do not collect ordered extents when logging that inode exists
Btrfs: fix race when checking if we can skip fsync'ing an inode
Btrfs: fix listxattrs not listing all xattrs packed in the same item
Btrfs: fix deadlock between direct IO reads and buffered writes
Btrfs: fix extent_same allowing destination offset beyond i_size
Btrfs: fix file loss on log replay after renaming a file and fsync
Btrfs: fix unreplayable log after snapshot delete + parent dir fsync
Btrfs: fix lockdep deadlock warning due to dev_replace
btrfs: drop unused argument in btrfs_ioctl_get_supported_features
btrfs: add GET_SUPPORTED_FEATURES to the control device ioctls
btrfs: change max_inline default to 2048
...
36 files changed, 1102 insertions, 931 deletions
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index c772b47e7ef0..f9dad22d95ce 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt @@ -1,20 +1,10 @@ - BTRFS ===== -Btrfs is a copy on write filesystem for Linux aimed at -implementing advanced features while focusing on fault tolerance, -repair and easy administration. Initially developed by Oracle, Btrfs -is licensed under the GPL and open for contribution from anyone. - -Linux has a wealth of filesystems to choose from, but we are facing a -number of challenges with scaling to the large storage subsystems that -are becoming common in today's data centers. Filesystems need to scale -in their ability to address and manage large storage, and also in -their ability to detect, repair and tolerate errors in the data stored -on disk. Btrfs is under heavy development, and is not suitable for -any uses other than benchmarking and review. The Btrfs disk format is -not yet finalized. +Btrfs is a copy on write filesystem for Linux aimed at implementing advanced +features while focusing on fault tolerance, repair and easy administration. +Jointly developed by several companies, licensed under the GPL and open for +contribution from anyone. The main Btrfs features include: @@ -28,243 +18,14 @@ The main Btrfs features include: * Checksums on data and metadata (multiple algorithms available) * Compression * Integrated multiple device support, with several raid algorithms - * Online filesystem check (not yet implemented) - * Very fast offline filesystem check - * Efficient incremental backup and FS mirroring (not yet implemented) + * Offline filesystem check + * Efficient incremental backup and FS mirroring * Online filesystem defragmentation +For more information please refer to the wiki -Mount Options -============= - -When mounting a btrfs filesystem, the following option are accepted. -Options with (*) are default options and will not show in the mount options. - - alloc_start=<bytes> - Debugging option to force all block allocations above a certain - byte threshold on each block device. The value is specified in - bytes, optionally with a K, M, or G suffix, case insensitive. - Default is 1MB. - - noautodefrag(*) - autodefrag - Disable/enable auto defragmentation. - Auto defragmentation detects small random writes into files and queue - them up for the defrag process. Works best for small files; - Not well suited for large database workloads. - - check_int - check_int_data - check_int_print_mask=<value> - These debugging options control the behavior of the integrity checking - module (the BTRFS_FS_CHECK_INTEGRITY config option required). - - check_int enables the integrity checker module, which examines all - block write requests to ensure on-disk consistency, at a large - memory and CPU cost. - - check_int_data includes extent data in the integrity checks, and - implies the check_int option. - - check_int_print_mask takes a bitmask of BTRFSIC_PRINT_MASK_* values - as defined in fs/btrfs/check-integrity.c, to control the integrity - checker module behavior. - - See comments at the top of fs/btrfs/check-integrity.c for more info. - - commit=<seconds> - Set the interval of periodic commit, 30 seconds by default. Higher - values defer data being synced to permanent storage with obvious - consequences when the system crashes. The upper bound is not forced, - but a warning is printed if it's more than 300 seconds (5 minutes). - - compress - compress=<type> - compress-force - compress-force=<type> - Control BTRFS file data compression. Type may be specified as "zlib" - "lzo" or "no" (for no compression, used for remounting). If no type - is specified, zlib is used. If compress-force is specified, - all files will be compressed, whether or not they compress well. - If compression is enabled, nodatacow and nodatasum are disabled. - - degraded - Allow mounts to continue with missing devices. A read-write mount may - fail with too many devices missing, for example if a stripe member - is completely missing. - - device=<devicepath> - Specify a device during mount so that ioctls on the control device - can be avoided. Especially useful when trying to mount a multi-device - setup as root. May be specified multiple times for multiple devices. - - nodiscard(*) - discard - Disable/enable discard mount option. - Discard issues frequent commands to let the block device reclaim space - freed by the filesystem. - This is useful for SSD devices, thinly provisioned - LUNs and virtual machine images, but may have a significant - performance impact. (The fstrim command is also available to - initiate batch trims from userspace). - - noenospc_debug(*) - enospc_debug - Disable/enable debugging option to be more verbose in some ENOSPC conditions. - - fatal_errors=<action> - Action to take when encountering a fatal error: - "bug" - BUG() on a fatal error. This is the default. - "panic" - panic() on a fatal error. - - noflushoncommit(*) - flushoncommit - The 'flushoncommit' mount option forces any data dirtied by a write in a - prior transaction to commit as part of the current commit. This makes - the committed state a fully consistent view of the file system from the - application's perspective (i.e., it includes all completed file system - operations). This was previously the behavior only when a snapshot is - created. - - inode_cache - Enable free inode number caching. Defaults to off due to an overflow - problem when the free space crcs don't fit inside a single page. - - max_inline=<bytes> - Specify the maximum amount of space, in bytes, that can be inlined in - a metadata B-tree leaf. The value is specified in bytes, optionally - with a K, M, or G suffix, case insensitive. In practice, this value - is limited by the root sector size, with some space unavailable due - to leaf headers. For a 4k sector size, max inline data is ~3900 bytes. - - metadata_ratio=<value> - Specify that 1 metadata chunk should be allocated after every <value> - data chunks. Off by default. - - acl(*) - noacl - Enable/disable support for Posix Access Control Lists (ACLs). See the - acl(5) manual page for more information about ACLs. - - barrier(*) - nobarrier - Enable/disable the use of block layer write barriers. Write barriers - ensure that certain IOs make it through the device cache and are on - persistent storage. If disabled on a device with a volatile - (non-battery-backed) write-back cache, nobarrier option will lead to - filesystem corruption on a system crash or power loss. - - datacow(*) - nodatacow - Enable/disable data copy-on-write for newly created files. - Nodatacow implies nodatasum, and disables all compression. - - datasum(*) - nodatasum - Enable/disable data checksumming for newly created files. - Datasum implies datacow. - - treelog(*) - notreelog - Enable/disable the tree logging used for fsync and O_SYNC writes. - - recovery - Enable autorecovery attempts if a bad tree root is found at mount time. - Currently this scans a list of several previous tree roots and tries to - use the first readable. - - rescan_uuid_tree - Force check and rebuild procedure of the UUID tree. This should not - normally be needed. - - skip_balance - Skip automatic resume of interrupted balance operation after mount. - May be resumed with "btrfs balance resume." - - space_cache (*) - Enable the on-disk freespace cache. - nospace_cache - Disable freespace cache loading without clearing the cache. - clear_cache - Force clearing and rebuilding of the disk space cache if something - has gone wrong. - - ssd - nossd - ssd_spread - Options to control ssd allocation schemes. By default, BTRFS will - enable or disable ssd allocation heuristics depending on whether a - rotational or non-rotational disk is in use. The ssd and nossd options - can override this autodetection. - - The ssd_spread mount option attempts to allocate into big chunks - of unused space, and may perform better on low-end ssds. ssd_spread - implies ssd, enabling all other ssd heuristics as well. - - subvol=<path> - Mount subvolume at <path> rather than the root subvolume. <path> is - relative to the top level subvolume. - - subvolid=<ID> - Mount subvolume specified by an ID number rather than the root subvolume. - This allows mounting of subvolumes which are not in the root of the mounted - filesystem. - You can use "btrfs subvolume list" to see subvolume ID numbers. - - subvolrootid=<objectid> (deprecated) - Mount subvolume specified by <objectid> rather than the root subvolume. - This allows mounting of subvolumes which are not in the root of the mounted - filesystem. - You can use "btrfs subvolume show " to see the object ID for a subvolume. - - thread_pool=<number> - The number of worker threads to allocate. The default number is equal - to the number of CPUs + 2, or 8, whichever is smaller. - - user_subvol_rm_allowed - Allow subvolumes to be deleted by a non-root user. Use with caution. - -MAILING LIST -============ - -There is a Btrfs mailing list hosted on vger.kernel.org. You can -find details on how to subscribe here: - -http://vger.kernel.org/vger-lists.html#linux-btrfs - -Mailing list archives are available from gmane: - -http://dir.gmane.org/gmane.comp.file-systems.btrfs - - - -IRC -=== - -Discussion of Btrfs also occurs on the #btrfs channel of the Freenode -IRC network. - - - - UTILITIES - ========= - -Userspace tools for creating and manipulating Btrfs file systems are -available from the git repository at the following location: - - http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs.git - git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git - -These include the following tools: - -* mkfs.btrfs: create a filesystem - -* btrfs: a single tool to manage the filesystems, refer to the manpage for more details - -* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem - -Other tools for specific tasks: - -* btrfs-convert: in-place conversion from ext2/3/4 filesystems + https://btrfs.wiki.kernel.org -* btrfs-image: dump filesystem metadata for debugging +that maintains information about administration tasks, frequently asked +questions, use cases, mount options, comprehensible changelogs, features, +manual pages, source code repositories, contacts etc. diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f6dac40f87ff..80e8472d618b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -148,8 +148,7 @@ int __init btrfs_prelim_ref_init(void) void btrfs_prelim_ref_exit(void) { - if (btrfs_prelim_ref_cache) - kmem_cache_destroy(btrfs_prelim_ref_cache); + kmem_cache_destroy(btrfs_prelim_ref_cache); } /* @@ -566,17 +565,14 @@ static void __merge_refs(struct list_head *head, int mode) struct __prelim_ref *pos2 = pos1, *tmp; list_for_each_entry_safe_continue(pos2, tmp, head, list) { - struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2; + struct __prelim_ref *ref1 = pos1, *ref2 = pos2; struct extent_inode_elem *eie; if (!ref_for_same_block(ref1, ref2)) continue; if (mode == 1) { - if (!ref1->parent && ref2->parent) { - xchg = ref1; - ref1 = ref2; - ref2 = xchg; - } + if (!ref1->parent && ref2->parent) + swap(ref1, ref2); } else { if (ref1->parent != ref2->parent) continue; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 861d472564c1..e34a71b3e225 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -95,6 +95,7 @@ #include <linux/genhd.h> #include <linux/blkdev.h> #include <linux/vmalloc.h> +#include <linux/string.h> #include "ctree.h" #include "disk-io.h" #include "hash.h" @@ -105,6 +106,7 @@ #include "locking.h" #include "check-integrity.h" #include "rcu-string.h" +#include "compression.h" #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 @@ -176,7 +178,7 @@ struct btrfsic_block { * Elements of this type are allocated dynamically and required because * each block object can refer to and can be ref from multiple blocks. * The key to lookup them in the hashtable is the dev_bytenr of - * the block ref to plus the one from the block refered from. + * the block ref to plus the one from the block referred from. * The fact that they are searchable via a hashtable and that a * ref_cnt is maintained is not required for the btrfs integrity * check algorithm itself, it is only used to make the output more @@ -3076,7 +3078,7 @@ int btrfsic_mount(struct btrfs_root *root, list_for_each_entry(device, dev_head, dev_list) { struct btrfsic_dev_state *ds; - char *p; + const char *p; if (!device->bdev || !device->name) continue; @@ -3092,11 +3094,7 @@ int btrfsic_mount(struct btrfs_root *root, ds->state = state; bdevname(ds->bdev, ds->name); ds->name[BDEVNAME_SIZE - 1] = '\0'; - for (p = ds->name; *p != '\0'; p++); - while (p > ds->name && *p != '/') - p--; - if (*p == '/') - p++; + p = kbasename(ds->name); strlcpy(ds->name, p, sizeof(ds->name)); btrfsic_dev_state_hashtable_add(ds, &btrfsic_dev_state_hashtable); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 13a4dc0436c9..f49d8b8c0f00 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -48,6 +48,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, unsigned long pg_index, unsigned long pg_offset); + +enum btrfs_compression_type { + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LZO = 2, + BTRFS_COMPRESS_TYPES = 2, + BTRFS_COMPRESS_LAST = 3, +}; + struct btrfs_compress_op { struct list_head *(*alloc_workspace)(void); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 769e0ff1b4ce..77592931ab4f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -311,7 +311,7 @@ struct tree_mod_root { struct tree_mod_elem { struct rb_node node; - u64 index; /* shifted logical */ + u64 logical; u64 seq; enum mod_log_op op; @@ -435,11 +435,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, /* * key order of the log: - * index -> sequence + * node/leaf start address -> sequence * - * the index is the shifted logical of the *new* root node for root replace - * operations, or the shifted logical of the affected block for all other - * operations. + * The 'start address' is the logical address of the *new* root node + * for root replace operations, or the logical address of the affected + * block for all other operations. * * Note: must be called with write lock (tree_mod_log_write_lock). */ @@ -460,9 +460,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) while (*new) { cur = container_of(*new, struct tree_mod_elem, node); parent = *new; - if (cur->index < tm->index) + if (cur->logical < tm->logical) new = &((*new)->rb_left); - else if (cur->index > tm->index) + else if (cur->logical > tm->logical) new = &((*new)->rb_right); else if (cur->seq < tm->seq) new = &((*new)->rb_left); @@ -523,7 +523,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot, if (!tm) return NULL; - tm->index = eb->start >> PAGE_CACHE_SHIFT; + tm->logical = eb->start; if (op != MOD_LOG_KEY_ADD) { btrfs_node_key(eb, &tm->key, slot); tm->blockptr = btrfs_node_blockptr(eb, slot); @@ -588,7 +588,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, goto free_tms; } - tm->index = eb->start >> PAGE_CACHE_SHIFT; + tm->logical = eb->start; tm->slot = src_slot; tm->move.dst_slot = dst_slot; tm->move.nr_items = nr_items; @@ -699,7 +699,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, goto free_tms; } - tm->index = new_root->start >> PAGE_CACHE_SHIFT; + tm->logical = new_root->start; tm->old_root.logical = old_root->start; tm->old_root.level = btrfs_header_level(old_root); tm->generation = btrfs_header_generation(old_root); @@ -739,16 +739,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, struct rb_node *node; struct tree_mod_elem *cur = NULL; struct tree_mod_elem *found = NULL; - u64 index = start >> PAGE_CACHE_SHIFT; tree_mod_log_read_lock(fs_info); tm_root = &fs_info->tree_mod_log; node = tm_root->rb_node; while (node) { cur = container_of(node, struct tree_mod_elem, node); - if (cur->index < index) { + if (cur->logical < start) { node = node->rb_left; - } else if (cur->index > index) { + } else if (cur->logical > start) { node = node->rb_right; } else if (cur->seq < min_seq) { node = node->rb_left; @@ -1230,9 +1229,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, return NULL; /* - * the very last operation that's logged for a root is the replacement - * operation (if it is replaced at all). this has the index of the *new* - * root, making it the very first operation that's logged for this root. + * the very last operation that's logged for a root is the + * replacement operation (if it is replaced at all). this has + * the logical address of the *new* root, making it the very + * first operation that's logged for this root. */ while (1) { tm = tree_mod_log_search_oldest(fs_info, root_logical, @@ -1336,7 +1336,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!next) break; tm = container_of(next, struct tree_mod_elem, node); - if (tm->index != first_tm->index) + if (tm->logical != first_tm->logical) break; } tree_mod_log_read_unlock(fs_info); @@ -5361,7 +5361,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); + tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL); if (!tmp_buf) { ret = -ENOMEM; goto out; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bfe4a337fb4d..84a6a5b3384a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -100,6 +100,9 @@ struct btrfs_ordered_sum; /* tracks free space in block groups. */ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL +/* device stats in the device tree */ +#define BTRFS_DEV_STATS_OBJECTID 0ULL + /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL @@ -715,14 +718,6 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -enum btrfs_compression_type { - BTRFS_COMPRESS_NONE = 0, - BTRFS_COMPRESS_ZLIB = 1, - BTRFS_COMPRESS_LZO = 2, - BTRFS_COMPRESS_TYPES = 2, - BTRFS_COMPRESS_LAST = 3, -}; - struct btrfs_inode_item { /* nfs style generation number */ __le64 generation; @@ -793,7 +788,7 @@ struct btrfs_root_item { /* * This generation number is used to test if the new fields are valid - * and up to date while reading the root item. Everytime the root item + * and up to date while reading the root item. Every time the root item * is written out, the "generation" field is copied into this field. If * anyone ever mounted the fs with an older kernel, we will have * mismatching generation values here and thus must invalidate the @@ -1002,8 +997,10 @@ struct btrfs_dev_replace { pid_t lock_owner; atomic_t nesting_level; struct mutex lock_finishing_cancel_unmount; - struct mutex lock_management_lock; - struct mutex lock; + rwlock_t lock; + atomic_t read_locks; + atomic_t blocking_readers; + wait_queue_head_t read_lock_wq; struct btrfs_scrub_progress scrub_progress; }; @@ -1222,10 +1219,10 @@ struct btrfs_space_info { * we've called update_block_group and dropped the bytes_used counter * and increased the bytes_pinned counter. However this means that * bytes_pinned does not reflect the bytes that will be pinned once the - * delayed refs are flushed, so this counter is inc'ed everytime we call - * btrfs_free_extent so it is a realtime count of what will be freed - * once the transaction is committed. It will be zero'ed everytime the - * transaction commits. + * delayed refs are flushed, so this counter is inc'ed every time we + * call btrfs_free_extent so it is a realtime count of what will be + * freed once the transaction is committed. It will be zero'ed every + * time the transaction commits. */ struct percpu_counter total_bytes_pinned; @@ -1822,6 +1819,9 @@ struct btrfs_fs_info { spinlock_t reada_lock; struct radix_tree_root reada_tree; + /* readahead works cnt */ + atomic_t reada_works_cnt; + /* Extent buffer radix tree */ spinlock_t buffer_lock; struct radix_tree_root buffer_radix; @@ -2185,13 +2185,43 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_QGROUP_RELATION_KEY 246 +/* + * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. + */ #define BTRFS_BALANCE_ITEM_KEY 248 /* - * Persistantly stores the io stats in the device tree. - * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). + * The key type for tree items that are stored persistently, but do not need to + * exist for extended period of time. The items can exist in any tree. + * + * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] + * + * Existing items: + * + * - balance status item + * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) */ -#define BTRFS_DEV_STATS_KEY 249 +#define BTRFS_TEMPORARY_ITEM_KEY 248 + +/* + * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY + */ +#define BTRFS_DEV_STATS_KEY 249 + +/* + * The key type for tree items that are stored persistently and usually exist + * for a long period, eg. filesystem lifetime. The item kinds can be status + * information, stats or preference values. The item can exist in any tree. + * + * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] + * + * Existing items: + * + * - device statistics, store IO stats in the device tree, one key for all + * stats + * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) + */ +#define BTRFS_PERSISTENT_ITEM_KEY 249 /* * Persistantly stores the device replace state in the device tree. @@ -2241,7 +2271,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) -#define BTRFS_MOUNT_RECOVERY (1 << 18) +#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18) #define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) @@ -2250,9 +2280,10 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24) #define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) #define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) +#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) -#define BTRFS_DEFAULT_MAX_INLINE (8192) +#define BTRFS_DEFAULT_MAX_INLINE (2048) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -2353,6 +2384,9 @@ struct btrfs_map_token { unsigned long offset; }; +#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ + ((bytes) >> (fs_info)->sb->s_blocksize_bits) + static inline void btrfs_init_map_token (struct btrfs_map_token *token) { token->kaddr = NULL; @@ -3448,8 +3482,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes); static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, unsigned num_items) { - return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * - 2 * num_items; + return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; } /* @@ -4027,7 +4060,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, u64 objectid, const char *name, int name_len); -int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, +int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, int front); int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -4089,6 +4122,7 @@ void btrfs_test_inode_set_ops(struct inode *inode); /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int btrfs_ioctl_get_supported_features(void __user *arg); void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); int btrfs_is_empty_uuid(u8 *uuid); @@ -4151,7 +4185,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* super.c */ -int btrfs_parse_options(struct btrfs_root *root, char *options); +int btrfs_parse_options(struct btrfs_root *root, char *options, + unsigned long new_flags); int btrfs_sync_fs(struct super_block *sb, int wait); #ifdef CONFIG_PRINTK @@ -4525,8 +4560,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct btrfs_key *start, struct btrfs_key *end); int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); -int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, - u64 start, int err); +int btree_readahead_hook(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, u64 start, int err); static inline int is_fstree(u64 rootid) { diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index b57daa895cea..6cef0062f929 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -43,8 +43,7 @@ int __init btrfs_delayed_inode_init(void) void btrfs_delayed_inode_exit(void) { - if (delayed_node_cache) - kmem_cache_destroy(delayed_node_cache); + kmem_cache_destroy(delayed_node_cache); } static inline void btrfs_init_delayed_node( @@ -651,9 +650,14 @@ static int btrfs_delayed_inode_reserve_metadata( goto out; ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); - if (!WARN_ON(ret)) + if (!ret) goto out; + if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + btrfs_debug(roo |