summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 13:35:29 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 13:35:29 -0800
commit5cea7647e64657138138a3794ae172ee0fc175da (patch)
tree38adc54cba508db574e190e9d9aa601c36a8fd7c
parent808eb24e0e0939b487bf90e3888a9636f1c83acb (diff)
parentd28e649a5c58b779b303c252c66ee84a0f2c3b32 (diff)
Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "There are some new user features and the usual load of invisible enhancements or cleanups. New features: - extend mount options to specify zlib compression level, -o compress=zlib:9 - v2 of ioctl "extent to inode mapping", addressing a usecase where we want to retrieve more but inaccurate results and do the postprocessing in userspace, aiding defragmentation or deduplication tools - populate compression heuristics logic, do data sampling and try to guess compressibility by: looking for repeated patterns, counting unique byte values and distribution, calculating Shannon entropy; this will need more benchmarking and possibly fine tuning, but the base should be good enough - enable indexing for btrfs as lower filesystem in overlayfs - speedup page cache readahead during send on large files Internal enhancements: - more sanity checks of b-tree items when reading them from disk - more EINVAL/EUCLEAN fixups, missing BLK_STS_* conversion, other errno or error handling fixes - remove some homegrown IO-related logic, that's been obsoleted by core block layer changes (batching, plug/unplug, own counters) - add ref-verify, optional debugging feature to verify extent reference accounting - simplify code handling outstanding extents, make it more clear where and how the accounting is done - make delalloc reservations per-inode, simplify the code and make the logic more straightforward - extensive cleanup of delayed refs code Notable fixes: - fix send ioctl on 32bit with 64bit kernel" * 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (102 commits) btrfs: Fix bug for misused dev_t when lookup in dev state hash table. Btrfs: heuristic: add Shannon entropy calculation Btrfs: heuristic: add byte core set calculation Btrfs: heuristic: add byte set calculation Btrfs: heuristic: add detection of repeated data patterns Btrfs: heuristic: implement sampling logic Btrfs: heuristic: add bucket and sample counters and other defines Btrfs: compression: separate heuristic/compression workspaces btrfs: move btrfs_truncate_block out of trans handle btrfs: don't call btrfs_start_delalloc_roots in flushoncommit btrfs: track refs in a rb_tree instead of a list btrfs: add a comp_refs() helper btrfs: switch args for comp_*_refs btrfs: make the delalloc block rsv per inode btrfs: add tracepoints for outstanding extents mods Btrfs: rework outstanding_extents btrfs: increase output size for LOGICAL_INO_V2 ioctl btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2 btrfs: add a flag to iterate_inodes_from_logical to find all extent refs for uncompressed extents btrfs: send: remove unused code ...
-rw-r--r--fs/btrfs/Kconfig11
-rw-r--r--fs/btrfs/Makefile3
-rw-r--r--fs/btrfs/async-thread.c2
-rw-r--r--fs/btrfs/backref.c72
-rw-r--r--fs/btrfs/backref.h8
-rw-r--r--fs/btrfs/btrfs_inode.h29
-rw-r--r--fs/btrfs/check-integrity.c8
-rw-r--r--fs/btrfs/compression.c493
-rw-r--r--fs/btrfs/compression.h6
-rw-r--r--fs/btrfs/ctree.c17
-rw-r--r--fs/btrfs/ctree.h30
-rw-r--r--fs/btrfs/delayed-inode.c46
-rw-r--r--fs/btrfs/delayed-ref.c296
-rw-r--r--fs/btrfs/delayed-ref.h54
-rw-r--r--fs/btrfs/disk-io.c227
-rw-r--r--fs/btrfs/extent-tree.c829
-rw-r--r--fs/btrfs/extent_io.c44
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/file.c50
-rw-r--r--fs/btrfs/free-space-tree.c4
-rw-r--r--fs/btrfs/inode-map.c3
-rw-r--r--fs/btrfs/inode.c327
-rw-r--r--fs/btrfs/ioctl.c156
-rw-r--r--fs/btrfs/lzo.c5
-rw-r--r--fs/btrfs/ordered-data.c21
-rw-r--r--fs/btrfs/qgroup.c8
-rw-r--r--fs/btrfs/raid56.c30
-rw-r--r--fs/btrfs/ref-verify.c1031
-rw-r--r--fs/btrfs/ref-verify.h62
-rw-r--r--fs/btrfs/relocation.c17
-rw-r--r--fs/btrfs/root-tree.c4
-rw-r--r--fs/btrfs/scrub.c22
-rw-r--r--fs/btrfs/send.c74
-rw-r--r--fs/btrfs/send.h2
-rw-r--r--fs/btrfs/super.c37
-rw-r--r--fs/btrfs/sysfs.c63
-rw-r--r--fs/btrfs/sysfs.h26
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c3
-rw-r--r--fs/btrfs/tests/inode-tests.c20
-rw-r--r--fs/btrfs/tests/qgroup-tests.c30
-rw-r--r--fs/btrfs/transaction.c16
-rw-r--r--fs/btrfs/tree-checker.c425
-rw-r--r--fs/btrfs/tree-checker.h26
-rw-r--r--fs/btrfs/tree-log.c34
-rw-r--r--fs/btrfs/volumes.c168
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/btrfs/zlib.c15
-rw-r--r--fs/btrfs/zstd.c5
-rw-r--r--include/trace/events/btrfs.h41
-rw-r--r--include/uapi/linux/btrfs.h8
-rw-r--r--include/uapi/linux/btrfs_tree.h1
51 files changed, 3356 insertions, 1556 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index a26c63b4ad68..2e558227931a 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -91,3 +91,14 @@ config BTRFS_ASSERT
any of the assertions trip. This is meant for btrfs developers only.
If unsure, say N.
+
+config BTRFS_FS_REF_VERIFY
+ bool "Btrfs with the ref verify tool compiled in"
+ depends on BTRFS_FS
+ default n
+ help
+ Enable run-time extent reference verification instrumentation. This
+ is meant to be used by btrfs developers for tracking down extent
+ reference problems or verifying they didn't break something.
+
+ If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f2cd9dedb037..6fe881d5cb38 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -10,10 +10,11 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o hash.o free-space-tree.o
+ uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index e00c8a9fd5bb..d5540749f0e5 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -67,7 +67,7 @@ struct btrfs_workqueue {
static void normal_work_helper(struct btrfs_work *work);
#define BTRFS_WORK_HELPER(name) \
-void btrfs_##name(struct work_struct *arg) \
+noinline_for_stack void btrfs_##name(struct work_struct *arg) \
{ \
struct btrfs_work *work = container_of(arg, struct btrfs_work, \
normal_work); \
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b517ef1477ea..7d0dc100a09a 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -40,12 +40,14 @@ static int check_extent_in_eb(const struct btrfs_key *key,
const struct extent_buffer *eb,
const struct btrfs_file_extent_item *fi,
u64 extent_item_pos,
- struct extent_inode_elem **eie)
+ struct extent_inode_elem **eie,
+ bool ignore_offset)
{
u64 offset = 0;
struct extent_inode_elem *e;
- if (!btrfs_file_extent_compression(eb, fi) &&
+ if (!ignore_offset &&
+ !btrfs_file_extent_compression(eb, fi) &&
!btrfs_file_extent_encryption(eb, fi) &&
!btrfs_file_extent_other_encoding(eb, fi)) {
u64 data_offset;
@@ -84,7 +86,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie)
static int find_extent_in_eb(const struct extent_buffer *eb,
u64 wanted_disk_byte, u64 extent_item_pos,
- struct extent_inode_elem **eie)
+ struct extent_inode_elem **eie,
+ bool ignore_offset)
{
u64 disk_byte;
struct btrfs_key key;
@@ -113,7 +116,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
if (disk_byte != wanted_disk_byte)
continue;
- ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
+ ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
if (ret < 0)
return ret;
}
@@ -419,7 +422,7 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
struct ulist *parents, struct prelim_ref *ref,
int level, u64 time_seq, const u64 *extent_item_pos,
- u64 total_refs)
+ u64 total_refs, bool ignore_offset)
{
int ret = 0;
int slot;
@@ -472,7 +475,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
if (extent_item_pos) {
ret = check_extent_in_eb(&key, eb, fi,
*extent_item_pos,
- &eie);
+ &eie, ignore_offset);
if (ret < 0)
break;
}
@@ -510,7 +513,8 @@ next:
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
struct prelim_ref *ref, struct ulist *parents,
- const u64 *extent_item_pos, u64 total_refs)
+ const u64 *extent_item_pos, u64 total_refs,
+ bool ignore_offset)
{
struct btrfs_root *root;
struct btrfs_key root_key;
@@ -581,7 +585,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
}
ret = add_all_parents(root, path, parents, ref, level, time_seq,
- extent_item_pos, total_refs);
+ extent_item_pos, total_refs, ignore_offset);
out:
path->lowest_level = 0;
btrfs_release_path(path);
@@ -616,7 +620,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
struct preftrees *preftrees,
const u64 *extent_item_pos, u64 total_refs,
- struct share_check *sc)
+ struct share_check *sc, bool ignore_offset)
{
int err;
int ret = 0;
@@ -661,7 +665,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
}
err = resolve_indirect_ref(fs_info, path, time_seq, ref,
parents, extent_item_pos,
- total_refs);
+ total_refs, ignore_offset);
/*
* we can only tolerate ENOENT,otherwise,we should catch error
* and return directly.
@@ -769,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct btrfs_key key;
struct btrfs_key tmp_op_key;
struct btrfs_key *op_key = NULL;
+ struct rb_node *n;
int count;
int ret = 0;
@@ -778,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
spin_lock(&head->lock);
- list_for_each_entry(node, &head->ref_list, list) {
+ for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
+ node = rb_entry(n, struct btrfs_delayed_ref_node,
+ ref_node);
if (node->seq > seq)
continue;
@@ -1107,13 +1114,17 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
*
* Otherwise this returns 0 for success and <0 for an error.
*
+ * If ignore_offset is set to false, only extent refs whose offsets match
+ * extent_item_pos are returned. If true, every extent ref is returned
+ * and extent_item_pos is ignored.
+ *
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist *refs,
struct ulist *roots, const u64 *extent_item_pos,
- struct share_check *sc)
+ struct share_check *sc, bool ignore_offset)
{
struct btrfs_key key;
struct btrfs_path *path;
@@ -1178,7 +1189,7 @@ again:
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
- refcount_inc(&head->node.refs);
+ refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
@@ -1189,7 +1200,7 @@ again:
*/
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(&head->node);
+ btrfs_put_delayed_ref_head(head);
goto again;
}
spin_unlock(&delayed_refs->lock);
@@ -1235,7 +1246,7 @@ again:
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
- extent_item_pos, total_refs, sc);
+ extent_item_pos, total_refs, sc, ignore_offset);
if (ret)
goto out;
@@ -1282,7 +1293,7 @@ again:
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
ret = find_extent_in_eb(eb, bytenr,
- *extent_item_pos, &eie);
+ *extent_item_pos, &eie, ignore_offset);
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
if (ret < 0)
@@ -1350,7 +1361,7 @@ static void free_leaf_list(struct ulist *blocks)
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **leafs,
- const u64 *extent_item_pos)
+ const u64 *extent_item_pos, bool ignore_offset)
{
int ret;
@@ -1359,7 +1370,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
- *leafs, NULL, extent_item_pos, NULL);
+ *leafs, NULL, extent_item_pos, NULL, ignore_offset);
if (ret < 0 && ret != -ENOENT) {
free_leaf_list(*leafs);
return ret;
@@ -1383,7 +1394,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots)
+ u64 time_seq, struct ulist **roots,
+ bool ignore_offset)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
@@ -1402,7 +1414,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
- tmp, *roots, NULL, NULL);
+ tmp, *roots, NULL, NULL, ignore_offset);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
@@ -1421,14 +1433,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots)
+ u64 time_seq, struct ulist **roots,
+ bool ignore_offset)
{
int ret;
if (!trans)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
- time_seq, roots);
+ time_seq, roots, ignore_offset);
if (!trans)
up_read(&fs_info->commit_root_sem);
return ret;
@@ -1483,7 +1496,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
- roots, NULL, &shared);
+ roots, NULL, &shared, false);
if (ret == BACKREF_FOUND_SHARED) {
/* this is the only condition under which we return 1 */
ret = 1;
@@ -1877,7 +1890,8 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid, u64 extent_item_pos,
int search_commit_root,
- iterate_extent_inodes_t *iterate, void *ctx)
+ iterate_extent_inodes_t *iterate, void *ctx,
+ bool ignore_offset)
{
int ret;
struct btrfs_trans_handle *trans = NULL;
@@ -1903,14 +1917,15 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
tree_mod_seq_elem.seq, &refs,
- &extent_item_pos);
+ &extent_item_pos, ignore_offset);
if (ret)
goto out;
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
- tree_mod_seq_elem.seq, &roots);
+ tree_mod_seq_elem.seq, &roots,
+ ignore_offset);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
@@ -1943,7 +1958,8 @@ out:
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- iterate_extent_inodes_t *iterate, void *ctx)
+ iterate_extent_inodes_t *iterate, void *ctx,
+ bool ignore_offset)
{
int ret;
u64 extent_item_pos;
@@ -1961,7 +1977,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, search_commit_root,
- iterate, ctx);
+ iterate, ctx, ignore_offset);
return ret;
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index e410335841aa..0c2fab8514ff 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -43,17 +43,19 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
u64 extent_offset, int search_commit_root,
- iterate_extent_inodes_t *iterate, void *ctx);
+ iterate_extent_inodes_t *iterate, void *ctx,
+ bool ignore_offset);
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
- iterate_extent_inodes_t *iterate, void *ctx);
+ iterate_extent_inodes_t *iterate, void *ctx,
+ bool ignore_offset);
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots);
+ u64 time_seq, struct ulist **roots, bool ignore_offset);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index eccadb5f62a5..63f0ccc92a71 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -36,14 +36,13 @@
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
-#define BTRFS_INODE_DELALLOC_META_RESERVED 4
-#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
-#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
-#define BTRFS_INODE_NEEDS_FULL_SYNC 7
-#define BTRFS_INODE_COPY_EVERYTHING 8
-#define BTRFS_INODE_IN_DELALLOC_LIST 9
-#define BTRFS_INODE_READDIO_NEED_LOCK 10
-#define BTRFS_INODE_HAS_PROPS 11
+#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
+#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
+#define BTRFS_INODE_NEEDS_FULL_SYNC 6
+#define BTRFS_INODE_COPY_EVERYTHING 7
+#define BTRFS_INODE_IN_DELALLOC_LIST 8
+#define BTRFS_INODE_READDIO_NEED_LOCK 9
+#define BTRFS_INODE_HAS_PROPS 10
/* in memory btrfs inode */
struct btrfs_inode {
@@ -176,7 +175,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
unsigned outstanding_extents;
- unsigned reserved_extents;
+
+ struct btrfs_block_rsv block_rsv;
/*
* Cached values of inode properties
@@ -267,6 +267,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
return false;
}
+static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
+ int mod)
+{
+ lockdep_assert_held(&inode->lock);
+ inode->outstanding_extents += mod;
+ if (btrfs_is_free_space_inode(inode))
+ return;
+ trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
+ mod);
+}
+
static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 7d5a9b51f0d7..7d51b5a5b505 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -613,7 +613,7 @@ static void btrfsic_dev_state_hashtable_add(
struct btrfsic_dev_state_hashtable *h)
{
const unsigned int hashval =
- (((unsigned int)((uintptr_t)ds->bdev)) &
+ (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
(BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
list_add(&ds->collision_resolving_node, h->table + hashval);
@@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
* btrfsic_mount(), this might return NULL */
- dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
+ dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno);
if (NULL != dev_state &&
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
unsigned int i = 0;
@@ -2913,7 +2913,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
state = kvzalloc(sizeof(*state), GFP_KERNEL);
if (!state) {
pr_info("btrfs check-integrity: allocation failed!\n");
- return -1;
+ return -ENOMEM;
}
if (!btrfsic_is_initialized) {
@@ -2945,7 +2945,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
if (NULL == ds) {
pr_info("btrfs check-integrity: kmalloc() failed!\n");
mutex_unlock(&btrfsic_mutex);
- return -1;
+ return -ENOMEM;
}
ds->bdev = device->bdev;
ds->state = state;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 280384bf34f1..b35ce16b3df3 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -33,6 +33,8 @@
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
+#include <linux/sort.h>
+#include <linux/log2.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -255,7 +257,8 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_status ? 0 : 1);
+ bio->bi_status ?
+ BLK_STS_OK : BLK_STS_NOTSUPP);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
@@ -706,7 +709,86 @@ out:
return ret;
}
-static struct {
+/*
+ * Heuristic uses systematic sampling to collect data from the input data
+ * range, the logic can be tuned by the following constants:
+ *
+ * @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample
+ * @SAMPLING_INTERVAL - range from which the sampled data can be collected
+ */
+#define SAMPLING_READ_SIZE (16)
+#define SAMPLING_INTERVAL (256)
+
+/*
+ * For statistical analysis of the input data we consider bytes that form a
+ * Galois Field of 256 objects. Each object has an attribute count, ie. how
+ * many times the object appeared in the sample.
+ */
+#define BUCKET_SIZE (256)
+
+/*
+ * The size of the sample is based on a statistical sampling rule of thumb.
+ * The common way is to perform sampling tests as long as the number of
+ * elements in each cell is at least 5.
+ *
+ * Instead of 5, we choose 32 to obtain more accurate results.
+ * If the data contain the maximum number of symbols, which is 256, we obtain a
+ * sample size bound by 8192.
+ *
+ * For a sample of at most 8KB of data per data range: 16 consecutive bytes
+ * from up to 512 locations.
+ */
+#define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * \
+ SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
+
+struct bucket_item {
+ u32 count;
+};
+
+struct heuristic_ws {
+ /* Partial copy of input data */
+ u8 *sample;
+ u32 sample_size;
+ /* Buckets store counters for each byte value */
+ struct bucket_item *bucket;
+ struct list_head list;
+};
+
+static void free_heuristic_ws(struct list_head *ws)
+{
+ struct heuristic_ws *workspace;
+
+ workspace = list_entry(ws, struct heuristic_ws, list);
+
+ kvfree(workspace->sample);
+ kfree(workspace->bucket);
+ kfree(workspace);
+}
+
+static struct list_head *alloc_heuristic_ws(void)
+{
+ struct heuristic_ws *ws;
+
+ ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+ if (!ws)
+ return ERR_PTR(-ENOMEM);
+
+ ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
+ if (!ws->sample)
+ goto fail;
+
+ ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
+ if (!ws->bucket)
+ goto fail;
+
+ INIT_LIST_HEAD(&ws->list);
+ return &ws->list;
+fail:
+ free_heuristic_ws(&ws->list);
+ return ERR_PTR(-ENOMEM);
+}
+
+struct workspaces_list {
struct list_head idle_ws;
spinlock_t ws_lock;
/* Number of free workspaces */
@@ -715,7 +797,11 @@ static struct {
atomic_t total_ws;
/* Waiters for a free workspace */
wait_queue_head_t ws_wait;
-} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
+};
+
+static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
+
+static struct workspaces_list btrfs_heuristic_ws;
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zlib_compress,
@@ -725,11 +811,25 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
void __init btrfs_init_compress(void)
{
+ struct list_head *workspace;
int i;
- for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
- struct list_head *workspace;
+ INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
+ spin_lock_init(&btrfs_heuristic_ws.ws_lock);
+ atomic_set(&btrfs_heuristic_ws.total_ws, 0);
+ init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);
+
+ workspace = alloc_heuristic_ws();
+ if (IS_ERR(workspace)) {
+ pr_warn(
+ "BTRFS: cannot preallocate heuristic workspace, will try later\n");
+ } else {
+ atomic_set(&btrfs_heuristic_ws.total_ws, 1);
+ btrfs_heuristic_ws.free_ws = 1;
+ list_add(workspace, &btrfs_heuristic_ws.idle_ws);
+ }
+ for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
atomic_set(&btrfs_comp_ws[i].total_ws, 0);
@@ -756,18 +856,32 @@ void __init btrfs_init_compress(void)
* Preallocation makes a forward progress guarantees and we do not return
* errors.
*/
-static struct list_head *find_workspace(int type)
+static struct list_head *__find_workspace(int type, bool heuristic)
{
struct list_head *workspace;
int cpus = num_online_cpus();
int idx = type - 1;
unsigned nofs_flag;
+ struct list_head *idle_ws;
+ spinlock_t *ws_lock;
+ atomic_t *total_ws;
+ wait_queue_head_t *ws_wait;
+ int *free_ws;
+
+ if (heuristic) {
+ idle_ws = &btrfs_heuristic_ws.idle_ws;
+ ws_lock = &btrfs_heuristic_ws.ws_lock;
+ total_ws = &btrfs_heuristic_ws.total_ws;