From 25c9bc2e2bc9d67de7fc49ff2784cae5de755192 Mon Sep 17 00:00:00 2001 From: Gerhard Heift Date: Thu, 30 Jan 2014 16:23:57 +0100 Subject: btrfs: tree_search: eliminate redundant nr_items check If the amount of items reached the given limit of nr_items, we can leave copy_to_sk without updating the key. Also by returning 1 we leave the loop in search_ioctl without rechecking if we reached the given limit. Signed-off-by: Gerhard Heift Signed-off-by: Chris Mason Acked-by: David Sterba --- fs/btrfs/ioctl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c2e796b664c1..65148cd102cd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1995,7 +1995,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (sizeof(sh) + item_len + *sk_offset > BTRFS_SEARCH_ARGS_BUFSIZE) { ret = 1; - goto overflow; + goto out; } sh.objectid = key->objectid; @@ -2017,8 +2017,10 @@ static noinline int copy_to_sk(struct btrfs_root *root, } (*num_found)++; - if (*num_found >= sk->nr_items) - break; + if (*num_found >= sk->nr_items) { + ret = 1; + goto out; + } } advance_key: ret = 0; @@ -2033,7 +2035,7 @@ advance_key: key->objectid++; } else ret = 1; -overflow: +out: return ret; } @@ -2085,7 +2087,7 @@ static noinline int search_ioctl(struct inode *inode, ret = copy_to_sk(root, path, &key, sk, args->buf, &sk_offset, &num_found); btrfs_release_path(path); - if (ret || num_found >= sk->nr_items) + if (ret) break; } -- cgit v1.2.3 From 12544442882e13aee98126928bb3a1a141484fe8 Mon Sep 17 00:00:00 2001 From: Gerhard Heift Date: Thu, 30 Jan 2014 16:23:58 +0100 Subject: btrfs: tree_search, search_ioctl: accept varying buffer rewrite search_ioctl to accept a buffer with varying size Signed-off-by: Gerhard Heift Signed-off-by: Chris Mason Acked-by: David Sterba --- fs/btrfs/ioctl.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 
65148cd102cd..775640475e35 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1957,6 +1957,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, + size_t buf_size, char *buf, unsigned long *sk_offset, int *num_found) @@ -1989,11 +1990,10 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (!key_in_sk(key, sk)) continue; - if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + if (sizeof(sh) + item_len > buf_size) item_len = 0; - if (sizeof(sh) + item_len + *sk_offset > - BTRFS_SEARCH_ARGS_BUFSIZE) { + if (sizeof(sh) + item_len + *sk_offset > buf_size) { ret = 1; goto out; } @@ -2040,17 +2040,21 @@ out: } static noinline int search_ioctl(struct inode *inode, - struct btrfs_ioctl_search_args *args) + struct btrfs_ioctl_search_key *sk, + size_t buf_size, + char *buf) { struct btrfs_root *root; struct btrfs_key key; struct btrfs_path *path; - struct btrfs_ioctl_search_key *sk = &args->key; struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; int ret; int num_found = 0; unsigned long sk_offset = 0; + if (buf_size < sizeof(struct btrfs_ioctl_search_header)) + return -EOVERFLOW; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2084,7 +2088,7 @@ static noinline int search_ioctl(struct inode *inode, ret = 0; goto err; } - ret = copy_to_sk(root, path, &key, sk, args->buf, + ret = copy_to_sk(root, path, &key, sk, buf_size, buf, &sk_offset, &num_found); btrfs_release_path(path); if (ret) @@ -2113,7 +2117,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, return PTR_ERR(args); inode = file_inode(file); - ret = search_ioctl(inode, args); + ret = search_ioctl(inode, &args->key, sizeof(args->buf), args->buf); if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; kfree(args); -- cgit v1.2.3 From 8f5f6178f366bdb93d6af6f2bdca8ebca1ad9fe9 Mon Sep 17 00:00:00 2001 From: Gerhard Heift Date: Thu, 30 Jan 2014 16:23:59 +0100 
Subject: btrfs: tree_search, copy_to_sk: return EOVERFLOW for too small buffer In copy_to_sk, if an item is too large for the given buffer, it now returns -EOVERFLOW instead of copying a search_header with len = 0. For backward compatibility for the first item it still copies such a header to the buffer, but not any other following items, which could have fitted. tree_search changes -EOVERFLOW back to 0 to behave similarly to the way it behaved before this patch. Signed-off-by: Gerhard Heift Signed-off-by: Chris Mason Acked-by: David Sterba --- fs/btrfs/ioctl.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 775640475e35..6e09fc1c1c18 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1990,8 +1990,20 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (!key_in_sk(key, sk)) continue; - if (sizeof(sh) + item_len > buf_size) + if (sizeof(sh) + item_len > buf_size) { + if (*num_found) { + ret = 1; + goto out; + } + + /* + * return one empty item back for v1, which does not + * handle -EOVERFLOW + */ + item_len = 0; + ret = -EOVERFLOW; + } if (sizeof(sh) + item_len + *sk_offset > buf_size) { ret = 1; @@ -2017,6 +2029,9 @@ static noinline int copy_to_sk(struct btrfs_root *root, } (*num_found)++; + if (ret) /* -EOVERFLOW from above */ + goto out; + if (*num_found >= sk->nr_items) { ret = 1; goto out; @@ -2095,7 +2110,8 @@ static noinline int search_ioctl(struct inode *inode, break; } - ret = 0; + if (ret > 0) + ret = 0; err: sk->nr_items = num_found; btrfs_free_path(path); @@ -2118,6 +2134,14 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, inode = file_inode(file); ret = search_ioctl(inode, &args->key, sizeof(args->buf), args->buf); + + /* + * In the origin implementation an overflow is handled by returning a + * search header with a len of zero, so reset ret.
+ */ + if (ret == -EOVERFLOW) + ret = 0; + if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; kfree(args); -- cgit v1.2.3 From 9b6e817d022fd44fe99db92f00d4b18ac2d8f429 Mon Sep 17 00:00:00 2001 From: Gerhard Heift Date: Thu, 30 Jan 2014 16:24:00 +0100 Subject: btrfs: tree_search, copy_to_sk: return needed size on EOVERFLOW If an item in tree_search is too large to be stored in the given buffer, return the needed size (including the header). Signed-off-by: Gerhard Heift Signed-off-by: Chris Mason Acked-by: David Sterba --- fs/btrfs/ioctl.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6e09fc1c1c18..3d89fd888399 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1957,7 +1957,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, - size_t buf_size, + size_t *buf_size, char *buf, unsigned long *sk_offset, int *num_found) @@ -1990,7 +1990,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (!key_in_sk(key, sk)) continue; - if (sizeof(sh) + item_len > buf_size) { + if (sizeof(sh) + item_len > *buf_size) { if (*num_found) { ret = 1; goto out; @@ -2001,11 +2001,12 @@ static noinline int copy_to_sk(struct btrfs_root *root, * handle -EOVERFLOW */ + *buf_size = sizeof(sh) + item_len; item_len = 0; ret = -EOVERFLOW; } - if (sizeof(sh) + item_len + *sk_offset > buf_size) { + if (sizeof(sh) + item_len + *sk_offset > *buf_size) { ret = 1; goto out; } @@ -2056,7 +2057,7 @@ out: static noinline int search_ioctl(struct inode *inode, struct btrfs_ioctl_search_key *sk, - size_t buf_size, + size_t *buf_size, char *buf) { struct btrfs_root *root; @@ -2067,8 +2068,10 @@ static noinline int search_ioctl(struct inode *inode, int num_found = 0; unsigned long sk_offset = 0; - if (buf_size < sizeof(struct btrfs_ioctl_search_header)) + if (*buf_size < sizeof(struct 
btrfs_ioctl_search_header)) { + *buf_size = sizeof(struct btrfs_ioctl_search_header); return -EOVERFLOW; + } path = btrfs_alloc_path(); if (!path) @@ -2121,9 +2124,10 @@ err: static noinline int btrfs_ioctl_tree_search(struct file *file, void __user *argp) { - struct btrfs_ioctl_search_args *args; - struct inode *inode; - int ret; + struct btrfs_ioctl_search_args *args; + struct inode *inode; + int ret; + size_t buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -2132,8 +2136,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, if (IS_ERR(args)) return PTR_ERR(args); + buf_size = sizeof(args->buf); + inode = file_inode(file); - ret = search_ioctl(inode, &args->key, sizeof(args->buf), args->buf); + ret = search_ioctl(inode, &args->key, &buf_size, args->buf); /* * In the origin implementation an overflow is handled by returning a -- cgit v1.2.3 From 550ac1d85ef99f3390a6ea87c70b7683647f6110 Mon Sep 17 00:00:00 2001 From: Gerhard Heift Date: Thu, 30 Jan 2014 16:24:01 +0100 Subject: btrfs: new function read_extent_buffer_to_user This new function reads the content of an extent directly to user memory. 
Signed-off-by: Gerhard Heift Signed-off-by: Chris Mason Acked-by: David Sterba --- fs/btrfs/extent_io.c | 37 +++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 3 +++ 2 files changed, 40 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0b5fa91d9a88..930f23dfaa2b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5067,6 +5067,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } } +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char __user *dst = (char __user *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + + while (len > 0) { + page = extent_buffer_page(eb, i); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = page_address(page); + if (copy_to_user(dst, kaddr + offset, cur)) { + ret = -EFAULT; + break; + } + + dst += cur; + len -= cur; + offset = 0; + i++; + } + + return ret; +} + int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 8b63f2d46518..15ce5f2a2b62 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, void read_extent_buffer(struct extent_buffer *eb, void *dst, unsigned long start, unsigned long len); +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, + unsigned long start, + unsigned long len); void write_extent_buffer(struct extent_buffer *eb, const void *src, unsigned long start, unsigned long len); void copy_extent_buffer(struct 
extent_buffer *dst, struct extent_buffer *src, -- cgit v1.2.3 From ba346b357d70becdd8e20ff9493cd56101ee0f46 Mon Sep 17 00:00:00 2001 From: Gerhard Heift Date: Thu, 30 Jan 2014 16:24:02 +0100 Subject: btrfs: tree_search, search_ioctl: direct copy to userspace By copying each found item separately to userspace, we do not need an extra buffer in the kernel. Signed-off-by: Gerhard Heift Signed-off-by: Chris Mason Acked-by: David Sterba --- fs/btrfs/ioctl.c | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3d89fd888399..393a543a519e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1958,7 +1958,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, size_t *buf_size, - char *buf, + char __user *ubuf, unsigned long *sk_offset, int *num_found) { @@ -2018,14 +2018,22 @@ static noinline int copy_to_sk(struct btrfs_root *root, sh.transid = found_transid; /* copy search result header */ - memcpy(buf + *sk_offset, &sh, sizeof(sh)); + if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { + ret = -EFAULT; + goto out; + } + *sk_offset += sizeof(sh); if (item_len) { - char *p = buf + *sk_offset; + char __user *up = ubuf + *sk_offset; /* copy the item */ - read_extent_buffer(leaf, p, - item_off, item_len); + if (read_extent_buffer_to_user(leaf, up, + item_off, item_len)) { + ret = -EFAULT; + goto out; + } + *sk_offset += item_len; } (*num_found)++; @@ -2052,13 +2060,22 @@ advance_key: } else ret = 1; out: + /* + * 0: all items from this leaf copied, continue with next + * 1: * more items can be copied, but unused buffer is too small + * * all items were found + * Either way, it will stops the loop which iterates to the next + * leaf + * -EOVERFLOW: item was to large for buffer + * -EFAULT: could not copy extent buffer back to userspace + */ return ret; } static noinline int search_ioctl(struct inode *inode,
struct btrfs_ioctl_search_key *sk, size_t *buf_size, - char *buf) + char __user *ubuf) { struct btrfs_root *root; struct btrfs_key key; @@ -2106,7 +2123,7 @@ static noinline int search_ioctl(struct inode *inode, ret = 0; goto err; } - ret = copy_to_sk(root, path, &key, sk, buf_size, buf, + ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, &sk_offset, &num_found); btrfs_release_path(path); if (ret) @@ -2124,7 +2141,8 @@ err: static noinline int btrfs_ioctl_tree_search(struct file *file, void __user *argp) { - struct btrfs_ioctl_search_args *args; + struct btrfs_ioctl_search_args __user *uargs; + struct btrfs_ioctl_search_key sk; struct inode *inode; int ret; size_t buf_size; @@ -2132,14 +2150,15 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - args = memdup_user(argp, sizeof(*args)); - if (IS_ERR(args)) - return PTR_ERR(args); + uargs = (struct btrfs_ioctl_search_args __user *)argp; - buf_size = sizeof(args->buf); + if (copy_from_user(&sk, &uargs->key, sizeof(sk))) + return -EFAULT; + + buf_size = sizeof(uargs->buf); inode = file_inode(file); - ret = search_ioctl(inode, &args->key, &buf_size, args->buf); + ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); /* * In the origin implementation an overflow is handled by returning a @@ -2148,9 +2167,8 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, if (ret == -EOVERFLOW) ret = 0; - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) ret = -EFAULT; - kfree(args); return ret; } -- cgit v1.2.3 From cc68a8a5a4330a4bb72922d0c7a7044ae13ee692 Mon Sep 17 00:00:00 2001 From: Gerhard Heift Date: Thu, 30 Jan 2014 16:24:03 +0100 Subject: btrfs: new ioctl TREE_SEARCH_V2 This new ioctl call allows the user to supply a buffer of varying size in which a tree search can store its results. 
This is much more flexible if you want to receive items which are larger than the current fixed buffer of 3992 bytes or if you want to fetch more items at once. Items larger than this buffer are for example some of the type EXTENT_CSUM. Signed-off-by: Gerhard Heift Signed-off-by: Chris Mason Acked-by: David Sterba --- fs/btrfs/ioctl.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/btrfs.h | 10 ++++++++++ 2 files changed, 51 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 393a543a519e..6ea15469c63f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2172,6 +2172,45 @@ static noinline int btrfs_ioctl_tree_search(struct file *file, return ret; } +static noinline int btrfs_ioctl_tree_search_v2(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_search_args_v2 __user *uarg; + struct btrfs_ioctl_search_args_v2 args; + struct inode *inode; + int ret; + size_t buf_size; + const size_t buf_limit = 16 * 1024 * 1024; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* copy search header and buffer size */ + uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + buf_size = args.buf_size; + + if (buf_size < sizeof(struct btrfs_ioctl_search_header)) + return -EOVERFLOW; + + /* limit result size to 16MB */ + if (buf_size > buf_limit) + buf_size = buf_limit; + + inode = file_inode(file); + ret = search_ioctl(inode, &args.key, &buf_size, + (char *)(&uarg->buf[0])); + if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) + ret = -EFAULT; + else if (ret == -EOVERFLOW && + copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) + ret = -EFAULT; + + return ret; +} + /* * Search INODE_REFs to identify path name of 'dirid' directory * in a 'tree_id' tree. and sets path name to 'name'. 
@@ -5252,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_trans_end(file); case BTRFS_IOC_TREE_SEARCH: return btrfs_ioctl_tree_search(file, argp); + case BTRFS_IOC_TREE_SEARCH_V2: + return btrfs_ioctl_tree_search_v2(file, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(file, argp); case BTRFS_IOC_INO_PATHS: diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 7554fd381a56..6f9c38ce45c7 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -306,6 +306,14 @@ struct btrfs_ioctl_search_args { char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; }; +struct btrfs_ioctl_search_args_v2 { + struct btrfs_ioctl_search_key key; /* in/out - search parameters */ + __u64 buf_size; /* in - size of buffer + * out - on EOVERFLOW: needed size + * to store item */ + __u64 buf[0]; /* out - found items */ +}; + struct btrfs_ioctl_clone_range_args { __s64 src_fd; __u64 src_offset, src_length; @@ -558,6 +566,8 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_defrag_range_args) #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ struct btrfs_ioctl_search_args) +#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args_v2) #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ struct btrfs_ioctl_ino_lookup_args) #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64) -- cgit v1.2.3 From 5fbc7c59fd22c5a6531b40b0759624b680a95e52 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 11 Jun 2014 10:55:22 +0800 Subject: Btrfs: fix unfinished readahead thread for raid5/6 degraded mounting Steps to reproduce: # mkfs.btrfs -f /dev/sd[b-f] -m raid5 -d raid5 # mkfs.ext4 /dev/sdc --->corrupt one of btrfs device # mount /dev/sdb /mnt -o degraded # btrfs scrub start -BRd /mnt This is because readahead would skip missing device, this is not true for RAID5/6, because REQ_GET_READ_MIRRORS return 1 for RAID5/6 block mapping. 
If the expected data is located on the missing device, the readahead thread would not call __readahead_hook(), which makes the wait for event @rc->elems=0 hang forever. Fix this problem by checking the return value of btrfs_map_block(): we can only skip a missing device safely if there are several mirrors. Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/reada.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 30947f923620..09230cf3a244 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, continue; } if (!dev->bdev) { - /* cannot read ahead on missing device */ - continue; + /* + * cannot read ahead on missing device, but for RAID5/6, + * REQ_GET_READ_MIRRORS return 1. So don't skip missing + * device for such case. + */ + if (nzones > 1) + continue; } if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { -- cgit v1.2.3 From f1e3c289498f26d98b65ddaae0cd892c98a6ce84 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 11 Jun 2014 12:00:25 -0400 Subject: btrfs: prevent RCU warning when dereferencing radix tree slot Mark the dereference as protected by lock. Not doing so triggers an RCU warning since the radix tree assumes that RCU is in use.
Signed-off-by: Sasha Levin Signed-off-by: Chris Mason --- fs/btrfs/tests/btrfs-tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index a5dcacb5df9c..9626252ee6b4 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -135,7 +135,7 @@ restart: radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { struct extent_buffer *eb; - eb = radix_tree_deref_slot(slot); + eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); if (!eb) continue; /* Shouldn't happen but that kind of thinking creates CVE's */ -- cgit v1.2.3 From b050f9f6ddefe5de9c130fda6493ccaacd5168ba Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 12 Jun 2014 02:47:37 +0100 Subject: Btrfs: fix qgroups sanity test crash or hang Often when running the qgroups sanity test, a crash or a hang happened. This is because the extent buffer the test uses for the root node doesn't have a header level explicitly set, making it have a random level value. This is a problem when it's not zero for the btrfs_search_slot() calls the test ends up doing, resulting in crashes or hangs such as the following: [ 6454.127192] Btrfs loaded, debug=on, assert=on, integrity-checker=on (...) [ 6454.127760] BTRFS: selftest: Running qgroup tests [ 6454.127964] BTRFS: selftest: Running test_test_no_shared_qgroup [ 6454.127966] BTRFS: selftest: Qgroup basic add [ 6480.152005] BUG: soft lockup - CPU#0 stuck for 23s!
[modprobe:5383] [ 6480.152005] Modules linked in: btrfs(+) xor raid6_pq binfmt_misc nfsd auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc i2c_piix4 i2c_core pcspkr evbug psmouse serio_raw e1000 [last unloaded: btrfs] [ 6480.152005] irq event stamp: 188448 [ 6480.152005] hardirqs last enabled at (188447): [] restore_args+0x0/0x30 [ 6480.152005] hardirqs last disabled at (188448): [] apic_timer_interrupt+0x6a/0x80 [ 6480.152005] softirqs last enabled at (188446): [] __do_softirq+0x1cf/0x450 [ 6480.152005] softirqs last disabled at (188441): [] irq_exit+0xb5/0xc0 [ 6480.152005] CPU: 0 PID: 5383 Comm: modprobe Not tainted 3.15.0-rc8-fdm-btrfs-next-33+ #4 [ 6480.152005] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 6480.152005] task: ffff8802146125a0 ti: ffff8800d0d00000 task.ti: ffff8800d0d00000 [ 6480.152005] RIP: 0010:[] [] __write_lock_failed+0x13/0x20 [ 6480.152005] RSP: 0018:ffff8800d0d038e8 EFLAGS: 00000287 [ 6480.152005] RAX: 0000000000000000 RBX: ffffffff8168ef5c RCX: 000005deb8525852 [ 6480.152005] RDX: 0000000000000000 RSI: 0000000000001d45 RDI: ffff8802105000b8 [ 6480.152005] RBP: ffff8800d0d038e8 R08: fffffe12710f63db R09: ffffffffa03196fb [ 6480.152005] R10: ffff8802146125a0 R11: ffff880214612e28 R12: ffff8800d0d03858 [ 6480.152005] R13: 0000000000000000 R14: ffff8800d0d00000 R15: ffff8802146125a0 [ 6480.152005] FS: 00007f14ff804700(0000) GS:ffff880215e00000(0000) knlGS:0000000000000000 [ 6480.152005] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 6480.152005] CR2: 00007fff4df0dac8 CR3: 00000000d1796000 CR4: 00000000000006f0 [ 6480.152005] Stack: [ 6480.152005] ffff8800d0d03908 ffffffff810ae967 0000000000000001 ffff8802105000b8 [ 6480.152005] ffff8800d0d03938 ffffffff8168e57e ffffffffa0319c16 0000000000000007 [ 6480.152005] ffff880210500000 ffff880210500100 ffff8800d0d039b8 ffffffffa0319c16 [ 6480.152005] Call Trace: [ 6480.152005] [] do_raw_write_lock+0x47/0xa0 [ 6480.152005] [] _raw_write_lock+0x5e/0x80 [ 6480.152005] [] ? 
btrfs_tree_lock+0x116/0x270 [btrfs] [ 6480.152005] [] btrfs_tree_lock+0x116/0x270 [btrfs] [ 6480.152005] [] btrfs_lock_root_node+0x3b/0x50 [btrfs] [ 6480.152005] [] btrfs_search_slot+0x916/0xa20 [btrfs] [ 6480.152005] [] ? create_object+0x23f/0x300 [ 6480.152005] [] btrfs_insert_empty_items+0x78/0xd0 [btrfs] [ 6480.152005] [] insert_normal_tree_ref.constprop.4+0xa2/0x19a [btrfs] [ 6480.152005] [] test_no_shared_qgroup+0xb1/0x1ca [btrfs] [ 6480.152005] [] ? local_clock+0x16/0x30 [ 6480.152005] [] btrfs_test_qgroups+0x1ae/0x1d7 [btrfs] [ 6480.152005] [] ? ftrace_define_fields_btrfs_space_reservation+0xfd/0xfd [btrfs] [ 6480.152005] [] init_btrfs_fs+0xb4/0x153 [btrfs] [ 6480.152005] [] do_one_initcall+0x102/0x150 [ 6480.152005] [] ? set_memory_nx+0x43/0x50 [ 6480.152005] [] ? set_section_ro_nx+0x6d/0x74 [ 6480.152005] [] load_module+0x1cdc/0x2630 (...) Therefore initialize the extent buffer as an empty leaf (level 0). Issue easy to reproduce when btrfs is built as a module via: $ for ((i = 1; i <= 1000000; i++)); do rmmod btrfs; modprobe btrfs; done Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/tests/qgroup-tests.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index fa691b754aaf..ec3dcb202357 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -415,6 +415,8 @@ int btrfs_test_qgroups(void) ret = -ENOMEM; goto out; } + btrfs_set_header_level(root->node, 0); + btrfs_set_header_nritems(root->node, 0); root->alloc_bytenr += 8192; tmp_root = btrfs_alloc_dummy_root(); -- cgit v1.2.3 From d737278091fccda77f6896012ba86c5ffb57993f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 12 Jun 2014 00:14:59 -0500 Subject: btrfs: free ulist in qgroup_shared_accounting() error path If tmp = ulist_alloc(GFP_NOFS) fails, we return without freeing the previously allocated qgroups = ulist_alloc(GFP_NOFS) and cause a memory leak. 
Signed-off-by: Eric Sandeen Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cf5aead95a7f..98cb6b2630f9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, return -ENOMEM; tmp = ulist_alloc(GFP_NOFS); - if (!tmp) + if (!tmp) { + ulist_free(qgroups); return -ENOMEM; + } btrfs_get_tree_mod_seq(fs_info, &elem); ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, -- cgit v1.2.3 From 3e2426bd0eb980648449e7a2f5a23e3cd3c7725c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 12 Jun 2014 00:39:58 -0500 Subject: btrfs: fix use of uninit "ret" in end_extent_writepage() If this condition in end_extent_writepage() is false: if (tree->ops && tree->ops->writepage_end_io_hook) we will then test an uninitialized "ret" at: ret = ret < 0 ? ret : -EIO; The test for ret is for the case where ->writepage_end_io_hook failed, and we'd choose that ret as the error; but if there is no ->writepage_end_io_hook, nothing sets ret. Initializing ret to 0 should be sufficient; if writepage_end_io_hook wasn't set, (!uptodate) means non-zero err was passed in, so we choose -EIO in that case. 
Signed-off-by: Eric Sandeen Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 930f23dfaa2b..23398ad430a0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) { int uptodate = (err == 0); struct extent_io_tree *tree; - int ret; + int ret = 0; tree = &BTRFS_I(page->mapping->host)->io_tree; -- cgit v1.2.3 From 47a306a74842248dcd537b85f9a36c7b156c59a9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 12 Jun 2014 00:53:44 -0500 Subject: btrfs: fix error handling in create_pending_snapshot fcebe456 cut and pasted some code to a later point in create_pending_snapshot(), but didn't switch to the appropriate error handling for this stage of the function. Signed-off-by: Eric Sandeen Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9630f10f8e1e..511839c04f11 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1284,11 +1284,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } - pending->error = btrfs_qgroup_inherit(trans, fs_info, - root->root_key.objectid, - objectid, pending->inherit); - if (pending->error) - goto no_free_objectid; + ret = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } /* see comments in should_cow_block() */ set_bit(BTRFS_ROOT_FORCE_COW, &root->state); -- cgit v1.2.3