summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2020-06-16 10:17:36 +0800
committerDavid Sterba <dsterba@suse.com>2020-07-27 12:55:38 +0200
commit2dfb1e43f57dd3aeaa66f7cf05d068db2d4c8788 (patch)
treee05cf5303cdb46912405f2f165e24f1f54bf1980 /fs/btrfs/disk-io.c
parent082b6c970f02fefd278c7833880cda29691a5f34 (diff)
btrfs: preallocate anon block device at first phase of snapshot creation
[BUG] When the anonymous block device pool is exhausted, subvolume/snapshot creation fails with EMFILE (Too many files open). This has been reported by a user. The allocation happens in the second phase during transaction commit where it's only way out is to abort the transaction BTRFS: Transaction aborted (error -24) WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs] RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs] Call Trace: create_pending_snapshots+0x82/0xa0 [btrfs] btrfs_commit_transaction+0x275/0x8c0 [btrfs] btrfs_mksubvol+0x4b9/0x500 [btrfs] btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs] btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs] btrfs_ioctl+0x11a4/0x2da0 [btrfs] do_vfs_ioctl+0xa9/0x640 ksys_ioctl+0x67/0x90 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x5a/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace 33f2f83f3d5250e9 ]--- BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown BTRFS info (device sda1): forced readonly BTRFS warning (device sda1): Skipping commit of aborted transaction. BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown [CAUSE] When the global anonymous block device pool is exhausted, the following call chain will fail, and lead to transaction abort: btrfs_ioctl_snap_create_v2() |- btrfs_ioctl_snap_create_transid() |- btrfs_mksubvol() |- btrfs_commit_transaction() |- create_pending_snapshot() |- btrfs_get_fs_root() |- btrfs_init_fs_root() |- get_anon_bdev() [FIX] Although we can't enlarge the anonymous block device pool, at least we can preallocate anon_dev for subvolume/snapshot in the first phase, outside of transaction context and exactly at the moment the user calls the creation ioctl. Reported-by: Greed Rong <greedrong@gmail.com> Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/ CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c71
1 files changed, 64 insertions, 7 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c90edf04a9db..9b307034d32c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1391,7 +1391,12 @@ alloc_fail:
goto out;
}
-static int btrfs_init_fs_root(struct btrfs_root *root)
+/*
+ * Initialize subvolume root in-memory structure
+ *
+ * @anon_dev: anonymous device to attach to the root, if zero, allocate new
+ */
+static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
{
int ret;
unsigned int nofs_flag;
@@ -1430,9 +1435,13 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
*/
if (is_fstree(root->root_key.objectid) &&
btrfs_root_refs(&root->root_item) > 0) {
- ret = get_anon_bdev(&root->anon_dev);
- if (ret)
- goto fail;
+ if (!anon_dev) {
+ ret = get_anon_bdev(&root->anon_dev);
+ if (ret)
+ goto fail;
+ } else {
+ root->anon_dev = anon_dev;
+ }
}
mutex_lock(&root->objectid_mutex);
@@ -1537,8 +1546,27 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
}
-struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
- u64 objectid, bool check_ref)
+/*
+ * Get an in-memory reference of a root structure.
+ *
+ * For essential trees like root/extent tree, we grab it from fs_info directly.
+ * For subvolume trees, we check the cached filesystem roots first. If not
+ * found, then read it from disk and add it to cached fs roots.
+ *
+ * Caller should release the root by calling btrfs_put_root() after the usage.
+ *
+ * NOTE: Reloc and log trees can't be read by this function as they share the
+ * same root objectid.
+ *
+ * @objectid: root id
+ * @anon_dev: preallocated anonymous block device number for new roots,
+ * pass 0 for new allocation.
+ * @check_ref: whether to check root item references, If true, return -ENOENT
+ * for orphan roots
+ */
+static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
+ u64 objectid, dev_t anon_dev,
+ bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
@@ -1567,6 +1595,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
+ /* Shouldn't get preallocated anon_dev for cached roots */
+ ASSERT(!anon_dev);
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
return ERR_PTR(-ENOENT);
@@ -1586,7 +1616,7 @@ again:
goto fail;
}
- ret = btrfs_init_fs_root(root);
+ ret = btrfs_init_fs_root(root, anon_dev);
if (ret)
goto fail;
@@ -1619,6 +1649,33 @@ fail:
return ERR_PTR(ret);
}
+/*
+ * Get in-memory reference of a root structure
+ *
+ * @objectid: tree objectid
+ * @check_ref: if set, verify that the tree exists and the item has at least
+ * one reference
+ */
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+ u64 objectid, bool check_ref)
+{
+ return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+}
+
+/*
+ * Get in-memory reference of a root structure, created as new, optionally pass
+ * the anonymous block device id
+ *
+ * @objectid: tree objectid
+ * @anon_dev: if zero, allocate a new anonymous block device or use the
+ * parameter value
+ */
+struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
+ u64 objectid, dev_t anon_dev)
+{
+ return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
+}
+
static int btrfs_congested_fn(void *congested_data, int bdi_bits)
{
struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;