summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2018-06-11 19:24:28 +0100
committerDavid Sterba <dsterba@suse.com>2018-08-23 17:37:26 +0200
commitd4682ba03ef618b6ef4be7cedc7aacaf505d3a58 (patch)
tree5700c2336b2b53118c4869c447f084023ab8555e /fs/btrfs/inode.c
parent8ecebf4d767e2307a946c8905278d6358eda35c3 (diff)
Btrfs: sync log after logging new name
When we add a new name for an inode which was logged in the current transaction, we update the inode in the log so that its new name and ancestors are added to the log. However when we do this we do not persist the log, so the changes remain in memory only, and as a consequence, any ancestors that were created in the current transaction are updated such that future calls to btrfs_inode_in_log() return true. This leads to a subsequent fsync against such new ancestor directories returning immediately, without persisting the log, therefore after a power failure the new ancestor directories do not exist, despite fsync being called against them explicitly. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/A $ mkdir /mnt/B $ mkdir /mnt/A/C $ touch /mnt/B/foo $ xfs_io -c "fsync" /mnt/B/foo $ ln /mnt/B/foo /mnt/A/C/foo $ xfs_io -c "fsync" /mnt/A <power failure> After the power failure, directory "A" does not exist, despite the explicit fsync on it. Instead of fixing this by changing the behaviour of the explicit fsync on directory "A" to persist the log instead of doing nothing, make the logging of the new file name (which happens when creating a hard link or renaming) persist the log. This approach not only is simpler, not requiring addition of new fields to the inode in memory structure, but also gives us the same behaviour as ext4, xfs and f2fs (possibly other filesystems too). A test case for fstests follows soon. Fixes: 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes") Reported-by: Vijay Chidambaram <vvijay03@gmail.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c92
1 files changed, 80 insertions, 12 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c6d8c5d19ff0..cb09873ad270 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6634,6 +6634,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
drop_inode = 1;
} else {
struct dentry *parent = dentry->d_parent;
+ int ret;
+
err = btrfs_update_inode(trans, root, inode);
if (err)
goto fail;
@@ -6647,7 +6649,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
d_instantiate(dentry, inode);
- btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
+ ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
+ true, NULL);
+ if (ret == BTRFS_NEED_TRANS_COMMIT) {
+ err = btrfs_commit_transaction(trans);
+ trans = NULL;
+ }
}
fail:
@@ -9386,14 +9393,21 @@ static int btrfs_rename_exchange(struct inode *old_dir,
u64 new_idx = 0;
u64 root_objectid;
int ret;
- int ret2;
bool root_log_pinned = false;
bool dest_log_pinned = false;
+ struct btrfs_log_ctx ctx_root;
+ struct btrfs_log_ctx ctx_dest;
+ bool sync_log_root = false;
+ bool sync_log_dest = false;
+ bool commit_transaction = false;
/* we only allow rename subvolume link between subvolumes */
if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
return -EXDEV;
+ btrfs_init_log_ctx(&ctx_root, old_inode);
+ btrfs_init_log_ctx(&ctx_dest, new_inode);
+
/* close the race window with snapshot create/destroy ioctl */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&fs_info->subvol_sem);
@@ -9540,15 +9554,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
if (root_log_pinned) {
parent = new_dentry->d_parent;
- btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
- parent);
+ ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
+ BTRFS_I(old_dir), parent,
+ false, &ctx_root);
+ if (ret == BTRFS_NEED_LOG_SYNC)
+ sync_log_root = true;
+ else if (ret == BTRFS_NEED_TRANS_COMMIT)
+ commit_transaction = true;
+ ret = 0;
btrfs_end_log_trans(root);
root_log_pinned = false;
}
if (dest_log_pinned) {
- parent = old_dentry->d_parent;
- btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
- parent);
+ if (!commit_transaction) {
+ parent = old_dentry->d_parent;
+ ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
+ BTRFS_I(new_dir), parent,
+ false, &ctx_dest);
+ if (ret == BTRFS_NEED_LOG_SYNC)
+ sync_log_dest = true;
+ else if (ret == BTRFS_NEED_TRANS_COMMIT)
+ commit_transaction = true;
+ ret = 0;
+ }
btrfs_end_log_trans(dest);
dest_log_pinned = false;
}
@@ -9581,8 +9609,26 @@ out_fail:
dest_log_pinned = false;
}
}
- ret2 = btrfs_end_transaction(trans);
- ret = ret ? ret : ret2;
+ if (!ret && sync_log_root && !commit_transaction) {
+ ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
+ &ctx_root);
+ if (ret)
+ commit_transaction = true;
+ }
+ if (!ret && sync_log_dest && !commit_transaction) {
+ ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
+ &ctx_dest);
+ if (ret)
+ commit_transaction = true;
+ }
+ if (commit_transaction) {
+ ret = btrfs_commit_transaction(trans);
+ } else {
+ int ret2;
+
+ ret2 = btrfs_end_transaction(trans);
+ ret = ret ? ret : ret2;
+ }
out_notrans:
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
@@ -9659,6 +9705,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
int ret;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
bool log_pinned = false;
+ struct btrfs_log_ctx ctx;
+ bool sync_log = false;
+ bool commit_transaction = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -9816,8 +9865,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (log_pinned) {
struct dentry *parent = new_dentry->d_parent;
- btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
- parent);
+ btrfs_init_log_ctx(&ctx, old_inode);
+ ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
+ BTRFS_I(old_dir), parent,
+ false, &ctx);
+ if (ret == BTRFS_NEED_LOG_SYNC)
+ sync_log = true;
+ else if (ret == BTRFS_NEED_TRANS_COMMIT)
+ commit_transaction = true;
+ ret = 0;
btrfs_end_log_trans(root);
log_pinned = false;
}
@@ -9854,7 +9910,19 @@ out_fail:
btrfs_end_log_trans(root);
log_pinned = false;
}
- btrfs_end_transaction(trans);
+ if (!ret && sync_log) {
+ ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
+ if (ret)
+ commit_transaction = true;
+ }
+ if (commit_transaction) {
+ ret = btrfs_commit_transaction(trans);
+ } else {
+ int ret2;
+
+ ret2 = btrfs_end_transaction(trans);
+ ret = ret ? ret : ret2;
+ }
out_notrans:
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);