summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/btrfs_inode.h11
-rw-r--r--fs/btrfs/inode.c9
-rw-r--r--fs/btrfs/reflink.c15
-rw-r--r--fs/btrfs/tree-log.c13
4 files changed, 46 insertions, 2 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index e7d709505cb1..c47b6c6fea9f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -152,6 +152,17 @@ struct btrfs_inode {
u64 last_unlink_trans;
/*
+ * The id/generation of the last transaction where this inode was
+ * either the source or the destination of a clone/dedupe operation.
+ * Used when logging an inode to know if there are shared extents that
+ * need special care when logging checksum items, to avoid duplicate
+ * checksum items in a log (which can lead to a corruption where we end
+ * up with missing checksum ranges after log replay).
+ * Protected by the vfs inode lock.
+ */
+ u64 last_reflink_trans;
+
+ /*
* Number of bytes outstanding that are going to need csums. This is
* used in ENOSPC accounting.
*/
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0fa4f7007ff9..611b3412fbfd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3336,6 +3336,14 @@ cache_index:
*/
BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
+ /*
+ * Same logic as for last_unlink_trans. We don't persist the generation
+ * of the last transaction where this inode was used for a reflink
+ * operation, so after eviction and reloading the inode we must be
+ * pessimistic and assume the last transaction that modified the inode.
+ */
+ BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
+
path->slots[0]++;
if (inode->i_nlink != 1 ||
path->slots[0] >= btrfs_header_nritems(leaf))
@@ -8550,6 +8558,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->index_cnt = (u64)-1;
ei->dir_index = 0;
ei->last_unlink_trans = 0;
+ ei->last_reflink_trans = 0;
ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 834eb6d98caa..5cd02514cf4d 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -337,6 +337,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
while (1) {
u64 next_key_min_offset = key.offset + 1;
struct btrfs_file_extent_item *extent;
+ u64 extent_gen;
int type;
u32 size;
struct btrfs_key new_key;
@@ -385,6 +386,7 @@ process_slot:
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
+ extent_gen = btrfs_file_extent_generation(leaf, extent);
comp = btrfs_file_extent_compression(leaf, extent);
type = btrfs_file_extent_type(leaf, extent);
if (type == BTRFS_FILE_EXTENT_REG ||
@@ -489,6 +491,19 @@ process_slot:
btrfs_release_path(path);
+ /*
+ * If this is a new extent update the last_reflink_trans of both
+ * inodes. This is used by fsync to make sure it does not log
+ * multiple checksum items with overlapping ranges. For older
+ * extents we don't need to do it since inode logging skips the
+ * checksums for older extents. Also ignore holes and inline
+ * extents because they don't have checksums in the csum tree.
+ */
+ if (extent_gen == trans->transid && disko > 0) {
+ BTRFS_I(src)->last_reflink_trans = trans->transid;
+ BTRFS_I(inode)->last_reflink_trans = trans->transid;
+ }
+
last_dest_end = ALIGN(new_key.offset + datal,
fs_info->sectorsize);
ret = clone_finish_inode_update(trans, inode, last_dest_end,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index aaa449153d9c..ea8136dcf71f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3892,6 +3892,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
}
static int log_csums(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
@@ -3900,6 +3901,14 @@ static int log_csums(struct btrfs_trans_handle *trans,
int ret;
/*
+ * If this inode was not used for reflink operations in the current
+ * transaction with new extents, then do the fast path, no need to
+ * worry about logging checksum items with overlapping ranges.
+ */
+ if (inode->last_reflink_trans < trans->transid)
+ return btrfs_csum_file_blocks(trans, log_root, sums);
+
+ /*
* Serialize logging for checksums. This is to avoid racing with the
* same checksum being logged by another task that is logging another
* file which happens to refer to the same extent as well. Such races
@@ -4050,7 +4059,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = log_csums(trans, log, sums);
+ ret = log_csums(trans, inode, log, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -4109,7 +4118,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = log_csums(trans, log_root, sums);
+ ret = log_csums(trans, inode, log_root, sums);
list_del(&sums->list);
kfree(sums);
}