summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c815
1 files changed, 420 insertions, 395 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7e8d8169779d..8e23780acfae 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -45,7 +45,6 @@
#include "compression.h"
#include "locking.h"
#include "free-space-cache.h"
-#include "inode-map.h"
#include "props.h"
#include "qgroup.h"
#include "delalloc-space.h"
@@ -62,7 +61,6 @@ struct btrfs_dio_data {
loff_t length;
ssize_t submitted;
struct extent_changeset *data_reserved;
- bool sync;
};
static const struct inode_operations btrfs_dir_inode_operations;
@@ -96,6 +94,51 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
const bool uptodate);
/*
+ * btrfs_inode_lock - lock inode i_rwsem based on arguments passed
+ *
+ * ilock_flags can have the following bit set:
+ *
+ * BTRFS_ILOCK_SHARED - acquire a shared lock on the inode
+ * BTRFS_ILOCK_TRY - try to acquire the lock, if fails on first attempt
+ * return -EAGAIN
+ */
+int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
+{
+ if (ilock_flags & BTRFS_ILOCK_SHARED) {
+ if (ilock_flags & BTRFS_ILOCK_TRY) {
+ if (!inode_trylock_shared(inode))
+ return -EAGAIN;
+ else
+ return 0;
+ }
+ inode_lock_shared(inode);
+ } else {
+ if (ilock_flags & BTRFS_ILOCK_TRY) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ else
+ return 0;
+ }
+ inode_lock(inode);
+ }
+ return 0;
+}
+
+/*
+ * btrfs_inode_unlock - unock inode i_rwsem
+ *
+ * ilock_flags should contain the same bits set as passed to btrfs_inode_lock()
+ * to decide whether the lock acquired is shared or exclusive.
+ */
+void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
+{
+ if (ilock_flags & BTRFS_ILOCK_SHARED)
+ inode_unlock_shared(inode);
+ else
+ inode_unlock(inode);
+}
+
+/*
* Cleanup all submitted ordered extents in specified range to handle errors
* from the btrfs_run_delalloc_range() callback.
*
@@ -158,7 +201,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
* no overlapping inline items exist in the btree
*/
static int insert_inline_extent(struct btrfs_trans_handle *trans,
- struct btrfs_path *path, int extent_inserted,
+ struct btrfs_path *path, bool extent_inserted,
struct btrfs_root *root, struct inode *inode,
u64 start, size_t size, size_t compressed_size,
int compress_type,
@@ -179,8 +222,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
if (compressed_size && compressed_pages)
cur_size = compressed_size;
- inode_add_bytes(inode, size);
-
if (!extent_inserted) {
struct btrfs_key key;
size_t datasize;
@@ -190,7 +231,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
key.type = BTRFS_EXTENT_DATA_KEY;
datasize = btrfs_file_extent_calc_inline_size(cur_size);
- path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
if (ret)
@@ -256,8 +296,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
* could end up racing with unlink.
*/
BTRFS_I(inode)->disk_i_size = inode->i_size;
- ret = btrfs_update_inode(trans, root, inode);
-
fail:
return ret;
}
@@ -273,6 +311,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
int compress_type,
struct page **compressed_pages)
{
+ struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
@@ -283,8 +322,6 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
u64 data_len = inline_len;
int ret;
struct btrfs_path *path;
- int extent_inserted = 0;
- u32 extent_item_size;
if (compressed_size)
data_len = compressed_size;
@@ -310,16 +347,20 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
}
trans->block_rsv = &inode->block_rsv;
+ drop_args.path = path;
+ drop_args.start = start;
+ drop_args.end = aligned_end;
+ drop_args.drop_cache = true;
+ drop_args.replace_extent = true;
+
if (compressed_size && compressed_pages)
- extent_item_size = btrfs_file_extent_calc_inline_size(
+ drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
compressed_size);
else
- extent_item_size = btrfs_file_extent_calc_inline_size(
+ drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
inline_len);
- ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
- NULL, 1, 1, extent_item_size,
- &extent_inserted);
+ ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -327,7 +368,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
- ret = insert_inline_extent(trans, path, extent_inserted,
+ ret = insert_inline_extent(trans, path, drop_args.extent_inserted,
root, &inode->vfs_inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
@@ -339,8 +380,17 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
goto out;
}
+ btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret && ret != -ENOSPC) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ } else if (ret == -ENOSPC) {
+ ret = 1;
+ goto out;
+ }
+
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
- btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
/*
* Don't forget to free the reserved space, as for inlined extent
@@ -1598,6 +1648,15 @@ next_slot:
goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
+
+ /*
+ * The following checks can be expensive, as they need to
+ * take other locks and do btree or rbtree searches, so
+ * release the path to avoid blocking other tasks for too
+ * long.
+ */
+ btrfs_release_path(path);
+
/* If extent is RO, we must COW it */
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
@@ -1673,12 +1732,12 @@ out_check:
cur_offset = extent_end;
if (cur_offset > end)
break;
+ if (!path->nodes[0])
+ continue;
path->slots[0]++;
goto next_slot;
}
- btrfs_release_path(path);
-
/*
* COW range from cow_start to found_key.offset - 1. As the key
* will contain the beginning of the first extent that can be
@@ -2098,6 +2157,8 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
spin_lock(&inode->lock);
ASSERT(inode->new_delalloc_bytes >= len);
inode->new_delalloc_bytes -= len;
+ if (*bits & EXTENT_ADD_INODE_BYTES)
+ inode_add_bytes(&inode->vfs_inode, len);
spin_unlock(&inode->lock);
}
}
@@ -2121,7 +2182,7 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- u64 logical = (u64)bio->bi_iter.bi_sector << 9;
+ u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
int ret;
@@ -2150,11 +2211,9 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
- u64 bio_offset)
+static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
+ u64 dio_file_offset)
{
- struct inode *inode = private_data;
-
return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
}
@@ -2187,7 +2246,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
- skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+ !fs_info->csum_root;
if (btrfs_is_free_space_inode(BTRFS_I(inode)))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
@@ -2202,8 +2262,13 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
mirror_num,
bio_flags);
goto out;
- } else if (!skip_sum) {
- ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL);
+ } else {
+ /*
+ * Lookup bio sums does extra checks around whether we
+ * need to csum or not, which is why we ignore skip_sum
+ * here.
+ */
+ ret = btrfs_lookup_bio_sums(inode, bio, NULL);
if (ret)
goto out;
}
@@ -2213,8 +2278,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
goto mapit;
/* we're doing a write, do the async checksumming */
- ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
- 0, inode, btrfs_submit_bio_start);
+ ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
+ 0, btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
@@ -2282,8 +2347,8 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
- EXTENT_DELALLOC_NEW,
- NULL, cached_state, GFP_NOFS);
+ EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
+ GFP_NOFS, NULL);
next:
search_start = extent_map_end(em);
free_extent_map(em);
@@ -2511,9 +2576,11 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 file_pos,
struct btrfs_file_extent_item *stack_fi,
+ const bool update_inode_bytes,
u64 qgroup_reserved)
{
struct btrfs_root *root = inode->root;
+ const u64 sectorsize = root->fs_info->sectorsize;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
@@ -2521,7 +2588,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
- int extent_inserted = 0;
+ struct btrfs_drop_extents_args drop_args = { 0 };
int ret;
path = btrfs_alloc_path();
@@ -2537,18 +2604,20 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* the caller is expected to unpin it and allow it to be merged
* with the others.
*/
- ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
- file_pos + num_bytes, NULL, 0,
- 1, sizeof(*stack_fi), &extent_inserted);
+ drop_args.path = path;
+ drop_args.start = file_pos;
+ drop_args.end = file_pos + num_bytes;
+ drop_args.replace_extent = true;
+ drop_args.extent_item_size = sizeof(*stack_fi);
+ ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret)
goto out;
- if (!extent_inserted) {
+ if (!drop_args.extent_inserted) {
ins.objectid = btrfs_ino(inode);
ins.offset = file_pos;
ins.type = BTRFS_EXTENT_DATA_KEY;
- path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &ins,
sizeof(*stack_fi));
if (ret)
@@ -2563,7 +2632,24 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- inode_add_bytes(&inode->vfs_inode, num_bytes);
+ /*
+ * If we dropped an inline extent here, we know the range where it is
+ * was not marked with the EXTENT_DELALLOC_NEW bit, so we update the
+ * number of bytes only for that range contaning the inline extent.
+ * The remaining of the range will be processed when clearning the
+ * EXTENT_DELALLOC_BIT bit through the ordered extent completion.
+ */
+ if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
+ u64 inline_size = round_down(drop_args.bytes_found, sectorsize);
+
+ inline_size = drop_args.bytes_found - inline_size;
+ btrfs_update_inode_bytes(inode, sectorsize, inline_size);
+ drop_args.bytes_found -= inline_size;
+ num_bytes -= sectorsize;
+ }
+
+ if (update_inode_bytes)
+ btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
@@ -2601,6 +2687,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_file_extent_item stack_fi;
u64 logical_len;
+ bool update_inode_bytes;
memset(&stack_fi, 0, sizeof(stack_fi));
btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
@@ -2616,9 +2703,18 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
/* Encryption and other encoding is reserved and all 0 */
+ /*
+ * For delalloc, when completing an ordered extent we update the inode's
+ * bytes when clearing the range in the inode's io tree, so pass false
+ * as the argument 'update_inode_bytes' to insert_reserved_file_extent(),
+ * except if the ordered extent was truncated.
+ */
+ update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
+ test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
+
return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
oe->file_offset, &stack_fi,
- oe->qgroup_rsv);
+ update_inode_bytes, oe->qgroup_rsv);
}
/*
@@ -2628,11 +2724,11 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
*/
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
- struct inode *inode = ordered_extent->inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans = NULL;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_state *cached_state = NULL;
u64 start, end;
int compress_type = 0;
@@ -2640,10 +2736,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
u64 logical_len = ordered_extent->num_bytes;
bool freespace_inode;
bool truncated = false;
- bool range_locked = false;
- bool clear_new_delalloc_bytes = false;
bool clear_reserved_extent = true;
- unsigned int clear_bits;
+ unsigned int clear_bits = EXTENT_DEFRAG;
start = ordered_extent->file_offset;
end = start + ordered_extent->num_bytes - 1;
@@ -2651,16 +2745,16 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
- clear_new_delalloc_bytes = true;
+ clear_bits |= EXTENT_DELALLOC_NEW;
- freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
+ freespace_inode = btrfs_is_free_space_inode(inode);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
goto out;
}
- btrfs_free_io_failure_record(BTRFS_I(inode), start, end);
+ btrfs_free_io_failure_record(inode, start, end);
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
@@ -2683,14 +2777,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans = NULL;
goto out;
}
- trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+ trans->block_rsv = &inode->block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
goto out;
}
- range_locked = true;
+ clear_bits |= EXTENT_LOCKED;
lock_extent_bits(io_tree, start, end, &cached_state);
if (freespace_inode)
@@ -2703,13 +2797,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+ trans->block_rsv = &inode->block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compress_type);
- ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
+ ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
logical_len);
@@ -2723,8 +2817,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->disk_num_bytes);
}
}
- unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
- ordered_extent->file_offset,
+ unpin_extent_cache(&inode->extent_tree, ordered_extent->file_offset,
ordered_extent->num_bytes, trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
@@ -2737,6 +2830,17 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
+ /*
+ * If this is a new delalloc range, clear its new delalloc flag to
+ * update the inode's number of bytes. This needs to be done first
+ * before updating the inode item.
+ */
+ if ((clear_bits & EXTENT_DELALLOC_NEW) &&
+ !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
+ clear_extent_bit(&inode->io_tree, start, end,
+ EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
+ 0, 0, &cached_state);
+
btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
@@ -2745,12 +2849,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
}
ret = 0;
out:
- clear_bits = EXTENT_DEFRAG;
- if (range_locked)
- clear_bits |= EXTENT_LOCKED;
- if (clear_new_delalloc_bytes)
- clear_bits |= EXTENT_DELALLOC_NEW;
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits,
+ clear_extent_bit(&inode->io_tree, start, end, clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
&cached_state);
@@ -2765,7 +2864,7 @@ out:
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
/* Drop the cache for the part of the extent we didn't write. */
- btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0);
+ btrfs_drop_extent_cache(inode, unwritten_start, end, 0);
/*
* If the ordered extent had an IOERR or something else went
@@ -2800,7 +2899,7 @@ out:
* This needs to be done to make sure anybody waiting knows we are done
* updating everything for this ordered extent.
*/
- btrfs_remove_ordered_extent(BTRFS_I(inode), ordered_extent);
+ btrfs_remove_ordered_extent(inode, ordered_extent);
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -2841,18 +2940,32 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
btrfs_queue_work(wq, &ordered_extent->work);
}
+/*
+ * check_data_csum - verify checksum of one sector of uncompressed data
+ * @inode: inode
+ * @io_bio: btrfs_io_bio which contains the csum
+ * @bio_offset: offset to the beginning of the bio (in bytes)
+ * @page: page where is the data to be verified
+ * @pgoff: offset inside the page
+ *
+ * The length of such check is always one sector size.
+ */
static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
- int icsum, struct page *page, int pgoff, u64 start,
- size_t len)
+ u32 bio_offset, struct page *page, u32 pgoff)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
char *kaddr;
- u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ u32 len = fs_info->sectorsize;
+ const u32 csum_size = fs_info->csum_size;
+ unsigned int offset_sectors;
u8 *csum_expected;
u8 csum[BTRFS_CSUM_SIZE];
- csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
+ ASSERT(pgoff + len <= PAGE_SIZE);
+
+ offset_sectors = bio_offset >> fs_info->sectorsize_bits;
+ csum_expected = ((u8 *)io_bio->csum) + offset_sectors * csum_size;
kaddr = kmap_atomic(page);
shash->tfm = fs_info->csum_shash;
@@ -2865,8 +2978,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
kunmap_atomic(kaddr);
return 0;
zeroit:
- btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
- io_bio->mirror_num);
+ btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff,
+ csum, csum_expected, io_bio->mirror_num);
if (io_bio->device)
btrfs_dev_stat_inc_and_print(io_bio->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
@@ -2877,17 +2990,23 @@ zeroit:
}
/*
- * when reads are done, we need to check csums to verify the data is correct
+ * When reads are done, we need to check csums to verify the data is correct.
* if there's a match, we allow the bio to finish. If not, the code in
* extent_io.c will try to find good copies for us.
+ *
+ * @bio_offset: offset to the beginning of the bio (in bytes)
+ * @start: file offset of the range start
+ * @end: file offset of the range end (inclusive)
+ * @mirror: mirror number
*/
-int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
+int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
struct page *page, u64 start, u64 end, int mirror)
{
- size_t offset = start - page_offset(page);
struct inode *inode = page->mapping->host;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ const u32 sectorsize = root->fs_info->sectorsize;
+ u32 pg_off;
if (PageChecked(page)) {
ClearPageChecked(page);
@@ -2897,15 +3016,27 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
return 0;
+ if (!root->fs_info->csum_root)
+ return 0;
+
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
return 0;
}
- phy_offset >>= inode->i_sb->s_blocksize_bits;
- return check_data_csum(inode, io_bio, phy_offset, page, offset, start,
- (size_t)(end - start + 1));
+ ASSERT(page_offset(page) <= start &&
+ end <= page_offset(page) + PAGE_SIZE - 1);
+ for (pg_off = offset_in_page(start);
+ pg_off < offset_in_page(end);
+ pg_off += sectorsize, bio_offset += sectorsize) {
+ int ret;
+
+ ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off);
+ if (ret < 0)
+ return -EIO;
+ }
+ return 0;
}
/*
@@ -3515,7 +3646,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* copy everything in the in-memory inode into the btree.
*/
static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
+ struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_inode_item *inode_item;
struct btrfs_path *path;
@@ -3526,9 +3658,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- path->leave_spinning = 1;
- ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
- 1);
+ ret = btrfs_lookup_inode(trans, root, path, &inode->location, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@@ -3539,9 +3669,9 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
- fill_inode_item(trans, leaf, inode_item, inode);
+ fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
btrfs_mark_buffer_dirty(leaf);
- btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
+ btrfs_set_inode_last_trans(trans, inode);
ret = 0;
failed:
btrfs_free_path(path);
@@ -3552,7 +3682,8 @@ failed:
* copy everything in the in-memory inode into the btree.
*/
noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
+ struct btrfs_root *root,
+ struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -3564,23 +3695,22 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated
* without delay
*/
- if (!btrfs_is_free_space_inode(BTRFS_I(inode))
+ if (!btrfs_is_free_space_inode(inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
btrfs_update_root_times(trans, root);
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
- btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
+ btrfs_set_inode_last_trans(trans, inode);
return ret;
}
return btrfs_update_inode_item(trans, root, inode);
}
-noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode)
+int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_inode *inode)
{
int ret;
@@ -3615,7 +3745,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto out;
}
- path->leave_spinning = 1;
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
name, name_len, -1);
if (IS_ERR_OR_NULL(di)) {
@@ -3695,7 +3824,7 @@ err:
inode_inc_iversion(&dir->vfs_inode);
inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
- ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
+ ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
}
@@ -3709,7 +3838,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
drop_nlink(&inode->vfs_inode);
- ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, inode);
}
return ret;
}
@@ -3858,7 +3987,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
inode_inc_iversion(dir);
dir->i_mtime = dir->i_ctime = current_time(dir);
- ret = btrfs_update_inode_fallback(trans, root, dir);
+ ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir));
if (ret)
btrfs_abort_transaction(trans, ret);
out:
@@ -3995,7 +4124,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
struct btrfs_block_rsv block_rsv;
u64 root_flags;
int ret;
- int err;
/*
* Don't allow to delete a subvolume with send in progress. This is
@@ -4017,8 +4145,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
down_write(&fs_info->subvol_sem);
- err = may_destroy_subvol(dest);
- if (err)
+ ret = may_destroy_subvol(dest);
+ if (ret)
goto out_up_write;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
@@ -4027,13 +4155,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
* two for dir entries,
* two for root ref/backref.
*/
- err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
- if (err)
+ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+ if (ret)
goto out_up_write;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
goto out_release;
}
trans->block_rsv = &block_rsv;
@@ -4043,7 +4171,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_subvol(trans, dir, dentry);
if (ret) {
- err = ret;
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -4052,7 +4179,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
memset(&dest->root_item.drop_progress, 0,
sizeof(dest->root_item.drop_progress));
- dest->root_item.drop_level = 0;
+ btrfs_set_root_drop_level(&dest->root_item, 0);
btrfs_set_root_refs(&dest->root_item, 0);
if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
@@ -4061,7 +4188,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
}
@@ -4071,7 +4197,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
@@ -4081,7 +4206,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
}
@@ -4092,14 +4216,12 @@ out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
ret = btrfs_end_transaction(trans);
- if (ret && !err)
- err = ret;
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
out_up_write:
up_write(&fs_info->subvol_sem);
- if (err) {
+ if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
@@ -4109,15 +4231,9 @@ out_up_write:
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
-
- /* the last ref */
- if (dest->ino_cache_inode) {
- iput(dest->ino_cache_inode);
- dest->ino_cache_inode = NULL;
- }
}
- return err;
+ return ret;
}
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -4194,7 +4310,7 @@ out:
*/
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct inode *inode,
+ struct btrfs_inode *inode,
u64 new_size, u32 min_type)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4215,7 +4331,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int pending_del_slot = 0;
int extent_type = -1;
int ret;
- u64 ino = btrfs_ino(BTRFS_I(inode));
+ u64 ino = btrfs_ino(inode);
u64 bytes_deleted = 0;
bool be_nice = false;
bool should_throttle = false;
@@ -4229,7 +4345,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* off from time to time. This means all inodes in subvolume roots,
* reloc roots, and data reloc roots.
*/
- if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
+ if (!btrfs_is_free_space_inode(inode) &&
test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
be_nice = true;
@@ -4239,7 +4355,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
path->reada = READA_BACK;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
+ lock_extent_bits(&inode->io_tree, lock_start, (u64)-1,
&cached_state);
/*
@@ -4247,7 +4363,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* new size is not block aligned since we will be keeping the
* last block of the extent just the way it is.
*/
- btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
+ btrfs_drop_extent_cache(inode, ALIGN(new_size,
fs_info->sectorsize),
(u64)-1, 0);
}
@@ -4258,8 +4374,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* it is used to drop the logged items. So we shouldn't kill the delayed
* items.
*/
- if (min_type == 0 && root == BTRFS_I(inode)->root)
- btrfs_kill_delayed_inode_items(BTRFS_I(inode));
+ if (min_type == 0 && root == inode->root)
+ btrfs_kill_delayed_inode_items(inode);
key.objectid = ino;
key.offset = (u64)-1;
@@ -4315,14 +4431,13 @@ search_again:
btrfs_file_extent_num_bytes(leaf, fi);
trace_btrfs_truncate_show_fi_regular(
- BTRFS_I(inode), leaf, fi,
- found_key.offset);
+ inode, leaf, fi, found_key.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
item_end += btrfs_file_extent_ram_bytes(leaf,
fi);
trace_btrfs_truncate_show_fi_inline(
- BTRFS_I(inode), leaf, fi, path->slots[0],
+ inode, leaf, fi, path->slots[0],
found_key.offset);
}
item_end--;
@@ -4361,7 +4476,8 @@ search_again:
if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state) &&
extent_start != 0)
- inode_sub_bytes(inode, num_dec);
+ inode_sub_bytes(&inode->vfs_inode,
+ num_dec);
btrfs_mark_buffer_dirty(leaf);
} else {
extent_num_bytes =
@@ -4376,7 +4492,8 @@ search_again:
found_extent = 1;
if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state))
- inode_sub_bytes(inode, num_dec);
+ inode_sub_bytes(&inode->vfs_inode,
+ num_dec);
}
}
clear_len = num_dec;
@@ -4411,7 +4528,8 @@ search_again:
}
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
- inode_sub_bytes(inode, item_end + 1 - new_size);
+ inode_sub_bytes(&inode->vfs_inode,
+ item_end + 1 - new_size);
}
delete:
/*
@@ -4419,8 +4537,8 @@ delete:
* multiple fsyncs, and in this case we don't want to clear the
* file extent range because it's just the log.
*/
- if (root == BTRFS_I(inode)->root) {
- ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+ if (root == inode->root) {
+ ret = btrfs_inode_clear_file_extent_range(inode,
clear_start, clear_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -4529,8 +4647,8 @@ out:
if (!ret && last_size > new_size)
last_size = new_size;
btrfs_inode_safe_disk_i_size_write(inode, last_size);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
- (u64)-1, &cached_state);
+ unlock_extent_cached(&inode->io_tree, lock_start, (u64)-1,
+ &cached_state);
}
btrfs_free_path(path);
@@ -4548,12 +4666,12 @@ out:
* This will find the block for the "from" offset and cow the block and zero the
* part we want to zero. This is used with truncate and hole punching.
*/
-int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
- int front)
+int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
+ int front)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
+ struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
@@ -4576,30 +4694,29 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1;
- ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
- block_start, blocksize);
+ ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
+ blocksize);
if (ret < 0) {
- if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
- &write_bytes) > 0) {
+ if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
/* For nocow case, no need to reserve data space */
only_release_metadata = true;
} else {
goto out;
}
}
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
+ ret = btrfs_delalloc_reserve_metadata(inode, blocksize);
if (ret < 0) {
if (!only_release_metadata)
- btrfs_free_reserved_data_space(BTRFS_I(inode