summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-24 17:36:12 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-24 17:36:12 +0100
commitfe0142df648f5478f410c41e01771b90b9793215 (patch)
tree54ae4995afa9f753ce0fb9159e29bd6299a84238 /fs
parentbfd93a87eadb03499a5ff02dfebfaf515310d27c (diff)
parent96987eea537d6ccd98704a71958f9ba02da80843 (diff)
Merge tag 'xfs-4.20-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pul xfs updates from Dave Chinner: "There's not a huge amount of change in this cycle - Darrick has been out of action for a couple of months (hence me sending the last few pull requests), so we decided a quiet cycle mainly focussed on bug fixes was a good idea. Darrick will take the helm again at the end of this merge window. FYI, I may be sending another update later in the cycle - there's a pending rework of the clone/dedupe_file_range code that fixes numerous bugs that is spread amongst the VFS, XFS and ocfs2 code. It has been reviewed and tested, Al and I just need to work out the details of the merge, so it may come from him rather than me. Summary: - only support filesystems with unwritten extents - add definition for statfs XFS magic number - remove unused parameters around reflink code - more debug for dangling delalloc extents - cancel COW extents on extent swap targets - fix quota stats output and clean up the code - refactor some of the attribute code in preparation for parent pointers - fix several buffer handling bugs" * tag 'xfs-4.20-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (21 commits) xfs: cancel COW blocks before swapext xfs: clear ail delwri queued bufs on unmount of shutdown fs xfs: use offsetof() in place of offset macros for __xfsstats xfs: Fix xqmstats offsets in /proc/fs/xfs/xqmstat xfs: fix use-after-free race in xfs_buf_rele xfs: Add attibute remove and helper functions xfs: Add attibute set and helper functions xfs: Add helper function xfs_attr_try_sf_addname xfs: Move fs/xfs/xfs_attr.h to fs/xfs/libxfs/xfs_attr.h xfs: issue log message on user force shutdown xfs: fix buffer state management in xrep_findroot_block xfs: always assign buffer verifiers when one is provided xfs: xrep_findroot_block should reject root blocks with siblings xfs: add a define for statfs magic to uapi xfs: print dangling delalloc extents xfs: fix fork selection in xfs_find_trim_cow_extent xfs: remove the unused trimmed argument from xfs_reflink_trim_around_shared xfs: remove the unused shared argument to xfs_reflink_reserve_cow xfs: handle zeroing in xfs_file_iomap_begin_delay xfs: remove suport for filesystems without unwritten extent flag ...
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/libxfs/xfs_attr.c236
-rw-r--r--fs/xfs/libxfs/xfs_attr.h (renamed from fs/xfs/xfs_attr.h)2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c70
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h1
-rw-r--r--fs/xfs/libxfs/xfs_format.h8
-rw-r--r--fs/xfs/libxfs/xfs_sb.c5
-rw-r--r--fs/xfs/scrub/repair.c128
-rw-r--r--fs/xfs/scrub/scrub.c13
-rw-r--r--fs/xfs/xfs_aops.c4
-rw-r--r--fs/xfs/xfs_aops.h14
-rw-r--r--fs/xfs/xfs_bmap_util.c61
-rw-r--r--fs/xfs/xfs_buf.c109
-rw-r--r--fs/xfs/xfs_buf.h2
-rw-r--r--fs/xfs/xfs_fsops.c50
-rw-r--r--fs/xfs/xfs_ioctl.c8
-rw-r--r--fs/xfs/xfs_iomap.c53
-rw-r--r--fs/xfs/xfs_reflink.c33
-rw-r--r--fs/xfs/xfs_reflink.h4
-rw-r--r--fs/xfs/xfs_stats.c52
-rw-r--r--fs/xfs/xfs_stats.h28
-rw-r--r--fs/xfs/xfs_super.c38
-rw-r--r--fs/xfs/xfs_trans.h1
-rw-r--r--fs/xfs/xfs_trans_ail.c28
-rw-r--r--fs/xfs/xfs_trans_buf.c42
24 files changed, 607 insertions, 383 deletions
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c6299f82a6e4..844ed87b1900 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -191,6 +191,128 @@ xfs_attr_calc_size(
return nblks;
}
+STATIC int
+xfs_attr_try_sf_addname(
+ struct xfs_inode *dp,
+ struct xfs_da_args *args)
+{
+
+ struct xfs_mount *mp = dp->i_mount;
+ int error, error2;
+
+ error = xfs_attr_shortform_addname(args);
+ if (error == -ENOSPC)
+ return error;
+
+ /*
+ * Commit the shortform mods, and we're done.
+ * NOTE: this is also the error path (EEXIST, etc).
+ */
+ if (!error && (args->flags & ATTR_KERNOTIME) == 0)
+ xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
+
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(args->trans);
+
+ error2 = xfs_trans_commit(args->trans);
+ args->trans = NULL;
+ return error ? error : error2;
+}
+
+/*
+ * Set the attribute specified in @args.
+ */
+int
+xfs_attr_set_args(
+ struct xfs_da_args *args,
+ struct xfs_buf **leaf_bp)
+{
+ struct xfs_inode *dp = args->dp;
+ int error;
+
+ /*
+ * If the attribute list is non-existent or a shortform list,
+ * upgrade it to a single-leaf-block attribute list.
+ */
+ if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
+ (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+ dp->i_d.di_anextents == 0)) {
+
+ /*
+ * Build initial attribute list (if required).
+ */
+ if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
+ xfs_attr_shortform_create(args);
+
+ /*
+ * Try to add the attr to the attribute list in the inode.
+ */
+ error = xfs_attr_try_sf_addname(dp, args);
+ if (error != -ENOSPC)
+ return error;
+
+ /*
+ * It won't fit in the shortform, transform to a leaf block.
+ * GROT: another possible req'mt for a double-split btree op.
+ */
+ error = xfs_attr_shortform_to_leaf(args, leaf_bp);
+ if (error)
+ return error;
+
+ /*
+ * Prevent the leaf buffer from being unlocked so that a
+ * concurrent AIL push cannot grab the half-baked leaf
+ * buffer and run into problems with the write verifier.
+ */
+ xfs_trans_bhold(args->trans, *leaf_bp);
+
+ error = xfs_defer_finish(&args->trans);
+ if (error)
+ return error;
+
+ /*
+ * Commit the leaf transformation. We'll need another
+ * (linked) transaction to add the new attribute to the
+ * leaf.
+ */
+ error = xfs_trans_roll_inode(&args->trans, dp);
+ if (error)
+ return error;
+ xfs_trans_bjoin(args->trans, *leaf_bp);
+ *leaf_bp = NULL;
+ }
+
+ if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+ error = xfs_attr_leaf_addname(args);
+ else
+ error = xfs_attr_node_addname(args);
+ return error;
+}
+
+/*
+ * Remove the attribute specified in @args.
+ */
+int
+xfs_attr_remove_args(
+ struct xfs_da_args *args)
+{
+ struct xfs_inode *dp = args->dp;
+ int error;
+
+ if (!xfs_inode_hasattr(dp)) {
+ error = -ENOATTR;
+ } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+ ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
+ error = xfs_attr_shortform_remove(args);
+ } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+ error = xfs_attr_leaf_removename(args);
+ } else {
+ error = xfs_attr_node_removename(args);
+ }
+
+ return error;
+}
+
int
xfs_attr_set(
struct xfs_inode *dp,
@@ -204,7 +326,7 @@ xfs_attr_set(
struct xfs_da_args args;
struct xfs_trans_res tres;
int rsvd = (flags & ATTR_ROOT) != 0;
- int error, err2, local;
+ int error, local;
XFS_STATS_INC(mp, xs_attr_set);
@@ -255,93 +377,17 @@ xfs_attr_set(
error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
- if (error) {
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
- xfs_trans_cancel(args.trans);
- return error;
- }
+ if (error)
+ goto out_trans_cancel;
xfs_trans_ijoin(args.trans, dp, 0);
-
- /*
- * If the attribute list is non-existent or a shortform list,
- * upgrade it to a single-leaf-block attribute list.
- */
- if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
-
- /*
- * Build initial attribute list (if required).
- */
- if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
- xfs_attr_shortform_create(&args);
-
- /*
- * Try to add the attr to the attribute list in
- * the inode.
- */
- error = xfs_attr_shortform_addname(&args);
- if (error != -ENOSPC) {
- /*
- * Commit the shortform mods, and we're done.
- * NOTE: this is also the error path (EEXIST, etc).
- */
- ASSERT(args.trans != NULL);
-
- /*
- * If this is a synchronous mount, make sure that
- * the transaction goes to disk before returning
- * to the user.
- */
- if (mp->m_flags & XFS_MOUNT_WSYNC)
- xfs_trans_set_sync(args.trans);
-
- if (!error && (flags & ATTR_KERNOTIME) == 0) {
- xfs_trans_ichgtime(args.trans, dp,
- XFS_ICHGTIME_CHG);
- }
- err2 = xfs_trans_commit(args.trans);
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
- return error ? error : err2;
- }
-
- /*
- * It won't fit in the shortform, transform to a leaf block.
- * GROT: another possible req'mt for a double-split btree op.
- */
- error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
- if (error)
- goto out;
- /*
- * Prevent the leaf buffer from being unlocked so that a
- * concurrent AIL push cannot grab the half-baked leaf
- * buffer and run into problems with the write verifier.
- */
- xfs_trans_bhold(args.trans, leaf_bp);
- error = xfs_defer_finish(&args.trans);
- if (error)
- goto out;
-
- /*
- * Commit the leaf transformation. We'll need another (linked)
- * transaction to add the new attribute to the leaf, which
- * means that we have to hold & join the leaf buffer here too.
- */
- error = xfs_trans_roll_inode(&args.trans, dp);
- if (error)
- goto out;
- xfs_trans_bjoin(args.trans, leaf_bp);
- leaf_bp = NULL;
- }
-
- if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
- error = xfs_attr_leaf_addname(&args);
- else
- error = xfs_attr_node_addname(&args);
+ error = xfs_attr_set_args(&args, &leaf_bp);
if (error)
- goto out;
+ goto out_release_leaf;
+ if (!args.trans) {
+ /* shortform attribute has already been committed */
+ goto out_unlock;
+ }
/*
* If this is a synchronous mount, make sure that the
@@ -358,17 +404,17 @@ xfs_attr_set(
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans);
+out_unlock:
xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
return error;
-out:
+out_release_leaf:
if (leaf_bp)
xfs_trans_brelse(args.trans, leaf_bp);
+out_trans_cancel:
if (args.trans)
xfs_trans_cancel(args.trans);
- xfs_iunlock(dp, XFS_ILOCK_EXCL);
- return error;
+ goto out_unlock;
}
/*
@@ -423,17 +469,7 @@ xfs_attr_remove(
*/
xfs_trans_ijoin(args.trans, dp, 0);
- if (!xfs_inode_hasattr(dp)) {
- error = -ENOATTR;
- } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
- error = xfs_attr_shortform_remove(&args);
- } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
- error = xfs_attr_leaf_removename(&args);
- } else {
- error = xfs_attr_node_removename(&args);
- }
-
+ error = xfs_attr_remove_args(&args);
if (error)
goto out;
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 033ff8c478e2..bdf52a333f3f 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -140,7 +140,9 @@ int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
unsigned char *value, int *valuelenp, int flags);
int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
unsigned char *value, int valuelen, int flags);
+int xfs_attr_set_args(struct xfs_da_args *args, struct xfs_buf **leaf_bp);
int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
+int xfs_attr_remove_args(struct xfs_da_args *args);
int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
int flags, struct attrlist_cursor_kern *cursor);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a47670332326..74d7228e755b 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1019,6 +1019,34 @@ xfs_bmap_add_attrfork_local(
return -EFSCORRUPTED;
}
+/* Set an inode attr fork off based on the format */
+int
+xfs_bmap_set_attrforkoff(
+ struct xfs_inode *ip,
+ int size,
+ int *version)
+{
+ switch (ip->i_d.di_format) {
+ case XFS_DINODE_FMT_DEV:
+ ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
+ if (!ip->i_d.di_forkoff)
+ ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
+ else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
+ *version = 2;
+ break;
+ default:
+ ASSERT(0);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
* Convert inode from non-attributed to attributed.
* Must not be in a transaction, ip must not be locked.
@@ -1070,26 +1098,9 @@ xfs_bmap_add_attrfork(
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- switch (ip->i_d.di_format) {
- case XFS_DINODE_FMT_DEV:
- ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
- break;
- case XFS_DINODE_FMT_LOCAL:
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
- if (!ip->i_d.di_forkoff)
- ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
- else if (mp->m_flags & XFS_MOUNT_ATTR2)
- version = 2;
- break;
- default:
- ASSERT(0);
- error = -EINVAL;
+ error = xfs_bmap_set_attrforkoff(ip, size, &version);
+ if (error)
goto trans_cancel;
- }
-
ASSERT(ip->i_afp == NULL);
ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
ip->i_afp->if_flags = XFS_IFEXTENTS;
@@ -4081,8 +4092,7 @@ xfs_bmapi_allocate(
* extents to real extents when we're about to write the data.
*/
if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
- (bma->flags & XFS_BMAPI_PREALLOC) &&
- xfs_sb_version_hasextflgbit(&mp->m_sb))
+ (bma->flags & XFS_BMAPI_PREALLOC))
bma->got.br_state = XFS_EXT_UNWRITTEN;
if (bma->wasdel)
@@ -5245,8 +5255,7 @@ __xfs_bunmapi(
* unmapping part of it. But we can't really
* get rid of part of a realtime extent.
*/
- if (del.br_state == XFS_EXT_UNWRITTEN ||
- !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+ if (del.br_state == XFS_EXT_UNWRITTEN) {
/*
* This piece is unwritten, or we're not
* using unwritten extents. Skip over it.
@@ -5296,10 +5305,9 @@ __xfs_bunmapi(
del.br_blockcount -= mod;
del.br_startoff += mod;
del.br_startblock += mod;
- } else if ((del.br_startoff == start &&
- (del.br_state == XFS_EXT_UNWRITTEN ||
- tp->t_blk_res == 0)) ||
- !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+ } else if (del.br_startoff == start &&
+ (del.br_state == XFS_EXT_UNWRITTEN ||
+ tp->t_blk_res == 0)) {
/*
* Can't make it unwritten. There isn't
* a full extent here so just skip it.
@@ -6114,11 +6122,7 @@ xfs_bmap_validate_extent(
XFS_FSB_TO_AGNO(mp, endfsb))
return __this_address;
}
- if (irec->br_state != XFS_EXT_NORM) {
- if (whichfork != XFS_DATA_FORK)
- return __this_address;
- if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
- return __this_address;
- }
+ if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
+ return __this_address;
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b6e9b639e731..488dc8860fd7 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -183,6 +183,7 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, struct xfs_owner_info *oinfo,
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index afbe336600e1..9995d5ae380b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -287,6 +287,8 @@ static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)
{
if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
return false;
+ if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
+ return false;
/* check for unknown features in the fs */
if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
@@ -357,12 +359,6 @@ static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)
(sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
}
-static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp)
-{
- return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
- (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
-}
-
static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)
{
return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 081f46e30556..b5a82acd7dfe 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1115,7 +1115,8 @@ xfs_fs_geometry(
geo->version = XFS_FSOP_GEOM_VERSION;
geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
- XFS_FSOP_GEOM_FLAGS_DIRV2;
+ XFS_FSOP_GEOM_FLAGS_DIRV2 |
+ XFS_FSOP_GEOM_FLAGS_EXTFLG;
if (xfs_sb_version_hasattr(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
if (xfs_sb_version_hasquota(sbp))
@@ -1124,8 +1125,6 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN;
if (xfs_sb_version_hasdalign(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN;
- if (xfs_sb_version_hasextflgbit(sbp))
- geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG;
if (xfs_sb_version_hassector(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
if (xfs_sb_version_hasasciici(sbp))
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 9f08dd9bf1d5..4fc0a5ea7673 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -29,6 +29,8 @@
#include "xfs_ag_resv.h"
#include "xfs_trans_space.h"
#include "xfs_quota.h"
+#include "xfs_attr.h"
+#include "xfs_reflink.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -692,13 +694,14 @@ xrep_findroot_block(
struct xrep_find_ag_btree *fab,
uint64_t owner,
xfs_agblock_t agbno,
- bool *found_it)
+ bool *done_with_block)
{
struct xfs_mount *mp = ri->sc->mp;
struct xfs_buf *bp;
struct xfs_btree_block *btblock;
xfs_daddr_t daddr;
- int error;
+ int block_level;
+ int error = 0;
daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
@@ -717,36 +720,111 @@ xrep_findroot_block(
return error;
}
+ /*
+ * Read the buffer into memory so that we can see if it's a match for
+ * our btree type. We have no clue if it is beforehand, and we want to
+ * avoid xfs_trans_read_buf's behavior of dumping the DONE state (which
+ * will cause needless disk reads in subsequent calls to this function)
+ * and logging metadata verifier failures.
+ *
+ * Therefore, pass in NULL buffer ops. If the buffer was already in
+ * memory from some other caller it will already have b_ops assigned.
+ * If it was in memory from a previous unsuccessful findroot_block
+ * call, the buffer won't have b_ops but it should be clean and ready
+ * for us to try to verify if the read call succeeds. The same applies
+ * if the buffer wasn't in memory at all.
+ *
+ * Note: If we never match a btree type with this buffer, it will be
+ * left in memory with NULL b_ops. This shouldn't be a problem unless
+ * the buffer gets written.
+ */
error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
mp->m_bsize, 0, &bp, NULL);
if (error)
return error;
- /*
- * Does this look like a block matching our fs and higher than any
- * other block we've found so far? If so, reattach buffer verifiers
- * so the AIL won't complain if the buffer is also dirty.
- */
+ /* Ensure the block magic matches the btree type we're looking for. */
btblock = XFS_BUF_TO_BLOCK(bp);
if (be32_to_cpu(btblock->bb_magic) != fab->magic)
goto out;
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
- !uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- goto out;
- bp->b_ops = fab->buf_ops;
- /* Ignore this block if it's lower in the tree than we've seen. */
- if (fab->root != NULLAGBLOCK &&
- xfs_btree_get_level(btblock) < fab->height)
- goto out;
+ /*
+ * If the buffer already has ops applied and they're not the ones for
+ * this btree type, we know this block doesn't match the btree and we
+ * can bail out.
+ *
+ * If the buffer ops match ours, someone else has already validated
+ * the block for us, so we can move on to checking if this is a root
+ * block candidate.
+ *
+ * If the buffer does not have ops, nobody has successfully validated
+ * the contents and the buffer cannot be dirty. If the magic, uuid,
+ * and structure match this btree type then we'll move on to checking
+ * if it's a root block candidate. If there is no match, bail out.
+ */
+ if (bp->b_ops) {
+ if (bp->b_ops != fab->buf_ops)
+ goto out;
+ } else {
+ ASSERT(!xfs_trans_buf_is_dirty(bp));
+ if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
+ &mp->m_sb.sb_meta_uuid))
+ goto out;
+ fab->buf_ops->verify_read(bp);
+ if (bp->b_error) {
+ bp->b_error = 0;
+ goto out;
+ }
- /* Make sure we pass the verifiers. */
- bp->b_ops->verify_read(bp);
- if (bp->b_error)
+ /*
+ * Some read verifiers will (re)set b_ops, so we must be
+ * careful not to blow away any such assignment.
+ */
+ if (!bp->b_ops)
+ bp->b_ops = fab->buf_ops;
+ }
+
+ /*
+ * This block passes the magic/uuid and verifier tests for this btree
+ * type. We don't need the caller to try the other tree types.
+ */
+ *done_with_block = true;
+
+ /*
+ * Compare this btree block's level to the height of the current
+ * candidate root block.
+ *
+ * If the level matches the root we found previously, throw away both
+ * blocks because there can't be two candidate roots.
+ *
+ * If level is lower in the tree than the root we found previously,
+ * ignore this block.
+ */
+ block_level = xfs_btree_get_level(btblock);
+ if (block_level + 1 == fab->height) {
+ fab->root = NULLAGBLOCK;
goto out;
- fab->root = agbno;
- fab->height = xfs_btree_get_level(btblock) + 1;
- *found_it = true;
+ } else if (block_level < fab->height) {
+ goto out;
+ }
+
+ /*
+ * This is the highest block in the tree that we've found so far.
+ * Update the btree height to reflect what we've learned from this
+ * block.
+ */
+ fab->height = block_level + 1;
+
+ /*
+ * If this block doesn't have sibling pointers, then it's the new root
+ * block candidate. Otherwise, the root will be found farther up the
+ * tree.
+ */
+ if (btblock->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) &&
+ btblock->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
+ fab->root = agbno;
+ else
+ fab->root = NULLAGBLOCK;
trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
be32_to_cpu(btblock->bb_magic), fab->height - 1);
@@ -768,7 +846,7 @@ xrep_findroot_rmap(
struct xrep_findroot *ri = priv;
struct xrep_find_ag_btree *fab;
xfs_agblock_t b;
- bool found_it;
+ bool done;
int error = 0;
/* Ignore anything that isn't AG metadata. */
@@ -777,16 +855,16 @@ xrep_findroot_rmap(
/* Otherwise scan each block + btree type. */
for (b = 0; b < rec->rm_blockcount; b++) {
- found_it = false;
+ done = false;
for (fab = ri->btree_info; fab->buf_ops; fab++) {
if (rec->rm_owner != fab->rmap_owner)
continue;
error = xrep_findroot_block(ri, fab,
rec->rm_owner, rec->rm_startblock + b,
- &found_it);
+ &done);
if (error)
return error;
- if (found_it)
+ if (done)
break;
}
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 4bfae1e61d30..1b2344d00525 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -412,19 +412,6 @@ xchk_validate_inputs(
goto out;
}
- error = -EOPNOTSUPP;
- /*
- * We won't scrub any filesystem that doesn't have the ability
- * to record unwritten extents. The option was made default in
- * 2003, removed from mkfs in 2007, and cannot be disabled in
- * v5, so if we find a filesystem without this flag it's either
- * really old or totally unsupported. Avoid it either way.
- * We also don't support v1-v3 filesystems, which aren't
- * mountable.
- */
- if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
- goto out;
-
/*
* We only want to repair read-write v5+ filesystems. Defer the check
* for ops->repair until after our scrub confirms that we need to
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 49f5f5896a43..338b9d9984e0 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -917,7 +917,7 @@ xfs_vm_writepage(
struct writeback_control *wbc)
{
struct xfs_writepage_ctx wpc = {
- .io_type = XFS_IO_INVALID,
+ .io_type = XFS_IO_HOLE,
};
int ret;
@@ -933,7 +933,7 @@ xfs_vm_writepages(
struct writeback_control *wbc)
{
struct xfs_writepage_ctx wpc = {
- .io_type = XFS_IO_INVALID,
+ .io_type = XFS_IO_HOLE,
};
int ret;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 9af867951a10..494b4338446e 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -12,21 +12,19 @@ extern struct bio_set xfs_ioend_bioset;
* Types of I/O for bmap clustering and I/O completion tracking.
*/
enum {
- XFS_IO_INVALID, /* initial state */
+ XFS_IO_HOLE, /* covers region without any block allocation */
XFS_IO_DELALLOC, /* covers delalloc region */
XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
XFS_IO_OVERWRITE, /* covers already allocated extent */
XFS_IO_COW, /* covers copy-on-write extent */
- XFS_IO_HOLE, /* covers region without any block allocation */
};
#define XFS_IO_TYPES \
- { XFS_IO_INVALID, "invalid" }, \
- { XFS_IO_DELALLOC, "delalloc" }, \
- { XFS_IO_UNWRITTEN, "unwritten" }, \
- { XFS_IO_OVERWRITE, "overwrite" }, \
- { XFS_IO_COW, "CoW" }, \
- { XFS_IO_HOLE, "hole" }
+ { XFS_IO_HOLE, "hole" }, \
+ { XFS_IO_DELALLOC, "delalloc" }, \
+ { XFS_IO_UNWRITTEN, "unwritten" }, \
+ { XFS_IO_OVERWRITE, "overwrite" }, \
+ { XFS_IO_COW, "CoW" }
/*
* Structure for buffered I/O completions.
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 6de8d90041ff..5d263dfdb3bc 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -406,10 +406,10 @@ xfs_getbmap_report_one(
struct xfs_bmbt_irec *got)
{
struct kgetbmap *p = out + bmv->bmv_entries;
- bool shared = false, trimmed = false;
+ bool shared = false;
int error;
- error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
+ error = xfs_reflink_trim_around_shared(ip, got, &shared);
if (error)
return error;
@@ -1043,44 +1043,6 @@ out_trans_cancel:
}
static int
-xfs_adjust_extent_unmap_boundaries(
- struct xfs_inode *ip,
- xfs_fileoff_t *startoffset_fsb,
- xfs_fileoff_t *endoffset_fsb)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_bmbt_irec imap;
- int nimap, error;
- xfs_extlen_t mod = 0;
-
- nimap = 1;
- error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
- if (error)
- return error;
-
- if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
- div_u64_rem(imap.br_startblock, mp->m_sb.sb_rextsize, &mod);
- if (mod)
- *startoffset_fsb += mp->m_sb.sb_rextsize - mod;
- }
-
- nimap = 1;
- error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
- if (error)
- return error;
-
- if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
- mod++;
- if (mod && mod != mp->m_sb.sb_rextsize)
- *endoffset_fsb -= mod;
- }
-
- return 0;
-}
-
-static int
xfs_flush_unmap_range(
struct xfs_inode *ip,
xfs_off_t offset,
@@ -1133,19 +1095,8 @@ xfs_free_file_space(
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
/*
- * Need to zero the stuff we're not freeing, on disk. If it's a RT file
- * and we can't use unwritten extents then we actually need to ensure
- * to zero the whole extent, otherwise we just need to take of block
- * boundaries, and xfs_bunmapi will handle the rest.
+ * Need to zero the stuff we're not freeing, on disk.
*/
- if (XFS_IS_REALTIME_INODE(ip) &&
- !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
- error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
- &endoffset_fsb);
- if (error)
- return error;
- }
-
if (endoffset_fsb > startoffset_fsb) {
while (!done) {
error = xfs_unmap_extent(ip, startoffset_fsb,
@@ -1824,6 +1775,12 @@ xfs_swap_extents(
if (error)
goto out_unlock;
+ if (xfs_inode_has_cow_data(tip)) {
+ error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
+ if (error)
+ return error;
+ }
+
/*
* Extent "swapping" with rmap requires a permanent reservation and
* a block reservation because it's really just a remap operation
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e839907e8492..b21ea2ba768d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -37,6 +37,32 @@ static kmem_zone_t *xfs_buf_zone;
#define xb_to_gfp(flags) \
((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
+/*
+ * Locking orders
+ *
+ * xfs_buf_ioacct_inc:
+ * xfs_buf_ioacct_dec:
+ * b_sema (caller holds)
+ * b_lock
+ *
+ * xfs_buf_stale:
+ * b_sema (caller holds)
+ * b_lock
+ * lru_lock
+ *
+ * xfs_buf_rele:
+ * b_lock
+ * pag_buf_lock
+ * lru_lock
+ *
+ * xfs_buftarg_wait_rele
+ * lru_lock
+ * b_lock (trylock due to inversion)
+ *
+ * xfs_buftarg_isolate