summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-04-04 12:44:02 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-04-04 12:44:02 -0700
commit 547c43d777968228b1060b6f1b152b96215eb7b2 (patch)
tree 3e256530397ec1e751d06ed23230bfe1daf4886c
parent 2e08edc5c50a01dc52c005fd939c24476eaf55ef (diff)
parent dc1baa715bbfbb1902da942d06497e79b40e7bc7 (diff)
Merge tag 'xfs-4.17-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "Here's the first round of fixes for XFS for 4.17. The biggest new features this time around are the addition of lazytime support, further enhancement of the on-disk inode metadata verifiers, and a patch to smooth over some of the AGFL padding problems that have intermittently plagued users since 4.5. I foresee sending a second pull request next week with further bug fixes and speedups in the online scrub code and elsewhere. This series has been run through a full xfstests run over the weekend and through a quick xfstests run against this morning's master, with no major failures reported. Summary of changes for this release: - Various cleanups and code fixes - Implement lazytime as a mount option - Convert various on-disk metadata checks from asserts to -EFSCORRUPTED - Fix accounting problems with the rmap per-ag reservations - Refactorings and cleanups for xfs_log_force - Various bugfixes for the reflink code - Work around v5 AGFL padding problems to prevent fs shutdowns - Establish inode fork verifiers to inspect on-disk metadata correctness - Various online scrub fixes - Fix v5 swapext blowing up on deleted inodes" * tag 'xfs-4.17-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (49 commits) xfs: do not log/recover swapext extent owner changes for deleted inodes xfs: clean up xfs_mount allocation and dynamic initializers xfs: remove dead inode version setting code xfs: catch inode allocation state mismatch corruption xfs: xfs_scrub_iallocbt_xref_rmap_inodes should use xref_set_corrupt xfs: flag inode corruption if parent ptr doesn't get us a real inode xfs: don't accept inode buffers with suspicious unlinked chains xfs: move inode extent size hint validation to libxfs xfs: record inode buf errors as a xref error in inobt scrubber xfs: remove xfs_buf parameter from inode scrub methods xfs: inode scrubber shouldn't bother with raw checks xfs: bmap scrubber should do rmap xref with bmap for sparse files xfs: refactor 
inode buffer verifier error logging xfs: refactor inode verifier error logging xfs: refactor bmap record validation xfs: sanity-check the unused space before trying to use it xfs: detect agfl count corruption and reset agfl xfs: unwind the try_again loop in xfs_log_force xfs: refactor xfs_log_force_lsn xfs: minor cleanup for xfs_reflink_end_cow ...
-rw-r--r--fs/inode.c12
-rw-r--r--fs/sync.c6
-rw-r--r--fs/xfs/kmem.c6
-rw-r--r--fs/xfs/kmem.h8
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c39
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h31
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c139
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h2
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c51
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h3
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h14
-rw-r--r--fs/xfs/libxfs/xfs_btree.c125
-rw-r--r--fs/xfs/libxfs/xfs_btree.h19
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c59
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c78
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c13
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c16
-rw-r--r--fs/xfs/libxfs/xfs_format.h13
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c9
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c124
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h5
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c27
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c5
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_sb.c1
-rw-r--r--fs/xfs/scrub/agheader.c6
-rw-r--r--fs/xfs/scrub/attr.c2
-rw-r--r--fs/xfs/scrub/bmap.c174
-rw-r--r--fs/xfs/scrub/common.c24
-rw-r--r--fs/xfs/scrub/common.h13
-rw-r--r--fs/xfs/scrub/dir.c2
-rw-r--r--fs/xfs/scrub/ialloc.c5
-rw-r--r--fs/xfs/scrub/inode.c298
-rw-r--r--fs/xfs/scrub/parent.c12
-rw-r--r--fs/xfs/scrub/quota.c2
-rw-r--r--fs/xfs/scrub/rtbitmap.c3
-rw-r--r--fs/xfs/scrub/trace.h31
-rw-r--r--fs/xfs/xfs_aops.c20
-rw-r--r--fs/xfs/xfs_bmap_util.c44
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_buf_item.c10
-rw-r--r--fs/xfs/xfs_dquot.c6
-rw-r--r--fs/xfs/xfs_dquot_item.c11
-rw-r--r--fs/xfs/xfs_error.c29
-rw-r--r--fs/xfs/xfs_error.h3
-rw-r--r--fs/xfs/xfs_export.c2
-rw-r--r--fs/xfs/xfs_extent_busy.c5
-rw-r--r--fs/xfs/xfs_file.c52
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_icache.c23
-rw-r--r--fs/xfs/xfs_inode.c11
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_item.c29
-rw-r--r--fs/xfs/xfs_iops.c17
-rw-r--r--fs/xfs/xfs_log.c376
-rw-r--r--fs/xfs/xfs_log.h15
-rw-r--r--fs/xfs/xfs_log_cil.c2
-rw-r--r--fs/xfs/xfs_log_recover.c100
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_mount.h13
-rw-r--r--fs/xfs/xfs_reflink.c25
-rw-r--r--fs/xfs/xfs_super.c71
-rw-r--r--fs/xfs/xfs_trace.h9
-rw-r--r--fs/xfs/xfs_trans.c32
-rw-r--r--fs/xfs/xfs_trans_ail.c152
-rw-r--r--fs/xfs/xfs_trans_buf.c4
-rw-r--r--fs/xfs/xfs_trans_inode.c14
-rw-r--r--fs/xfs/xfs_trans_priv.h42
71 files changed, 1365 insertions, 1167 deletions
diff --git a/fs/inode.c b/fs/inode.c
index ef362364d396..b153aeaa61ea 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -346,9 +346,8 @@ void inc_nlink(struct inode *inode)
}
EXPORT_SYMBOL(inc_nlink);
-void address_space_init_once(struct address_space *mapping)
+static void __address_space_init_once(struct address_space *mapping)
{
- memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
spin_lock_init(&mapping->tree_lock);
init_rwsem(&mapping->i_mmap_rwsem);
@@ -356,6 +355,12 @@ void address_space_init_once(struct address_space *mapping)
spin_lock_init(&mapping->private_lock);
mapping->i_mmap = RB_ROOT_CACHED;
}
+
+void address_space_init_once(struct address_space *mapping)
+{
+ memset(mapping, 0, sizeof(*mapping));
+ __address_space_init_once(mapping);
+}
EXPORT_SYMBOL(address_space_init_once);
/*
@@ -371,7 +376,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_io_list);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
- address_space_init_once(&inode->i_data);
+ __address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);
@@ -1533,7 +1538,6 @@ retry:
if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
atomic_inc(&inode->i_count);
- inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
trace_writeback_lazytime_iput(inode);
mark_inode_dirty_sync(inode);
diff --git a/fs/sync.c b/fs/sync.c
index 9908a114d506..b54e0541ad89 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -192,12 +192,8 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
if (!file->f_op->fsync)
return -EINVAL;
- if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
- spin_lock(&inode->i_lock);
- inode->i_state &= ~I_DIRTY_TIME;
- spin_unlock(&inode->i_lock);
+ if (!datasync && (inode->i_state & I_DIRTY_TIME))
mark_inode_dirty_sync(inode);
- }
return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 393b6849aeb3..7bace03dc9dc 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -46,13 +46,13 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
}
void *
-kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+kmem_alloc_large(size_t size, xfs_km_flags_t flags)
{
unsigned nofs_flag = 0;
void *ptr;
gfp_t lflags;
- ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
+ ptr = kmem_alloc(size, flags | KM_MAYFAIL);
if (ptr)
return ptr;
@@ -67,7 +67,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
nofs_flag = memalloc_nofs_save();
lflags = kmem_flags_convert(flags);
- ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL);
+ ptr = __vmalloc(size, lflags, PAGE_KERNEL);
if (flags & KM_NOFS)
memalloc_nofs_restore(nofs_flag);
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 4b87472f35bc..6023b594ead7 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -71,7 +71,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
}
extern void *kmem_alloc(size_t, xfs_km_flags_t);
-extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
+extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
static inline void kmem_free(const void *ptr)
{
@@ -85,6 +85,12 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
return kmem_alloc(size, flags | KM_ZERO);
}
+static inline void *
+kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+{
+ return kmem_alloc_large(size, flags | KM_ZERO);
+}
+
/*
* Zone interfaces
*/
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 2291f4224e24..03885a968de8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -95,13 +95,13 @@ xfs_ag_resv_critical(
switch (type) {
case XFS_AG_RESV_METADATA:
- avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
+ avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
orig = pag->pag_meta_resv.ar_asked;
break;
- case XFS_AG_RESV_AGFL:
+ case XFS_AG_RESV_RMAPBT:
avail = pag->pagf_freeblks + pag->pagf_flcount -
pag->pag_meta_resv.ar_reserved;
- orig = pag->pag_agfl_resv.ar_asked;
+ orig = pag->pag_rmapbt_resv.ar_asked;
break;
default:
ASSERT(0);
@@ -126,10 +126,10 @@ xfs_ag_resv_needed(
{
xfs_extlen_t len;
- len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
+ len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
switch (type) {
case XFS_AG_RESV_METADATA:
- case XFS_AG_RESV_AGFL:
+ case XFS_AG_RESV_RMAPBT:
len -= xfs_perag_resv(pag, type)->ar_reserved;
break;
case XFS_AG_RESV_NONE:
@@ -160,10 +160,11 @@ __xfs_ag_resv_free(
if (pag->pag_agno == 0)
pag->pag_mount->m_ag_max_usable += resv->ar_asked;
/*
- * AGFL blocks are always considered "free", so whatever
- * was reserved at mount time must be given back at umount.
+ * RMAPBT blocks come from the AGFL and AGFL blocks are always
+ * considered "free", so whatever was reserved at mount time must be
+ * given back at umount.
*/
- if (type == XFS_AG_RESV_AGFL)
+ if (type == XFS_AG_RESV_RMAPBT)
oldresv = resv->ar_orig_reserved;
else
oldresv = resv->ar_reserved;
@@ -185,7 +186,7 @@ xfs_ag_resv_free(
int error;
int err2;
- error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
+ error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
if (err2 && !error)
error = err2;
@@ -284,15 +285,15 @@ xfs_ag_resv_init(
}
}
- /* Create the AGFL metadata reservation */
- if (pag->pag_agfl_resv.ar_asked == 0) {
+ /* Create the RMAPBT metadata reservation */
+ if (pag->pag_rmapbt_resv.ar_asked == 0) {
ask = used = 0;
error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
- error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
+ error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
if (error)
goto out;
}
@@ -304,7 +305,7 @@ xfs_ag_resv_init(
return error;
ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
- xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
+ xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
pag->pagf_freeblks + pag->pagf_flcount);
#endif
out:
@@ -325,8 +326,10 @@ xfs_ag_resv_alloc_extent(
trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
switch (type) {
- case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
+ return;
+ case XFS_AG_RESV_METADATA:
+ case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
break;
default:
@@ -341,7 +344,7 @@ xfs_ag_resv_alloc_extent(
len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
resv->ar_reserved -= len;
- if (type == XFS_AG_RESV_AGFL)
+ if (type == XFS_AG_RESV_RMAPBT)
return;
/* Allocations of reserved blocks only need on-disk sb updates... */
xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
@@ -365,8 +368,10 @@ xfs_ag_resv_free_extent(
trace_xfs_ag_resv_free_extent(pag, type, len);
switch (type) {
- case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
+ return;
+ case XFS_AG_RESV_METADATA:
+ case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
break;
default:
@@ -379,7 +384,7 @@ xfs_ag_resv_free_extent(
leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
resv->ar_reserved += leftover;
- if (type == XFS_AG_RESV_AGFL)
+ if (type == XFS_AG_RESV_RMAPBT)
return;
/* Freeing into the reserved pool only requires on-disk update... */
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index 8d6c687deef3..938f2f96c5e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -32,4 +32,35 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
struct xfs_trans *tp, xfs_extlen_t len);
+/*
+ * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from
+ * the AGFL, they are allocated one at a time and the reservation updates don't
+ * require a transaction.
+ */
+static inline void
+xfs_ag_resv_rmapbt_alloc(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_alloc_arg args = {0};
+ struct xfs_perag *pag;
+
+ args.len = 1;
+ pag = xfs_perag_get(mp, agno);
+ xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
+ xfs_perag_put(pag);
+}
+
+static inline void
+xfs_ag_resv_rmapbt_free(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, agno);
+ xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
+ xfs_perag_put(pag);
+}
+
#endif /* __XFS_AG_RESV_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c02781a4c091..39387bdd225d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -53,6 +53,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+/*
+ * Size of the AGFL. For CRC-enabled filesystems we steal a couple of slots in
+ * the beginning of the block for a proper header with the location information
+ * and CRC.
+ */
+unsigned int
+xfs_agfl_size(
+ struct xfs_mount *mp)
+{
+ unsigned int size = mp->m_sb.sb_sectsize;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ size -= sizeof(struct xfs_agfl);
+
+ return size / sizeof(xfs_agblock_t);
+}
+
unsigned int
xfs_refc_block(
struct xfs_mount *mp)
@@ -550,7 +567,7 @@ xfs_agfl_verify(
if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
return __this_address;
- for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
+ for (i = 0; i < xfs_agfl_size(mp); i++) {
if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
return __this_address;
@@ -1564,7 +1581,6 @@ xfs_alloc_ag_vextent_small(
int *stat) /* status: 0-freelist, 1-normal/none */
{
struct xfs_owner_info oinfo;
- struct xfs_perag *pag;
int error;
xfs_agblock_t fbno;
xfs_extlen_t flen;
@@ -1616,18 +1632,13 @@ xfs_alloc_ag_vextent_small(
/*
* If we're feeding an AGFL block to something that
* doesn't live in the free space, we need to clear
- * out the OWN_AG rmap and add the block back to
- * the AGFL per-AG reservation.
+ * out the OWN_AG rmap.
*/
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
error = xfs_rmap_free(args->tp, args->agbp, args->agno,
fbno, 1, &oinfo);
if (error)
goto error0;
- pag = xfs_perag_get(args->mp, args->agno);
- xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
- args->tp, 1);
- xfs_perag_put(pag);
*stat = 0;
return 0;
@@ -1911,14 +1922,12 @@ xfs_free_ag_extent(
XFS_STATS_INC(mp, xs_freex);
XFS_STATS_ADD(mp, xs_freeb, len);
- trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
- haveleft, haveright);
+ trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
return 0;
error0:
- trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
- -1, -1);
+ trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur)
@@ -2054,6 +2063,93 @@ xfs_alloc_space_available(
}
/*
+ * Check the agfl fields of the agf for inconsistency or corruption. The purpose
+ * is to detect an agfl header padding mismatch between current and early v5
+ * kernels. This problem manifests as a 1-slot size difference between the
+ * on-disk flcount and the active [first, last] range of a wrapped agfl. This
+ * may also catch variants of agfl count corruption unrelated to padding. Either
+ * way, we'll reset the agfl and warn the user.
+ *
+ * Return true if a reset is required before the agfl can be used, false
+ * otherwise.
+ */
+static bool
+xfs_agfl_needs_reset(
+ struct xfs_mount *mp,
+ struct xfs_agf *agf)
+{
+ uint32_t f = be32_to_cpu(agf->agf_flfirst);
+ uint32_t l = be32_to_cpu(agf->agf_fllast);
+ uint32_t c = be32_to_cpu(agf->agf_flcount);
+ int agfl_size = xfs_agfl_size(mp);
+ int active;
+
+ /* no agfl header on v4 supers */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+
+ /*
+ * The agf read verifier catches severe corruption of these fields.
+ * Repeat some sanity checks to cover a packed -> unpacked mismatch if
+ * the verifier allows it.
+ */
+ if (f >= agfl_size || l >= agfl_size)
+ return true;
+ if (c > agfl_size)
+ return true;
+
+ /*
+ * Check consistency between the on-disk count and the active range. An
+ * agfl padding mismatch manifests as an inconsistent flcount.
+ */
+ if (c && l >= f)
+ active = l - f + 1;
+ else if (c)
+ active = agfl_size - f + l + 1;
+ else
+ active = 0;
+
+ return active != c;
+}
+
+/*
+ * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
+ * agfl content cannot be trusted. Warn the user that a repair is required to
+ * recover leaked blocks.
+ *
+ * The purpose of this mechanism is to handle filesystems affected by the agfl
+ * header padding mismatch problem. A reset keeps the filesystem online with a
+ * relatively minor free space accounting inconsistency rather than suffer the
+ * inevitable crash from use of an invalid agfl block.
+ */
+static void
+xfs_agfl_reset(
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ struct xfs_perag *pag)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+
+ ASSERT(pag->pagf_agflreset);
+ trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
+
+ xfs_warn(mp,
+ "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
+ "Please unmount and run xfs_repair.",
+ pag->pag_agno, pag->pagf_flcount);
+
+ agf->agf_flfirst = 0;
+ agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
+ agf->agf_flcount = 0;
+ xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
+ XFS_AGF_FLCOUNT);
+
+ pag->pagf_flcount = 0;
+ pag->pagf_agflreset = false;
+}
+
+/*
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
@@ -2114,6 +2210,10 @@ xfs_alloc_fix_freelist(
}
}
+ /* reset a padding mismatched agfl before final free space check */
+ if (pag->pagf_agflreset)
+ xfs_agfl_reset(tp, agbp, pag);
+
/* If there isn't enough total space or single-extent, reject it. */
need = xfs_alloc_min_freelist(mp, pag);
if (!xfs_alloc_space_available(args, need, flags))
@@ -2266,10 +2366,11 @@ xfs_alloc_get_freelist(
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
- if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
+ if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
agf->agf_flfirst = 0;
pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+ ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
@@ -2377,10 +2478,11 @@ xfs_alloc_put_freelist(
be32_to_cpu(agf->agf_seqno), &agflbp)))
return error;
be32_add_cpu(&agf->agf_fllast, 1);
- if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
+ if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
agf->agf_fllast = 0;
pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+ ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, 1);
xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
@@ -2395,7 +2497,7 @@ xfs_alloc_put_freelist(
xfs_alloc_log_agf(tp, agbp, logflags);
- ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
+ ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
@@ -2428,9 +2530,9 @@ xfs_agf_verify(
if (!(agf->agf_magicnum