// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2010 Red Hat, Inc. All Rights Reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_error.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_discard.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_trace.h"
struct workqueue_struct *xfs_discard_wq;
/*
* Allocate a new ticket. Failing to get a new ticket makes it really hard to
* recover, so we don't allow failure here. Also, we allocate in a context that
* we don't want to be issuing transactions from, so we need to tell the
* allocation code this as well.
*
* We don't reserve any space for the ticket - we are going to steal whatever
* space we require from transactions as they commit. To ensure we reserve all
* the space required, we need to set the current reservation of the ticket to
* zero so that we know to steal the initial transaction overhead from the
* first transaction commit.
*/
static struct xlog_ticket *
xlog_cil_ticket_alloc(
struct xlog *log)
{
struct xlog_ticket *tic;
tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
KM_SLEEP|KM_NOFS);
/*
* set the current reservation to zero so we know to steal the basic
* transaction overhead reservation from the first transaction commit.
*/
tic->t_curr_res = 0;
return tic;
}
/*
* After the first stage of log recovery is done, we know where the head and
* tail of the log are. We need this log initialisation done before we can
* initialise the first CIL checkpoint context.
*
* Here we allocate a log ticket to track space usage during a CIL push. This
* ticket is passed to xlog_write() directly so that we don't slowly leak log
* space by failing to account for space used by log headers and additional
* region headers for split regions.
*/
void
xlog_cil_init_post_recovery(
struct xlog *log)
{
log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
log->l_cilp->xc_ctx->sequence = 1;
}
static inline int
xlog_cil_iovec_space(
uint niovecs)
{
return round_up((sizeof(struct xfs_log_vec) +
niovecs * sizeof(struct xfs_log_iovec)),
sizeof(uint64_t));
}
/*
* Allocate or pin log vector buffers for CIL insertion.
*
* The CIL currently uses disposable buffers for copying a snapshot of the
* modified items into the log during a push. The biggest problem with this is
* the requirement to allocate the disposable buffer during the commit if:
* a) does not exist; or
* b) it is too small
*
* If we do this allocation within xlog_cil_insert_format_items(), it is done
* under the xc_ctx_lock, which means that a CIL push cannot occur during
* the memory allocation. This means that we have a potential deadlock situation
* under low memory conditions when we have lots of dirty metadata pinned in
* the CIL and we need a CIL commit to occur to free memory.
*
* To avoid this, we need to move the memory allocation outside the
* xc_ctx_lock, but because the log vector buffers are disposable, that opens
* up a TOCTOU race condition w.r.t. the CIL committing and removing the log
* vector buffers between the check and the formatting of the item into the
* log vector buffer within the xc_ctx_lock.
*
* Because the log vector buffer needs to be unchanged during the CIL push
* process, we cannot share the buffer between the transaction commit (which
* modifies the buffer) and the CIL push context that is writing the changes
* into the log. This means skipping preallocation of buffer space is
* unreliable, but we most definitely do not want to be allocating and freeing
* buffers unnecessarily during commits when overwrites can be done safely.
*
* The simplest solution to this problem is to allocate a shadow buffer when a
* log item is committed for the second time, and then to only use this buffer
* if necessary. The buffer can remain attached to the log item until such time
* it is needed, and this is the buffer that is reallocated to match the size of
* the incoming modification. Then during the formatting of the item we can swap
* the active buffer with the new one if we can't reuse the existing buffer. We
* don't free the old buffer as it may be reused on the next modification if
* it's size is right, otherwise we'll free and reallocate it at that point.
*
* This function builds a vector for the changes in each log item in the
* transaction. It then works out the length of the buffer needed for each log
* item, allocates them and attaches the vector to the log item in preparation
* for the formatting step which occurs under the xc_ctx_lock.
*
* While this means the memory footprint goes up, it avoids the repeated
* alloc/free pattern that repeated modifications of an item would otherwise
* cause, and hence minimises the CPU overhead of such behaviour.
*/
static void
xlog_cil_alloc_shadow_bufs(
struct xlog *log,
struct xfs_trans *tp)
{
struct xfs_log_item *lip;
list_for_each_entry(lip, &tp->t_items, li_trans) {
struct xfs_log_vec *lv;
int niovecs = 0;
int nbytes = 0;
int buf_size;
bool ordered = false;
/* Skip items which aren't dirty in this transaction. */
if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
continue;
/* get number of vecs and size of data to be stored */
lip->li_ops->iop_size(lip, &niovecs, &nbytes);
/*
* Ordered items need to be tracked but we do not wish to write
* them. We need a logvec to track the object, but we do not
* need an iovec or buffer to be allocated for copying data.
*/
if