summaryrefslogtreecommitdiffstats
path: root/fs/jbd/journal.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd/journal.c')
-rw-r--r--fs/jbd/journal.c2145
1 files changed, 0 insertions, 2145 deletions
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
deleted file mode 100644
index c46a79adb6ad..000000000000
--- a/fs/jbd/journal.c
+++ /dev/null
@@ -1,2145 +0,0 @@
-/*
- * linux/fs/jbd/journal.c
- *
- * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
- *
- * Copyright 1998 Red Hat corp --- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * Generic filesystem journal-writing code; part of the ext2fs
- * journaling system.
- *
- * This file manages journals: areas of disk reserved for logging
- * transactional updates. This includes the kernel journaling thread
- * which is responsible for scheduling updates to the log.
- *
- * We do not actually manage the physical storage of the journal in this
- * file: that is left to a per-journal policy function, which allows us
- * to store the journal within a filesystem-specified area for ext2
- * journaling (ext2 can use a reserved inode for storing the log).
- */
-
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/freezer.h>
-#include <linux/pagemap.h>
-#include <linux/kthread.h>
-#include <linux/poison.h>
-#include <linux/proc_fs.h>
-#include <linux/debugfs.h>
-#include <linux/ratelimit.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/jbd.h>
-
-#include <asm/uaccess.h>
-#include <asm/page.h>
-
-EXPORT_SYMBOL(journal_start);
-EXPORT_SYMBOL(journal_restart);
-EXPORT_SYMBOL(journal_extend);
-EXPORT_SYMBOL(journal_stop);
-EXPORT_SYMBOL(journal_lock_updates);
-EXPORT_SYMBOL(journal_unlock_updates);
-EXPORT_SYMBOL(journal_get_write_access);
-EXPORT_SYMBOL(journal_get_create_access);
-EXPORT_SYMBOL(journal_get_undo_access);
-EXPORT_SYMBOL(journal_dirty_data);
-EXPORT_SYMBOL(journal_dirty_metadata);
-EXPORT_SYMBOL(journal_release_buffer);
-EXPORT_SYMBOL(journal_forget);
-#if 0
-EXPORT_SYMBOL(journal_sync_buffer);
-#endif
-EXPORT_SYMBOL(journal_flush);
-EXPORT_SYMBOL(journal_revoke);
-
-EXPORT_SYMBOL(journal_init_dev);
-EXPORT_SYMBOL(journal_init_inode);
-EXPORT_SYMBOL(journal_update_format);
-EXPORT_SYMBOL(journal_check_used_features);
-EXPORT_SYMBOL(journal_check_available_features);
-EXPORT_SYMBOL(journal_set_features);
-EXPORT_SYMBOL(journal_create);
-EXPORT_SYMBOL(journal_load);
-EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_abort);
-EXPORT_SYMBOL(journal_errno);
-EXPORT_SYMBOL(journal_ack_err);
-EXPORT_SYMBOL(journal_clear_err);
-EXPORT_SYMBOL(log_wait_commit);
-EXPORT_SYMBOL(log_start_commit);
-EXPORT_SYMBOL(journal_start_commit);
-EXPORT_SYMBOL(journal_force_commit_nested);
-EXPORT_SYMBOL(journal_wipe);
-EXPORT_SYMBOL(journal_blocks_per_page);
-EXPORT_SYMBOL(journal_invalidatepage);
-EXPORT_SYMBOL(journal_try_to_free_buffers);
-EXPORT_SYMBOL(journal_force_commit);
-
-static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
-static void __journal_abort_soft (journal_t *journal, int errno);
-static const char *journal_dev_name(journal_t *journal, char *buffer);
-
-#ifdef CONFIG_JBD_DEBUG
-void __jbd_debug(int level, const char *file, const char *func,
- unsigned int line, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- if (level > journal_enable_debug)
- return;
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf);
- va_end(args);
-}
-EXPORT_SYMBOL(__jbd_debug);
-#endif
-
-/*
- * Helper function used to manage commit timeouts
- */
-
-static void commit_timeout(unsigned long __data)
-{
- struct task_struct * p = (struct task_struct *) __data;
-
- wake_up_process(p);
-}
-
-/*
- * kjournald: The main thread function used to manage a logging device
- * journal.
- *
- * This kernel thread is responsible for two things:
- *
- * 1) COMMIT: Every so often we need to commit the current state of the
- * filesystem to disk. The journal thread is responsible for writing
- * all of the metadata buffers to disk.
- *
- * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
- * of the data in that part of the log has been rewritten elsewhere on
- * the disk. Flushing these old buffers to reclaim space in the log is
- * known as checkpointing, and this thread is responsible for that job.
- */
-
-static int kjournald(void *arg)
-{
- journal_t *journal = arg;
- transaction_t *transaction;
-
- /*
- * Set up an interval timer which can be used to trigger a commit wakeup
- * after the commit interval expires
- */
- setup_timer(&journal->j_commit_timer, commit_timeout,
- (unsigned long)current);
-
- set_freezable();
-
- /* Record that the journal thread is running */
- journal->j_task = current;
- wake_up(&journal->j_wait_done_commit);
-
- printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n",
- journal->j_commit_interval / HZ);
-
- /*
- * And now, wait forever for commit wakeup events.
- */
- spin_lock(&journal->j_state_lock);
-
-loop:
- if (journal->j_flags & JFS_UNMOUNT)
- goto end_loop;
-
- jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
- journal->j_commit_sequence, journal->j_commit_request);
-
- if (journal->j_commit_sequence != journal->j_commit_request) {
- jbd_debug(1, "OK, requests differ\n");
- spin_unlock(&journal->j_state_lock);
- del_timer_sync(&journal->j_commit_timer);
- journal_commit_transaction(journal);
- spin_lock(&journal->j_state_lock);
- goto loop;
- }
-
- wake_up(&journal->j_wait_done_commit);
- if (freezing(current)) {
- /*
- * The simpler the better. Flushing journal isn't a
- * good idea, because that depends on threads that may
- * be already stopped.
- */
- jbd_debug(1, "Now suspending kjournald\n");
- spin_unlock(&journal->j_state_lock);
- try_to_freeze();
- spin_lock(&journal->j_state_lock);
- } else {
- /*
- * We assume on resume that commits are already there,
- * so we don't sleep
- */
- DEFINE_WAIT(wait);
- int should_sleep = 1;
-
- prepare_to_wait(&journal->j_wait_commit, &wait,
- TASK_INTERRUPTIBLE);
- if (journal->j_commit_sequence != journal->j_commit_request)
- should_sleep = 0;
- transaction = journal->j_running_transaction;
- if (transaction && time_after_eq(jiffies,
- transaction->t_expires))
- should_sleep = 0;
- if (journal->j_flags & JFS_UNMOUNT)
- should_sleep = 0;
- if (should_sleep) {
- spin_unlock(&journal->j_state_lock);
- schedule();
- spin_lock(&journal->j_state_lock);
- }
- finish_wait(&journal->j_wait_commit, &wait);
- }
-
- jbd_debug(1, "kjournald wakes\n");
-
- /*
- * Were we woken up by a commit wakeup event?
- */
- transaction = journal->j_running_transaction;
- if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
- journal->j_commit_request = transaction->t_tid;
- jbd_debug(1, "woke because of timeout\n");
- }
- goto loop;
-
-end_loop:
- spin_unlock(&journal->j_state_lock);
- del_timer_sync(&journal->j_commit_timer);
- journal->j_task = NULL;
- wake_up(&journal->j_wait_done_commit);
- jbd_debug(1, "Journal thread exiting.\n");
- return 0;
-}
-
-static int journal_start_thread(journal_t *journal)
-{
- struct task_struct *t;
-
- t = kthread_run(kjournald, journal, "kjournald");
- if (IS_ERR(t))
- return PTR_ERR(t);
-
- wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
- return 0;
-}
-
-static void journal_kill_thread(journal_t *journal)
-{
- spin_lock(&journal->j_state_lock);
- journal->j_flags |= JFS_UNMOUNT;
-
- while (journal->j_task) {
- wake_up(&journal->j_wait_commit);
- spin_unlock(&journal->j_state_lock);
- wait_event(journal->j_wait_done_commit,
- journal->j_task == NULL);
- spin_lock(&journal->j_state_lock);
- }
- spin_unlock(&journal->j_state_lock);
-}
-
-/*
- * journal_write_metadata_buffer: write a metadata buffer to the journal.
- *
- * Writes a metadata buffer to a given disk block. The actual IO is not
- * performed but a new buffer_head is constructed which labels the data
- * to be written with the correct destination disk block.
- *
- * Any magic-number escaping which needs to be done will cause a
- * copy-out here. If the buffer happens to start with the
- * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
- * magic number is only written to the log for descripter blocks. In
- * this case, we copy the data and replace the first word with 0, and we
- * return a result code which indicates that this buffer needs to be
- * marked as an escaped buffer in the corresponding log descriptor
- * block. The missing word can then be restored when the block is read
- * during recovery.
- *
- * If the source buffer has already been modified by a new transaction
- * since we took the last commit snapshot, we use the frozen copy of
- * that data for IO. If we end up using the existing buffer_head's data
- * for the write, then we *have* to lock the buffer to prevent anyone
- * else from using and possibly modifying it while the IO is in
- * progress.
- *
- * The function returns a pointer to the buffer_heads to be used for IO.
- *
- * We assume that the journal has already been locked in this function.
- *
- * Return value:
- * <0: Error
- * >=0: Finished OK
- *
- * On success:
- * Bit 0 set == escape performed on the data
- * Bit 1 set == buffer copy-out performed (kfree the data after IO)
- */
-
-int journal_write_metadata_buffer(transaction_t *transaction,
- struct journal_head *jh_in,
- struct journal_head **jh_out,
- unsigned int blocknr)
-{
- int need_copy_out = 0;
- int done_copy_out = 0;
- int do_escape = 0;
- char *mapped_data;
- struct buffer_head *new_bh;
- struct journal_head *new_jh;
- struct page *new_page;
- unsigned int new_offset;
- struct buffer_head *bh_in = jh2bh(jh_in);
- journal_t *journal = transaction->t_journal;
-
- /*
- * The buffer really shouldn't be locked: only the current committing
- * transaction is allowed to write it, so nobody else is allowed
- * to do any IO.
- *
- * akpm: except if we're journalling data, and write() output is
- * also part of a shared mapping, and another thread has
- * decided to launch a writepage() against this buffer.
- */
- J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
-
- new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
- /* keep subsequent assertions sane */
- atomic_set(&new_bh->b_count, 1);
- new_jh = journal_add_journal_head(new_bh); /* This sleeps */
-
- /*
- * If a new transaction has already done a buffer copy-out, then
- * we use that version of the data for the commit.
- */
- jbd_lock_bh_state(bh_in);
-repeat:
- if (jh_in->b_frozen_data) {
- done_copy_out = 1;
- new_page = virt_to_page(jh_in->b_frozen_data);
- new_offset = offset_in_page(jh_in->b_frozen_data);
- } else {
- new_page = jh2bh(jh_in)->b_page;
- new_offset = offset_in_page(jh2bh(jh_in)->b_data);
- }
-
- mapped_data = kmap_atomic(new_page);
- /*
- * Check for escaping
- */
- if (*((__be32 *)(mapped_data + new_offset)) ==
- cpu_to_be32(JFS_MAGIC_NUMBER)) {
- need_copy_out = 1;
- do_escape = 1;
- }
- kunmap_atomic(mapped_data);
-
- /*
- * Do we need to do a data copy?
- */
- if (need_copy_out && !done_copy_out) {
- char *tmp;
-
- jbd_unlock_bh_state(bh_in);
- tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
- jbd_lock_bh_state(bh_in);
- if (jh_in->b_frozen_data) {
- jbd_free(tmp, bh_in->b_size);
- goto repeat;
- }
-
- jh_in->b_frozen_data = tmp;
- mapped_data = kmap_atomic(new_page);
- memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
- kunmap_atomic(mapped_data);
-
- new_page = virt_to_page(tmp);
- new_offset = offset_in_page(tmp);
- done_copy_out = 1;
- }
-
- /*
- * Did we need to do an escaping? Now we've done all the
- * copying, we can finally do so.
- */
- if (do_escape) {
- mapped_data = kmap_atomic(new_page);
- *((unsigned int *)(mapped_data + new_offset)) = 0;
- kunmap_atomic(mapped_data);
- }
-
- set_bh_page(new_bh, new_page, new_offset);
- new_jh->b_transaction = NULL;
- new_bh->b_size = jh2bh(jh_in)->b_size;
- new_bh->b_bdev = transaction->t_journal->j_dev;
- new_bh->b_blocknr = blocknr;
- set_buffer_mapped(new_bh);
- set_buffer_dirty(new_bh);
-
- *jh_out = new_jh;
-
- /*
- * The to-be-written buffer needs to get moved to the io queue,
- * and the original buffer whose contents we are shadowing or
- * copying is moved to the transaction's shadow queue.
- */
- JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
- spin_lock(&journal->j_list_lock);
- __journal_file_buffer(jh_in, transaction, BJ_Shadow);
- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh_in);
-
- JBUFFER_TRACE(new_jh, "file as BJ_IO");
- journal_file_buffer(new_jh, transaction, BJ_IO);
-
- return do_escape | (done_copy_out << 1);
-}
-
-/*
- * Allocation code for the journal file. Manage the space left in the
- * journal, so that we can begin checkpointing when appropriate.
- */
-
-/*
- * __log_space_left: Return the number of free blocks left in the journal.
- *
- * Called with the journal already locked.
- *
- * Called under j_state_lock
- */
-
-int __log_space_left(journal_t *journal)
-{
- int left = journal->j_free;
-
- assert_spin_locked(&journal->j_state_lock);
-
- /*
- * Be pessimistic here about the number of those free blocks which
- * might be required for log descriptor control blocks.
- */
-
-#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
-
- left -= MIN_LOG_RESERVED_BLOCKS;
-
- if (left <= 0)
- return 0;
- left -= (left >> 3);
- return left;
-}
-
-/*
- * Called under j_state_lock. Returns true if a transaction commit was started.
- */
-int __log_start_commit(journal_t *journal, tid_t target)
-{
- /*
- * The only transaction we can possibly wait upon is the
- * currently running transaction (if it exists). Otherwise,
- * the target tid must be an old one.
- */
- if (journal->j_commit_request != target &&
- journal->j_running_transaction &&
- journal->j_running_transaction->t_tid == target) {
- /*
- * We want a new commit: OK, mark the request and wakeup the
- * commit thread. We do _not_ do the commit ourselves.
- */
-
- journal->j_commit_request = target;
- jbd_debug(1, "JBD: requesting commit %d/%d\n",
- journal->j_commit_request,
- journal->j_commit_sequence);
- wake_up(&journal->j_wait_commit);
- return 1;
- } else if (!tid_geq(journal->j_commit_request, target))
- /* This should never happen, but if it does, preserve
- the evidence before kjournald goes into a loop and
- increments j_commit_sequence beyond all recognition. */
- WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
- journal->j_commit_request, journal->j_commit_sequence,
- target, journal->j_running_transaction ?
- journal->j_running_transaction->t_tid : 0);
- return 0;
-}
-
-int log_start_commit(journal_t *journal, tid_t tid)
-{
- int ret;
-
- spin_lock(&journal->j_state_lock);
- ret = __log_start_commit(journal, tid);
- spin_unlock(&journal->j_state_lock);
- return ret;
-}
-
-/*
- * Force and wait upon a commit if the calling process is not within
- * transaction. This is used for forcing out undo-protected data which contains
- * bitmaps, when the fs is running out of space.
- *
- * We can only force the running transaction if we don't have an active handle;
- * otherwise, we will deadlock.
- *
- * Returns true if a transaction was started.
- */
-int journal_force_commit_nested(journal_t *journal)
-{
- transaction_t *transaction = NULL;
- tid_t tid;
-
- spin_lock(&journal->j_state_lock);
- if (journal->j_running_transaction && !current->journal_info) {
- transaction = journal->j_running_transaction;
- __log_start_commit(journal, transaction->t_tid);
- } else if (journal->j_committing_transaction)
- transaction = journal->j_committing_transaction;
-
- if (!transaction) {
- spin_unlock(&journal->j_state_lock);
- return 0; /* Nothing to retry */
- }
-
- tid = transaction->t_tid;
- spin_unlock(&journal->j_state_lock);
- log_wait_commit(journal, tid);
- return 1;
-}
-
-/*
- * Start a commit of the current running transaction (if any). Returns true
- * if a transaction is going to be committed (or is currently already
- * committing), and fills its tid in at *ptid
- */
-int journal_start_commit(journal_t *journal, tid_t *ptid)
-{
- int ret = 0;
-
- spin_lock(&journal->j_state_lock);
- if (journal->j_running_transaction) {
- tid_t tid = journal->j_running_transaction->t_tid;
-
- __log_start_commit(journal, tid);
- /* There's a running transaction and we've just made sure
- * it's commit has been scheduled. */
- if (ptid)
- *ptid = tid;
- ret = 1;
- } else if (journal->j_committing_transaction) {
- /*
- * If commit has been started, then we have to wait for
- * completion of that transaction.
- */
- if (ptid)
- *ptid = journal->j_committing_transaction->t_tid;
- ret = 1;
- }
- spin_unlock(&journal->j_state_lock);
- return ret;
-}
-
-/*
- * Wait for a specified commit to complete.
- * The caller may not hold the journal lock.
- */
-int log_wait_commit(journal_t *journal, tid_t tid)
-{
- int err = 0;
-
-#ifdef CONFIG_JBD_DEBUG
- spin_lock(&journal->j_state_lock);
- if (!tid_geq(journal->j_commit_request, tid)) {
- printk(KERN_ERR
- "%s: error: j_commit_request=%d, tid=%d\n",
- __func__, journal->j_commit_request, tid);
- }
- spin_unlock(&journal->j_state_lock);
-#endif
- spin_lock(&journal->j_state_lock);
- /*
- * Not running or committing trans? Must be already committed. This
- * saves us from waiting for a *long* time when tid overflows.
- */
- if (!((journal->j_running_transaction &&
- journal->j_running_transaction->t_tid == tid) ||
- (journal->j_committing_transaction &&
- journal->j_committing_transaction->t_tid == tid)))
- goto out_unlock;
-
- if (!tid_geq(journal->j_commit_waited, tid))
- journal->j_commit_waited = tid;
- while (tid_gt(tid, journal->j_commit_sequence)) {
- jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
- tid, journal->j_commit_sequence);
- wake_up(&journal->j_wait_commit);
- spin_unlock(&journal->j_state_lock);
- wait_event(journal->j_wait_done_commit,
- !tid_gt(tid, journal->j_commit_sequence));
- spin_lock(&journal->j_state_lock);
- }
-out_unlock:
- spin_unlock(&journal->j_state_lock);
-
- if (unlikely(is_journal_aborted(journal)))
- err = -EIO;
- return err;
-}
-
-/*
- * Return 1 if a given transaction has not yet sent barrier request
- * connected with a transaction commit. If 0 is returned, transaction
- * may or may not have sent the barrier. Used to avoid sending barrier
- * twice in common cases.
- */
-int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
-{
- int ret = 0;
- transaction_t *commit_trans;
-
- if (!(journal->j_flags & JFS_BARRIER))
- return 0;
- spin_lock(&journal->j_state_lock);
- /* Transaction already committed? */
- if (tid_geq(journal->j_commit_sequence, tid))
- goto out;
- /*
- * Transaction is being committed and we already proceeded to
- * writing commit record?
- */
- commit_trans = journal->j_committing_transaction;
- if (commit_trans && commit_trans->t_tid == tid &&
- commit_trans->t_state >= T_COMMIT_RECORD)
- goto out;
- ret = 1;
-out:
- spin_unlock(&journal->j_state_lock);
- return ret;
-}
-EXPORT_SYMBOL(journal_trans_will_send_data_barrier);
-
-/*
- * Log buffer allocation routines:
- */
-
-int journal_next_log_block(journal_t *journal, unsigned int *retp)
-{
- unsigned int blocknr;
-
- spin_lock(&journal->j_state_lock);
- J_ASSERT(journal->j_free > 1);
-
- blocknr = journal->j_head;
- journal->j_head++;
- journal->j_free--;
- if (journal->j_head == journal->j_last)
- journal->j_head = journal->j_first;
- spin_unlock(&journal->j_state_lock);
- return journal_bmap(journal, blocknr, retp);
-}
-
-/*
- * Conversion of logical to physical block numbers for the journal
- *
- * On external journals the journal blocks are identity-mapped, so
- * this is a no-op. If needed, we can use j_blk_offset - everything is
- * ready.
- */
-int journal_bmap(journal_t *journal, unsigned int blocknr,
- unsigned int *retp)
-{
- int err = 0;
- unsigned int ret;
-
- if (journal->j_inode) {
- ret = bmap(journal->j_inode, blocknr);
- if (ret)
- *retp = ret;
- else {
- char b[BDEVNAME_SIZE];
-
- printk(KERN_ALERT "%s: journal block not found "
- "at offset %u on %s\n",
- __func__,
- blocknr,
- bdevname(journal->j_dev, b));
- err = -EIO;
- __journal_abort_soft(journal, err);
- }
- } else {
- *retp = blocknr; /* +journal->j_blk_offset */
- }
- return err;
-}
-
-/*
- * We play buffer_head aliasing tricks to write data/metadata blocks to
- * the journal without copying their contents, but for journal
- * descriptor blocks we do need to generate bona fide buffers.
- *
- * After the caller of journal_get_descriptor_buffer() has finished modifying
- * the buffer's contents they really should run flush_dcache_page(bh->b_page).
- * But we don't bother doing that, so there will be coherency problems with
- * mmaps of blockdevs which hold live JBD-controlled filesystems.
- */
-struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
-{
- struct buffer_head *bh;
- unsigned int blocknr;
- int err;
-
- err = journal_next_log_block(journal, &blocknr);
-
- if (err)
- return NULL;
-
- bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
- if (!bh)
- return NULL;
- lock_buffer(bh);
- memset(bh->b_data, 0, journal->j_blocksize);
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
- BUFFER_TRACE(bh, "return this buffer");
- return journal_add_journal_head(bh);
-}
-
-/*
- * Management for journal control blocks: functions to create and
- * destroy journal_t structures, and to initialise and read existing
- * journal blocks from disk. */
-
-/* First: create and setup a journal_t object in memory. We initialise
- * very few fields yet: that has to wait until we have created the
- * journal structures from from scratch, or loaded them from disk. */
-
-static journal_t * journal_init_common (void)
-{
- journal_t *journal;
- int err;
-
- journal = kzalloc(sizeof(*journal), GFP_KERNEL);
- if (!journal)
- goto fail;
-
- init_waitqueue_head(&journal->j_wait_transaction_locked);
- init_waitqueue_head(&journal->j_wait_logspace);
- init_waitqueue_head(&journal->j_wait_done_commit);
- init_waitqueue_head(&journal->j_wait_checkpoint);
- init_waitqueue_head(&journal->j_wait_commit);
- init_waitqueue_head(&journal->j_wait_updates);
- mutex_init(&journal->j_checkpoint_mutex);
- spin_lock_init(&journal->j_revoke_lock);
- spin_lock_init(&journal->j_list_lock);
- spin_lock_init(&journal->j_state_lock);
-
- journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
-
- /* The journal is marked for error until we succeed with recovery! */
- journal->j_flags = JFS_ABORT;
-
- /* Set up a default-sized revoke table for the new mount. */
- err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
- if (err) {
- kfree(journal);
- goto fail;
- }
- return journal;
-fail:
- return NULL;
-}
-
-/* journal_init_dev and journal_init_inode:
- *
- * Create a journal structure assigned some fixed set of disk blocks to
- * the journal. We don't actually touch those disk blocks yet, but we
- * need to set up all of the mapping information to tell the journaling
- * system where the journal blocks are.
- *
- */
-
-/**
- * journal_t * journal_init_dev() - creates and initialises a journal structure
- * @bdev: Block device on which to create the journal
- * @fs_dev: Device which hold journalled filesystem for this journal.
- * @start: Block nr Start of journal.
- * @len: Length of the journal in blocks.
- * @blocksize: blocksize of journalling device
- *
- * Returns: a newly created journal_t *
- *
- * journal_init_dev creates a journal which maps a fixed contiguous
- * range of blocks on an arbitrary block device.
- *
- */
-journal_t * journal_init_dev(struct block_device *bdev,
- struct block_device *fs_dev,
- int start, int len, int blocksize)
-{
- journal_t *journal = journal_init_common();
- struct buffer_head *bh;
- int n;
-
- if (!journal)
- return NULL;
-
- /* journal descriptor can store up to n blocks -bzzz */
- journal->j_blocksize = blocksize;
- n = journal->j_blocksize / sizeof(journal_block_tag_t);
- journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
- if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
- __func__);
- goto out_err;
- }
- journal->j_dev = bdev;
- journal->j_fs_dev = fs_dev;
- journal->j_blk_offset = start;
- journal->j_maxlen = len;
-
- bh = __getblk(journal->j_dev, start, journal->j_blocksize);
- if (!bh) {
- printk(KERN_ERR
- "%s: Cannot get buffer for journal superblock\n",
- __func__);
- goto out_err;
- }
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
- return journal;
-out_err:
- kfree(journal->j_wbuf);
- kfree(journal);
- return NULL;
-}
-
-/**
- * journal_t * journal_init_inode () - creates a journal which maps to a inode.
- * @inode: An inode to create the journal in
- *
- * journal_init_inode creates a journal which maps an on-disk inode as
- * the journal. The inode must exist already, must support bmap() and
- * must have all data blocks preallocated.
- */
-journal_t * journal_init_inode (struct inode *inode)
-{
- struct buffer_head *bh;
- journal_t *journal = journal_init_common();
- int err;
- int n;
- unsigned int blocknr;
-
- if (!journal)
- return NULL;
-
- journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
- journal->j_inode = inode;
- jbd_debug(1,
- "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
- journal, inode->i_sb->s_id, inode->i_ino,
- (long long) inode->i_size,
- inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
-
- journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
- journal->j_blocksize = inode->i_sb->s_blocksize;
-
- /* journal descriptor can store up to n blocks -bzzz */
- n = journal->j_blocksize / sizeof(journal_block_tag_t);
- journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
- if (!journal->j_wbuf) {
- printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
- __func__);
- goto out_err;
- }
-
- err = journal_bmap(journal, 0, &blocknr);
- /* If that failed, give up */
- if (err) {
- printk(KERN_ERR "%s: Cannot locate journal superblock\n",
- __func__);
- goto out_err;
- }
-
- bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
- if (!bh) {
- printk(KERN_ERR
- "%s: Cannot get buffer for journal superblock\n",
- __func__);
- goto out_err;
- }
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
- return journal;
-out_err:
- kfree(journal->j_wbuf);
- kfree(journal);
- return NULL;
-}
-
-/*
- * If the journal init or create aborts, we need to mark the journal
- * superblock as being NULL to prevent the journal destroy from writing
- * back a bogus superblock.
- */
-static void journal_fail_superblock (journal_t *journal)
-{
- struct buffer_head *bh = journal->j_sb_buffer;
- brelse(bh);
- journal->j_sb_buffer = NULL;
-}
-
-/*
- * Given a journal_t structure, initialise the various fields for
- * startup of a new journaling session. We use this both when creating
- * a journal, and after recovering an old journal to reset it for
- * subsequent use.
- */
-
-static int journal_reset(journal_t *journal)
-{
- journal_superblock_t *sb = journal->j_superblock;
- unsigned int first, last;
-
- first = be32_to_cpu(sb->s_first);
- last = be32_to_cpu(sb->s_maxlen);
- if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
- printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n",
- first, last);
- journal_fail_superblock(journal);
- return -EINVAL;
- }
-
- journal->j_first = first;
- journal->j_last = last;
-
- journal->j_head = first;
- journal->j_tail = first;
- journal->j_free = last - first;
-
- journal->j_tail_sequence = journal->j_transaction_sequence;
- journal->j_commit_sequence = journal->j_transaction_sequence - 1;
- journal->j_commit_request = journal->j_commit_sequence;
-
- journal->j_max_transaction_buffers = journal->j_maxlen / 4;
-
- /*
- * As a special case, if the on-disk copy is already marked as needing
- * no recovery (s_start == 0), then we can safely defer the superblock
- * update until the next commit by setting JFS_FLUSHED. This avoids
- * attempting a write to a potential-readonly device.
- */
- if (sb->s_start == 0) {
- jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
- "(start %u, seq %d, errno %d)\n",
- journal->j_tail, journal->j_tail_sequence,
- journal->j_errno);
- journal->j_flags |= JFS_FLUSHED;
- } else {
- /* Lock here to make assertions happy... */
- mutex_lock(&journal->j_checkpoint_mutex);
- /*
- * Update log tail information. We use WRITE_FUA since new
- * transaction will start reusing journal space and so we
- * must make sure information about current log tail is on
- * disk before that.
- */
- journal_update_sb_log_tail(journal,
- journal->j_tail_sequence,
- journal->j_tail,
- WRITE_FUA);
- mutex_unlock(&journal->j_checkpoint_mutex);
- }
- return journal_start_thread(journal);
-}
-
-/**
- * int journal_create() - Initialise the new journal file
- * @journal: Journal to create. This structure must have been initialised
- *
- * Given a journal_t structure which tells us which disk blocks we can
- * use, create a new journal superblock and initialise all of the
- * journal fields from scratch.
- **/
-int journal_create(journal_t *journal)
-{
- unsigned int blocknr;
- struct buffer_head *bh;
- journal_superblock_t *sb;
- int i, err;
-
- if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
- printk (KERN_ERR "Journal length (%d blocks) too short.\n",
- journal->j_maxlen);
- journal_fail_superblock(journal);
- return -EINVAL;
- }
-
- if (journal->j_inode == NULL) {
- /*
- * We don't know what block to start at!
- */
- printk(KERN_EMERG
- "%s: creation of journal on external device!\n",
- __func__);
- BUG();
- }
-
- /* Zero out the entire journal on disk. We cannot afford to
- have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
- jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
- for (i = 0; i < journal->j_maxlen; i++) {
- err = journal_bmap(journal, i, &blocknr);
- if (err)
- return err;
- bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
- if (unlikely(!bh))
- return -ENOMEM;
- lock_buffer(bh);
- memset (bh->b_data, 0, journal->j_blocksize);
- BUFFER_TRACE(bh, "marking dirty");
- mark_buffer_dirty(bh);
- BUFFER_TRACE(bh, "marking uptodate");
- set_buffer_uptodate(bh);
- unlock_buffer(bh);
- __brelse(bh);
- }
-
- sync_blockdev(journal->j_dev);
- jbd_debug(1, "JBD: journal cleared.\n");
-
- /* OK, fill in the initial static fields in the new superblock */
- sb = journal->j_superblock;
-
- sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
- sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
-
- sb->s_blocksize = cpu_to_be32(journal->j_blocksize);
- sb->s_maxlen = cpu_to_be32(journal->j_maxlen);
- sb->s_first = cpu_to_be32(1);
-
- journal->j_transaction_sequence = 1;
-
- journal->j_flags &= ~JFS_ABORT;
- journal->j_format_version = 2;
-
- return journal_reset(journal);
-}
-
-static void journal_write_superblock(journal_t *journal, int write_op)
-{
- struct buffer_head *bh = journal->j_sb_buffer;
- int ret;
-
- trace_journal_write_superblock(journal, write_op);
- if (!(journal->j_flags & JFS_BARRIER))
- write_op &= ~(REQ_FUA | REQ_FLUSH);
- lock_buffer(bh);
- if (buffer_write_io_error(bh)) {
- char b[BDEVNAME_SIZE];
- /*
- * Oh, dear. A previous attempt to write the journal
- * superblock failed. This could happen because the
- * USB device was yanked out. Or it could happen to
- * be a transient write error and maybe the block will
- * be remapped. Nothing we can do but to retry the
- * write and hope for the best.
- */
- printk(KERN_ERR "JBD: previous I/O error detected "
- "for journal superblock update for %s.\n",
- journal_dev_name(journal, b));
- clear_buffer_write_io_error(bh);
- set_buffer_uptodate(bh);
- }
-
- get_bh(bh);
- bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(write_op, bh);
- wait_on_buffer(bh);
- if (buffer_write_io_error(bh)) {
- clear_buffer_write_io_error(bh);
- set_buffer_uptodate(bh);
- ret = -EIO;
- }
- if (ret) {
- char b[BDEVNAME_SIZE];
- printk(KERN_ERR "JBD: Error %d detected "
- "when updating journal superblock for %s.\n",
- ret, journal_dev_name(journal, b));
- }
-}
-
-/**
- * journal_update_sb_log_tail() - Update log tail in journal sb on disk.
- * @journal: The journal to update.
- * @tail_tid: TID of the new transaction at the tail of the log
- * @tail_block: The first block of the transaction at the tail of the log
- * @write_op: With which operation should we write the journal sb
- *
- * Update a journal's superblock information about log tail and write it to
- * disk, waiting for the IO to complete.
- */
-void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
- unsigned int tail_block, int write_op)
-{
- journal_superblock_t *sb = journal->j_superblock;
-
- BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
- jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n",
- tail_block, tail_tid);
-
- sb->s_sequence = cpu_to_be32(tail_tid);
- sb->s_start = cpu_to_be32(tail_block);
-
- journal_write_superblock(journal, write_op);
-
- /* Log is no longer em