diff options
Diffstat (limited to 'fs/ext3/inode.c')
-rw-r--r-- | fs/ext3/inode.c | 3574 |
1 files changed, 0 insertions, 3574 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c deleted file mode 100644 index 6c7e5468a2f8..000000000000 --- a/fs/ext3/inode.c +++ /dev/null @@ -1,3574 +0,0 @@ -/* - * linux/fs/ext3/inode.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/inode.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Goal-directed block allocation by Stephen Tweedie - * (sct@redhat.com), 1993, 1998 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * 64-bit file support on 64-bit platforms by Jakub Jelinek - * (jj@sunsite.ms.mff.cuni.cz) - * - * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 - */ - -#include <linux/highuid.h> -#include <linux/quotaops.h> -#include <linux/writeback.h> -#include <linux/mpage.h> -#include <linux/namei.h> -#include <linux/uio.h> -#include "ext3.h" -#include "xattr.h" -#include "acl.h" - -static int ext3_writepage_trans_blocks(struct inode *inode); -static int ext3_block_truncate_page(struct inode *inode, loff_t from); - -/* - * Test whether an inode is a fast symlink. - */ -static int ext3_inode_is_fast_symlink(struct inode *inode) -{ - int ea_blocks = EXT3_I(inode)->i_file_acl ? - (inode->i_sb->s_blocksize >> 9) : 0; - - return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); -} - -/* - * The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. - * - * "bh" may be NULL: a metadata block may have been freed from memory - * but there may still be a record of it in the journal, and that record - * still needs to be revoked. - */ -int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t blocknr) -{ - int err; - - might_sleep(); - - trace_ext3_forget(inode, is_metadata, blocknr); - BUFFER_TRACE(bh, "enter"); - - jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " - "data mode %lx\n", - bh, is_metadata, inode->i_mode, - test_opt(inode->i_sb, DATA_FLAGS)); - - /* Never use the revoke function if we are doing full data - * journaling: there is no need to, and a V1 superblock won't - * support it. Otherwise, only skip the revoke on un-journaled - * data blocks. */ - - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA || - (!is_metadata && !ext3_should_journal_data(inode))) { - if (bh) { - BUFFER_TRACE(bh, "call journal_forget"); - return ext3_journal_forget(handle, bh); - } - return 0; - } - - /* - * data!=journal && (is_metadata || should_journal_data(inode)) - */ - BUFFER_TRACE(bh, "call ext3_journal_revoke"); - err = ext3_journal_revoke(handle, blocknr, bh); - if (err) - ext3_abort(inode->i_sb, __func__, - "error %d when attempting revoke", err); - BUFFER_TRACE(bh, "exit"); - return err; -} - -/* - * Work out how many blocks we need to proceed with the next chunk of a - * truncate transaction. - */ -static unsigned long blocks_for_truncate(struct inode *inode) -{ - unsigned long needed; - - needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); - - /* Give ourselves just enough room to cope with inodes in which - * i_blocks is corrupt: we've seen disk corruptions in the past - * which resulted in random data in an inode which looked enough - * like a regular file for ext3 to try to delete it. Things - * will go a bit crazy if that happens, but at least we should - * try not to panic the whole kernel. */ - if (needed < 2) - needed = 2; - - /* But we need to bound the transaction so we don't overflow the - * journal. */ - if (needed > EXT3_MAX_TRANS_DATA) - needed = EXT3_MAX_TRANS_DATA; - - return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; -} - -/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. - * - * start_transaction gets us a new handle for a truncate transaction, - * and extend_transaction tries to extend the existing one a bit. If - * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. --sct - */ -static handle_t *start_transaction(struct inode *inode) -{ - handle_t *result; - - result = ext3_journal_start(inode, blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; - - ext3_std_error(inode->i_sb, PTR_ERR(result)); - return result; -} - -/* - * Try to extend this transaction for the purposes of truncation. - * - * Returns 0 if we managed to create more room. If we can't create more - * room, and the transaction must be restarted we return 1. - */ -static int try_to_extend_transaction(handle_t *handle, struct inode *inode) -{ - if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) - return 0; - if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) - return 0; - return 1; -} - -/* - * Restart the transaction associated with *handle. This does a commit, - * so before we call here everything must be consistently dirtied against - * this transaction. - */ -static int truncate_restart_transaction(handle_t *handle, struct inode *inode) -{ - int ret; - - jbd_debug(2, "restarting handle %p\n", handle); - /* - * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle - * At this moment, get_block can be called only for blocks inside - * i_size since page cache has been already dropped and writes are - * blocked by i_mutex. So we can safely drop the truncate_mutex. - */ - mutex_unlock(&EXT3_I(inode)->truncate_mutex); - ret = ext3_journal_restart(handle, blocks_for_truncate(inode)); - mutex_lock(&EXT3_I(inode)->truncate_mutex); - return ret; -} - -/* - * Called at inode eviction from icache - */ -void ext3_evict_inode (struct inode *inode) -{ - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_block_alloc_info *rsv; - handle_t *handle; - int want_delete = 0; - - trace_ext3_evict_inode(inode); - if (!inode->i_nlink && !is_bad_inode(inode)) { - dquot_initialize(inode); - want_delete = 1; - } - - /* - * When journalling data dirty buffers are tracked only in the journal. - * So although mm thinks everything is clean and ready for reaping the - * inode might still have some pages to write in the running - * transaction or waiting to be checkpointed. Thus calling - * journal_invalidatepage() (via truncate_inode_pages()) to discard - * these buffers can cause data loss. Also even if we did not discard - * these buffers, we would have no way to find them after the inode - * is reaped and thus user could see stale data if he tries to read - * them before the transaction is checkpointed. So be careful and - * force everything to disk here... We use ei->i_datasync_tid to - * store the newest transaction containing inode's data. - * - * Note that directories do not have this problem because they don't - * use page cache. - * - * The s_journal check handles the case when ext3_get_journal() fails - * and puts the journal inode. - */ - if (inode->i_nlink && ext3_should_journal_data(inode) && - EXT3_SB(inode->i_sb)->s_journal && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && - inode->i_ino != EXT3_JOURNAL_INO) { - tid_t commit_tid = atomic_read(&ei->i_datasync_tid); - journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; - - log_start_commit(journal, commit_tid); - log_wait_commit(journal, commit_tid); - filemap_write_and_wait(&inode->i_data); - } - truncate_inode_pages_final(&inode->i_data); - - ext3_discard_reservation(inode); - rsv = ei->i_block_alloc_info; - ei->i_block_alloc_info = NULL; - if (unlikely(rsv)) - kfree(rsv); - - if (!want_delete) - goto no_delete; - - handle = start_transaction(inode); - if (IS_ERR(handle)) { - /* - * If we're going to skip the normal cleanup, we still need to - * make sure that the in-core orphan linked list is properly - * cleaned up. - */ - ext3_orphan_del(NULL, inode); - goto no_delete; - } - - if (IS_SYNC(inode)) - handle->h_sync = 1; - inode->i_size = 0; - if (inode->i_blocks) - ext3_truncate(inode); - /* - * Kill off the orphan record created when the inode lost the last - * link. Note that ext3_orphan_del() has to be able to cope with the - * deletion of a non-existent orphan - ext3_truncate() could - * have removed the record. - */ - ext3_orphan_del(handle, inode); - ei->i_dtime = get_seconds(); - - /* - * One subtle ordering requirement: if anything has gone wrong - * (transaction abort, IO errors, whatever), then we can still - * do these next steps (the fs will already have been marked as - * having errors), but we can't free the inode if the mark_dirty - * fails. - */ - if (ext3_mark_inode_dirty(handle, inode)) { - /* If that failed, just dquot_drop() and be done with that */ - dquot_drop(inode); - clear_inode(inode); - } else { - ext3_xattr_delete_inode(handle, inode); - dquot_free_inode(inode); - dquot_drop(inode); - clear_inode(inode); - ext3_free_inode(handle, inode); - } - ext3_journal_stop(handle); - return; -no_delete: - clear_inode(inode); - dquot_drop(inode); -} - -typedef struct { - __le32 *p; - __le32 key; - struct buffer_head *bh; -} Indirect; - -static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) -{ - p->key = *(p->p = v); - p->bh = bh; -} - -static int verify_chain(Indirect *from, Indirect *to) -{ - while (from <= to && from->key == *from->p) - from++; - return (from > to); -} - -/** - * ext3_block_to_path - parse the block number into array of offsets - * @inode: inode in question (we are only interested in its superblock) - * @i_block: block number to be parsed - * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. - * - * To store the locations of file's data ext3 uses a data structure common - * for UNIX filesystems - tree of pointers anchored in the inode, with - * data blocks at leaves and indirect blocks in intermediate nodes. - * This function translates the block number into path in that tree - - * return value is the path length and @offsets[n] is the offset of - * pointer to (n+1)th node in the nth one. If @block is out of range - * (negative or too large) warning is printed and zero returned. - * - * Note: function doesn't find node addresses, so no IO is needed. All - * we need to know is the capacity of indirect blocks (taken from the - * inode->i_sb). - */ - -/* - * Portability note: the last comparison (check that we fit into triple - * indirect block) is spelled differently, because otherwise on an - * architecture with 32-bit longs and 8Kb pages we might get into trouble - * if our filesystem had 8Kb blocks. We might use long long, but that would - * kill us on x86. Oh, well, at least the sign propagation does not matter - - * i_block would have to be negative in the very beginning, so we would not - * get there at all. - */ - -static int ext3_block_to_path(struct inode *inode, - long i_block, int offsets[4], int *boundary) -{ - int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); - const long direct_blocks = EXT3_NDIR_BLOCKS, - indirect_blocks = ptrs, - double_blocks = (1 << (ptrs_bits * 2)); - int n = 0; - int final = 0; - - if (i_block < 0) { - ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0"); - } else if (i_block < direct_blocks) { - offsets[n++] = i_block; - final = direct_blocks; - } else if ( (i_block -= direct_blocks) < indirect_blocks) { - offsets[n++] = EXT3_IND_BLOCK; - offsets[n++] = i_block; - final = ptrs; - } else if ((i_block -= indirect_blocks) < double_blocks) { - offsets[n++] = EXT3_DIND_BLOCK; - offsets[n++] = i_block >> ptrs_bits; - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { - offsets[n++] = EXT3_TIND_BLOCK; - offsets[n++] = i_block >> (ptrs_bits * 2); - offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else { - ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big"); - } - if (boundary) - *boundary = final - 1 - (i_block & (ptrs - 1)); - return n; -} - -/** - * ext3_get_branch - read the chain of indirect blocks leading to data - * @inode: inode in question - * @depth: depth of the chain (1 - direct pointer, etc.) - * @offsets: offsets of pointers in inode/indirect blocks - * @chain: place to store the result - * @err: here we store the error value - * - * Function fills the array of triples <key, p, bh> and returns %NULL - * if everything went OK or the pointer to the last filled triple - * (incomplete one) otherwise. Upon the return chain[i].key contains - * the number of (i+1)-th block in the chain (as it is stored in memory, - * i.e. little-endian 32-bit), chain[i].p contains the address of that - * number (it points into struct inode for i==0 and into the bh->b_data - * for i>0) and chain[i].bh points to the buffer_head of i-th indirect - * block for i>0 and NULL for i==0. In other words, it holds the block - * numbers of the chain, addresses they were taken from (and where we can - * verify that chain did not change) and buffer_heads hosting these - * numbers. - * - * Function stops when it stumbles upon zero pointer (absent block) - * (pointer to last triple returned, *@err == 0) - * or when it gets an IO error reading an indirect block - * (ditto, *@err == -EIO) - * or when it notices that chain had been changed while it was reading - * (ditto, *@err == -EAGAIN) - * or when it reads all @depth-1 indirect blocks successfully and finds - * the whole chain, all way to the data (returns %NULL, *err == 0). - */ -static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, - Indirect chain[4], int *err) -{ - struct super_block *sb = inode->i_sb; - Indirect *p = chain; - struct buffer_head *bh; - - *err = 0; - /* i_data is not going away, no lock needed */ - add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { - bh = sb_bread(sb, le32_to_cpu(p->key)); - if (!bh) - goto failure; - /* Reader: pointers */ - if (!verify_chain(chain, p)) - goto changed; - add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); - /* Reader: end */ - if (!p->key) - goto no_block; - } - return NULL; - -changed: - brelse(bh); - *err = -EAGAIN; - goto no_block; -failure: - *err = -EIO; -no_block: - return p; -} - -/** - * ext3_find_near - find a place for allocation with sufficient locality - * @inode: owner - * @ind: descriptor of indirect block. - * - * This function returns the preferred place for block allocation. - * It is used when heuristic for sequential allocation fails. - * Rules are: - * + if there is a block to the left of our position - allocate near it. - * + if pointer will live in indirect block - allocate near that block. - * + if pointer will live in inode - allocate in the same - * cylinder group. - * - * In the latter case we colour the starting block by the callers PID to - * prevent it from clashing with concurrent allocations for a different inode - * in the same block group. The PID is used here so that functionally related - * files will be close-by on-disk. - * - * Caller must make sure that @ind is valid and will stay that way. - */ -static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) -{ - struct ext3_inode_info *ei = EXT3_I(inode); - __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; - __le32 *p; - ext3_fsblk_t bg_start; - ext3_grpblk_t colour; - - /* Try to find previous block */ - for (p = ind->p - 1; p >= start; p--) { - if (*p) - return le32_to_cpu(*p); - } - - /* No such thing, so let's try location of indirect block */ - if (ind->bh) - return ind->bh->b_blocknr; - - /* - * It is going to be referred to from the inode itself? OK, just put it - * into the same cylinder group then. - */ - bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); - colour = (current->pid % 16) * - (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); - return bg_start + colour; -} - -/** - * ext3_find_goal - find a preferred place for allocation. - * @inode: owner - * @block: block we want - * @partial: pointer to the last triple within a chain - * - * Normally this function find the preferred place for block allocation, - * returns it. - */ - -static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, - Indirect *partial) -{ - struct ext3_block_alloc_info *block_i; - - block_i = EXT3_I(inode)->i_block_alloc_info; - - /* - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. - */ - if (block_i && (block == block_i->last_alloc_logical_block + 1) - && (block_i->last_alloc_physical_block != 0)) { - return block_i->last_alloc_physical_block + 1; - } - - return ext3_find_near(inode, partial); -} - -/** - * ext3_blks_to_allocate - Look up the block map and count the number - * of direct blocks need to be allocated for the given branch. - * - * @branch: chain of indirect blocks - * @k: number of blocks need for indirect blocks - * @blks: number of data blocks to be mapped. - * @blocks_to_boundary: the offset in the indirect block - * - * return the total number of blocks to be allocate, including the - * direct and indirect blocks. - */ -static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, - int blocks_to_boundary) -{ - unsigned long count = 0; - - /* - * Simple case, [t,d]Indirect block(s) has not allocated yet - * then it's clear blocks on that path have not allocated - */ - if (k > 0) { - /* right now we don't handle cross boundary allocation */ - if (blks < blocks_to_boundary + 1) - count += blks; - else - count += blocks_to_boundary + 1; - return count; - } - - count++; - while (count < blks && count <= blocks_to_boundary && - le32_to_cpu(*(branch[0].p + count)) == 0) { - count++; - } - return count; -} - -/** - * ext3_alloc_blocks - multiple allocate blocks needed for a branch - * @handle: handle for this transaction - * @inode: owner - * @goal: preferred place for allocation - * @indirect_blks: the number of blocks need to allocate for indirect - * blocks - * @blks: number of blocks need to allocated for direct blocks - * @new_blocks: on return it will store the new block numbers for - * the indirect blocks(if needed) and the first direct block, - * @err: here we store the error value - * - * return the number of direct blocks allocated - */ -static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int indirect_blks, int blks, - ext3_fsblk_t new_blocks[4], int *err) -{ - int target, i; - unsigned long count = 0; - int index = 0; - ext3_fsblk_t current_block = 0; - int ret = 0; - - /* - * Here we try to allocate the requested multiple blocks at once, - * on a best-effort basis. - * To build a branch, we should allocate blocks for - * the indirect blocks(if not allocated yet), and at least - * the first direct block of this branch. That's the - * minimum number of blocks need to allocate(required) - */ - target = blks + indirect_blks; - - while (1) { - count = target; - /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext3_new_blocks(handle,inode,goal,&count,err); - if (*err) - goto failed_out; - - target -= count; - /* allocate blocks for indirect blocks */ - while (index < indirect_blks && count) { - new_blocks[index++] = current_block++; - count--; - } - - if (count > 0) - break; - } - - /* save the new block number for the first direct block */ - new_blocks[index] = current_block; - - /* total number of blocks allocated for direct blocks */ - ret = count; - *err = 0; - return ret; -failed_out: - for (i = 0; i <index; i++) - ext3_free_blocks(handle, inode, new_blocks[i], 1); - return ret; -} - -/** - * ext3_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. - * - * This function allocates blocks, zeroes out all but the last one, - * links them into chain and (if we are synchronous) writes them to disk. - * In other words, it prepares a branch that can be spliced onto the - * inode. It stores the information about that chain in the branch[], in - * the same format as ext3_get_branch() would do. We are calling it after - * we had read the existing part of chain and partial points to the last - * triple of that (one with zero ->key). Upon the exit we have the same - * picture as after the successful ext3_get_block(), except that in one - * place chain is disconnected - *branch->p is still zero (we did not - * set the last link), but branch->key contains the number that should - * be placed into *branch->p to fill that gap. - * - * If allocation fails we free all blocks we've allocated (and forget - * their buffer_heads) and return the error value the from failed - * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain - * as described above and return 0. - */ -static int ext3_alloc_branch(handle_t *handle, struct inode *inode, - int indirect_blks, int *blks, ext3_fsblk_t goal, - int *offsets, Indirect *branch) -{ - int blocksize = inode->i_sb->s_blocksize; - int i, n = 0; - int err = 0; - struct buffer_head *bh; - int num; - ext3_fsblk_t new_blocks[4]; - ext3_fsblk_t current_block; - - num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, - *blks, new_blocks, &err); - if (err) - return err; - - branch[0].key = cpu_to_le32(new_blocks[0]); - /* - * metadata blocks and data blocks are allocated. - */ - for (n = 1; n <= indirect_blks; n++) { - /* - * Get buffer_head for parent block, zero it out - * and set the pointer to new one, then send - * parent to disk. - */ - bh = sb_getblk(inode->i_sb, new_blocks[n-1]); - if (unlikely(!bh)) { - err = -ENOMEM; - goto failed; - } - branch[n].bh = bh; - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - err = ext3_journal_get_create_access(handle, bh); - if (err) { - unlock_buffer(bh); - brelse(bh); - goto failed; - } - - memset(bh->b_data, 0, blocksize); - branch[n].p = (__le32 *) bh->b_data + offsets[n]; - branch[n].key = cpu_to_le32(new_blocks[n]); - *branch[n].p = branch[n].key; - if ( n == indirect_blks) { - current_block = new_blocks[n]; - /* - * End of chain, update the last new metablock of - * the chain to point to the new allocated - * data blocks numbers - */ - for (i=1; i < num; i++) - *(branch[n].p + i) = cpu_to_le32(++current_block); - } - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) - goto failed; - } - *blks = num; - return err; -failed: - /* Allocation failed, free what we already allocated */ - for (i = 1; i <= n ; i++) { - BUFFER_TRACE(branch[i].bh, "call journal_forget"); - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < indirect_blks; i++) - ext3_free_blocks(handle, inode, new_blocks[i], 1); - - ext3_free_blocks(handle, inode, new_blocks[i], num); - - return err; -} - -/** - * ext3_splice_branch - splice the allocated branch onto inode. - * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @where: location of missing link - * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding - * - * This function fills the missing link and does all housekeeping needed in - * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. - */ -static int ext3_splice_branch(handle_t *handle, struct inode *inode, - long block, Indirect *where, int num, int blks) -{ - int i; - int err = 0; - struct ext3_block_alloc_info *block_i; - ext3_fsblk_t current_block; - struct ext3_inode_info *ei = EXT3_I(inode); - struct timespec now; - - block_i = ei->i_block_alloc_info; - /* - * If we're splicing into a [td]indirect block (as opposed to the - * inode) then we need to get write access to the [td]indirect block - * before the splice. - */ - if (where->bh) { - BUFFER_TRACE(where->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, where->bh); - if (err) - goto err_out; - } - /* That's it */ - - *where->p = where->key; - - /* - * Update the host buffer_head or inode to point to more just allocated - * direct blocks blocks - */ - if (num == 0 && blks > 1) { - current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) - *(where->p + i ) = cpu_to_le32(current_block++); - } - - /* - * update the most recently allocated logical & physical block - * in i_block_alloc_info, to assist find the proper goal block for next - * allocation - */ - if (block_i) { - block_i->last_alloc_logical_block = block + blks - 1; - block_i->last_alloc_physical_block = - le32_to_cpu(where[num].key) + blks - 1; - } - - /* We are done with atomic stuff, now do the rest of housekeeping */ - now = CURRENT_TIME_SEC; - if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) { - inode->i_ctime = now; - ext3_mark_inode_dirty(handle, inode); - } - /* ext3_mark_inode_dirty already updated i_sync_tid */ - atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); - - /* had we spliced it onto indirect block? */ - if (where->bh) { - /* - * If we spliced it onto an indirect block, we haven't - * altered the inode. Note however that if it is being spliced - * onto an indirect block at the very end of the file (the - * file is growing) then we *will* alter the inode to reflect - * the new i_size. But that is not done here - it is done in - * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode. - */ - jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, where->bh); - if (err) - goto err_out; - } else { - /* - * OK, we spliced it into the inode itself on a direct block. - * Inode was dirtied above. - */ - jbd_debug(5, "splicing direct\n"); - } - return err; - -err_out: - for (i = 1; i <= num; i++) { - BUFFER_TRACE(where[i].bh, "call journal_forget"); - ext3_journal_forget(handle, where[i].bh); - ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); - } - ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); - - return err; -} - -/* - * Allocation strategy is simple: if we have to allocate something, we will - * have to go the whole way to leaf. So let's do it before attaching anything - * to tree, set linkage between the newborn blocks, write them if sync is - * required, recheck the path, free and repeat if check fails, otherwise - * set the last missing link (that will protect us from any truncate-generated - * removals - all blocks on the path are immune now) and possibly force the - * write on the parent block. - * That has a nice additional property: no special recovery from the failed - * allocations is needed - we simply release blocks and do not touch anything - * reachable from inode. - * - * `handle' can be NULL if create == 0. - * - * The BKL may not be held on entry here. Be sure to take it early. - * return > 0, # of blocks mapped or allocated. - * return = 0, if plain lookup failed. - * return < 0, error case. - */ -int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, - sector_t iblock, unsigned long maxblocks, - struct buffer_head *bh_result, - int create) -{ - int err = -EIO; - int offsets[4]; - Indirect chain[4]; - Indirect *partial; - ext3_fsblk_t goal; - int indirect_blks; - int blocks_to_boundary = 0; - int depth; - struct ext3_inode_info *ei = EXT3_I(inode); - int count = 0; - ext3_fsblk_t first_block = 0; - - - trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create); - J_ASSERT(handle != NULL || create == 0); - depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); - - if (depth == 0) - goto out; - - partial = ext3_get_branch(inode, depth, offsets, chain, &err); - - /* Simplest case - block found, no allocation needed */ - if (!partial) { - first_block = le32_to_cpu(chain[depth - 1].key); - clear_buffer_new(bh_result); - count++; - /*map more blocks*/ - while (count < maxblocks && count <= blocks_to_boundary) { - ext3_fsblk_t blk; - - if (!verify_chain(chain, chain + depth - 1)) { - /* - * Indirect block might be removed by - * truncate while we were reading it. - * Handling of that case: forget what we've - * got now. Flag the err as EAGAIN, so it - * will reread. - */ - err = -EAGAIN; - count = 0; - break; - } - blk = le32_to_cpu(*(chain[depth-1].p + count)); - - if (blk == first_block + count) - count++; - else - break; - } - if (err != -EAGAIN) - goto got_it; - } - - /* Next simple case - plain lookup or failed read of indirect block */ - if (!create || err == -EIO) - goto cleanup; - - /* - * Block out ext3_truncate while we alter the tree - */ - mutex_lock(&ei->truncate_mutex); - - /* - * If the indirect block is missing while we are reading - * the chain(ext3_get_branch() returns -EAGAIN err), or - * if the chain has been changed after we grab the semaphore, - * (either because another process truncated this branch, or - * another get_block allocated this branch) re-grab the chain to see if - * the request block has been allocated or not. - * - * Since we already block the truncate/other get_block - * at this point, we will have the current copy of the chain when we - * splice the branch into the tree. - */ - if (err == -EAGAIN || !verify_chain(chain, partial)) { - while (partial > chain) { - brelse(partial->bh); - partial--; - } - partial = ext3_get_branch(inode, depth, offsets, chain, &err); - if (!partial) { - count++; - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - clear_buffer_new(bh_result); - goto got_it; - } - } - - /* - * Okay, we need to do block allocation. Lazily initialize the block - * allocation info here if necessary - */ - if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) - ext3_init_block_alloc_info(inode); - - goal = ext3_find_goal(inode, iblock, partial); - - /* the number of blocks need to allocate for [d,t]indirect blocks */ - indirect_blks = (chain + depth) - partial - 1; - - /* - * Next look up the indirect map to count the totoal number of - * direct blocks to allocate for this branch. - */ - count = ext3_blks_to_allocate(partial, indirect_blks, - maxblocks, blocks_to_boundary); - err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, - offsets + (partial - chain), partial); - - /* - * The ext3_splice_branch call will free and forget any buffers - * on the new chain if there is a failure, but that risks using - * up transaction credits, especially for bitmaps where the - * credits cannot be returned. Can we handle this somehow? We - * may need to return -EAGAIN upwards in the worst case. --sct - */ - if (!err) - err = ext3_splice_branch(handle, inode, iblock, - partial, indirect_blks, count); - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - - set_buffer_new(bh_result); -got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); - if (count > blocks_to_boundary) - set_buffer_boundary(bh_result); - err = count; - /* Clean up and exit */ - partial = chain + depth - 1; /* the whole chain */ -cleanup: - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } - BUFFER_TRACE(bh_result, "returned"); -out: - trace_ext3_get_blocks_exit(inode, iblock, - depth ? le32_to_cpu(chain[depth-1].key) : 0, - count, err); - return err; -} - -/* Maximum number of blocks we map for direct IO at once. */ -#define DIO_MAX_BLOCKS 4096 -/* - * Number of credits we need for writing DIO_MAX_BLOCKS: - * We need sb + group descriptor + bitmap + inode -> 4 - * For B blocks with A block pointers per block we need: - * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). - * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. - */ -#define DIO_CREDITS 25 - -static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - handle_t *handle = ext3_journal_current_handle(); - int ret = 0, started = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - - if (create && !handle) { /* Direct IO write... */ - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - handle = ext3_journal_start(inode, DIO_CREDITS + - EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - started = 1; - } - - ret = ext3_get_blocks_handle(handle, inode, iblock, - max_blocks, bh_result, create); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - if (started) - ext3_journal_stop(handle); -out: - return ret; -} - -int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len) -{ - return generic_block_fiemap(inode, fieinfo, start, len, - ext3_get_block); -} - -/* - * `handle' can be NULL if create is zero - */ -struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, - long block, int create, int *errp) -{ - struct buffer_head dummy; - int fatal = 0, err; - - J_ASSERT(handle != NULL || create == 0); - - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); - err = ext3_get_blocks_handle(handle, inode, block, 1, - &dummy, create); - /* - * ext3_get_blocks_handle() returns number of blocks - * mapped. 0 in case of a HOLE. - */ - if (err > 0) { - WARN_ON(err > 1); - err = 0; - } - *errp = err; - if (!err && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); - if (unlikely(!bh)) { - *errp = -ENOMEM; - goto err; - } - if (buffer_new(&dummy)) { - J_ASSERT(create != 0); - J_ASSERT(handle != NULL); - - /* - * Now that we do not always journal data, we should - * keep in mind whether this should always journal the |