summaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-07-02 09:39:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-02 09:39:34 -0700
commit9e239bb93914e1c832d54161c7f8f398d0c914ab (patch)
tree0fe11e8e717152660ad77d77e66bf0f1695d7ed1 /fs/ext4
parent63580e51bb3e7ec459501165884e5f815a7a9322 (diff)
parent6ae06ff51eab5dcbbf959b05ce0f11003a305ba5 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 update from Ted Ts'o: "Lots of bug fixes, cleanups and optimizations. In the bug fixes category, of note is a fix for on-line resizing file systems where the block size is smaller than the page size (i.e., file systems 1k blocks on x86, or more interestingly file systems with 4k blocks on Power or ia64 systems.) In the cleanup category, the ext4's punch hole implementation was significantly improved by Lukas Czerner, and now supports bigalloc file systems. In addition, Jan Kara significantly cleaned up the write submission code path. We also improved error checking and added a few sanity checks. In the optimizations category, two major optimizations deserve mention. The first is that ext4_writepages() is now used for nodelalloc and ext3 compatibility mode. This allows writes to be submitted much more efficiently as a single bio request, instead of being sent as individual 4k writes into the block layer (which then relied on the elevator code to coalesce the requests in the block queue). Secondly, the extent cache shrink mechanism, which was introduce in 3.9, no longer has a scalability bottleneck caused by the i_es_lru spinlock. Other optimizations include some changes to reduce CPU usage and to avoid issuing empty commits unnecessarily." * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (86 commits) ext4: optimize starting extent in ext4_ext_rm_leaf() jbd2: invalidate handle if jbd2_journal_restart() fails ext4: translate flag bits to strings in tracepoints ext4: fix up error handling for mpage_map_and_submit_extent() jbd2: fix theoretical race in jbd2__journal_restart ext4: only zero partial blocks in ext4_zero_partial_blocks() ext4: check error return from ext4_write_inline_data_end() ext4: delete unnecessary C statements ext3,ext4: don't mess with dir_file->f_pos in htree_dirblock_to_tree() jbd2: move superblock checksum calculation to jbd2_write_superblock() ext4: pass inode pointer instead of file pointer to punch hole ext4: improve free space calculation for inline_data ext4: reduce object size when !CONFIG_PRINTK ext4: improve extent cache shrink mechanism to avoid to burn CPU time ext4: implement error handling of ext4_mb_new_preallocation() ext4: fix corruption when online resizing a fs with 1K block size ext4: delete unused variables ext4: return FIEMAP_EXTENT_UNKNOWN for delalloc extents jbd2: remove debug dependency on debug_fs and update Kconfig help text jbd2: use a single printk for jbd_debug() ...
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/balloc.c14
-rw-r--r--fs/ext4/ext4.h187
-rw-r--r--fs/ext4/ext4_jbd2.c58
-rw-r--r--fs/ext4/ext4_jbd2.h29
-rw-r--r--fs/ext4/extents.c193
-rw-r--r--fs/ext4/extents_status.c75
-rw-r--r--fs/ext4/extents_status.h5
-rw-r--r--fs/ext4/file.c14
-rw-r--r--fs/ext4/fsync.c52
-rw-r--r--fs/ext4/ialloc.c3
-rw-r--r--fs/ext4/indirect.c40
-rw-r--r--fs/ext4/inline.c4
-rw-r--r--fs/ext4/inode.c1751
-rw-r--r--fs/ext4/mballoc.c21
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/page-io.c325
-rw-r--r--fs/ext4/resize.c24
-rw-r--r--fs/ext4/super.c155
19 files changed, 1525 insertions, 1435 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d0f13eada0ed..58339393fa6e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -682,11 +682,15 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
static inline int test_root(ext4_group_t a, int b)
{
- int num = b;
-
- while (a > num)
- num *= b;
- return num == a;
+ while (1) {
+ if (a < b)
+ return 0;
+ if (a == b)
+ return 1;
+ if ((a % b) != 0)
+ return 0;
+ a = a / b;
+ }
}
static int ext4_group_sparse(ext4_group_t group)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4af03ea84aa3..b577e45425b0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,38 +177,28 @@ struct ext4_map_blocks {
};
/*
- * For delayed allocation tracking
- */
-struct mpage_da_data {
- struct inode *inode;
- sector_t b_blocknr; /* start block number of extent */
- size_t b_size; /* size of extent */
- unsigned long b_state; /* state of the extent */
- unsigned long first_page, next_page; /* extent of pages */
- struct writeback_control *wbc;
- int io_done;
- int pages_written;
- int retval;
-};
-
-/*
* Flags for ext4_io_end->flags
*/
#define EXT4_IO_END_UNWRITTEN 0x0001
-#define EXT4_IO_END_ERROR 0x0002
-#define EXT4_IO_END_DIRECT 0x0004
+#define EXT4_IO_END_DIRECT 0x0002
/*
- * For converting uninitialized extents on a work queue.
+ * For converting uninitialized extents on a work queue. 'handle' is used for
+ * buffered writeback.
*/
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
+ handle_t *handle; /* handle reserved for extent
+ * conversion */
struct inode *inode; /* file being written to */
+ struct bio *bio; /* Linked list of completed
+ * bios covering the extent */
unsigned int flag; /* unwritten or not */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
+ atomic_t count; /* reference counter */
} ext4_io_end_t;
struct ext4_io_submit {
@@ -581,11 +571,6 @@ enum {
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
/*
- * Flags used by ext4_discard_partial_page_buffers
- */
-#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
-
-/*
* ioctl commands
*/
#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
@@ -879,6 +864,7 @@ struct ext4_inode_info {
rwlock_t i_es_lock;
struct list_head i_es_lru;
unsigned int i_es_lru_nr; /* protected by i_es_lock */
+ unsigned long i_touch_when; /* jiffies of last accessing */
/* ialloc */
ext4_group_t i_last_alloc_group;
@@ -903,12 +889,22 @@ struct ext4_inode_info {
qsize_t i_reserved_quota;
#endif
- /* completed IOs that might need unwritten extents handling */
- struct list_head i_completed_io_list;
+ /* Lock protecting lists below */
spinlock_t i_completed_io_lock;
+ /*
+ * Completed IOs that need unwritten extents handling and have
+ * transaction reserved
+ */
+ struct list_head i_rsv_conversion_list;
+ /*
+ * Completed IOs that need unwritten extents handling and don't have
+ * transaction reserved
+ */
+ struct list_head i_unrsv_conversion_list;
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
- struct work_struct i_unwritten_work; /* deferred extent conversion */
+ struct work_struct i_rsv_conversion_work;
+ struct work_struct i_unrsv_conversion_work;
spinlock_t i_block_reservation_lock;
@@ -1245,7 +1241,6 @@ struct ext4_sb_info {
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
- unsigned int s_max_writeback_mb_bump;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
@@ -1281,8 +1276,10 @@ struct ext4_sb_info {
struct flex_groups *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
- /* workqueue for dio unwritten */
- struct workqueue_struct *dio_unwritten_wq;
+ /* workqueue for unreserved extent convertions (dio) */
+ struct workqueue_struct *unrsv_conversion_wq;
+ /* workqueue for reserved extent conversions (buffered io) */
+ struct workqueue_struct *rsv_conversion_wq;
/* timer for periodic error stats printing */
struct timer_list s_err_report;
@@ -1307,6 +1304,7 @@ struct ext4_sb_info {
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_lru;
+ unsigned long s_es_last_sorted;
struct percpu_counter s_extent_cache_cnt;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
};
@@ -1342,6 +1340,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
struct ext4_io_end *io_end)
{
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+ /* Writeback has to have coversion transaction reserved */
+ WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
+ !(io_end->flag & EXT4_IO_END_DIRECT));
io_end->flag |= EXT4_IO_END_UNWRITTEN;
atomic_inc(&EXT4_I(inode)->i_unwritten);
}
@@ -1999,7 +2000,6 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
/* fsync.c */
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
-extern int ext4_flush_unwritten_io(struct inode *);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
@@ -2088,7 +2088,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_can_truncate(struct inode *inode);
extern void ext4_truncate(struct inode *);
-extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
+extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
@@ -2096,9 +2096,12 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_discard_partial_page_buffers(handle_t *handle,
- struct address_space *mapping, loff_t from,
- loff_t length, int flags);
+extern int ext4_block_truncate_page(handle_t *handle,
+ struct address_space *mapping, loff_t from);
+extern int ext4_block_zero_page_range(handle_t *handle,
+ struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
+ loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -2111,7 +2114,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs);
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
-extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
+extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t first, ext4_lblk_t stop);
@@ -2166,42 +2169,96 @@ extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
extern const char *ext4_decode_error(struct super_block *sb, int errno,
char nbuf[16]);
+
extern __printf(4, 5)
void __ext4_error(struct super_block *, const char *, unsigned int,
const char *, ...);
-#define ext4_error(sb, message...) __ext4_error(sb, __func__, \
- __LINE__, ## message)
extern __printf(5, 6)
-void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
+void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
const char *, ...);
extern __printf(5, 6)
-void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
+void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
const char *, ...);
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
extern __printf(4, 5)
void __ext4_abort(struct super_block *, const char *, unsigned int,
const char *, ...);
-#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \
- __LINE__, ## message)
extern __printf(4, 5)
void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...);
-#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \
- __LINE__, ## message)
extern __printf(3, 4)
-void ext4_msg(struct super_block *, const char *, const char *, ...);
+void __ext4_msg(struct super_block *, const char *, const char *, ...);
extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
const char *, unsigned int, const char *);
-#define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \
- __LINE__, msg)
extern __printf(7, 8)
void __ext4_grp_locked_error(const char *, unsigned int,
struct super_block *, ext4_group_t,
unsigned long, ext4_fsblk_t,
const char *, ...);
-#define ext4_grp_locked_error(sb, grp, message...) \
- __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
+
+#ifdef CONFIG_PRINTK
+
+#define ext4_error_inode(inode, func, line, block, fmt, ...) \
+ __ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__)
+#define ext4_error_file(file, func, line, block, fmt, ...) \
+ __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
+#define ext4_error(sb, fmt, ...) \
+ __ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_abort(sb, fmt, ...) \
+ __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_warning(sb, fmt, ...) \
+ __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_msg(sb, level, fmt, ...) \
+ __ext4_msg(sb, level, fmt, ##__VA_ARGS__)
+#define dump_mmp_msg(sb, mmp, msg) \
+ __dump_mmp_msg(sb, mmp, __func__, __LINE__, msg)
+#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \
+ __ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \
+ fmt, ##__VA_ARGS__)
+
+#else
+
+#define ext4_error_inode(inode, func, line, block, fmt, ...) \
+do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __ext4_error_inode(inode, "", 0, block, " "); \
+} while (0)
+#define ext4_error_file(file, func, line, block, fmt, ...) \
+do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __ext4_error_file(file, "", 0, block, " "); \
+} while (0)
+#define ext4_error(sb, fmt, ...) \
+do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __ext4_error(sb, "", 0, " "); \
+} while (0)
+#define ext4_abort(sb, fmt, ...) \
+do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __ext4_abort(sb, "", 0, " "); \
+} while (0)
+#define ext4_warning(sb, fmt, ...) \
+do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __ext4_warning(sb, "", 0, " "); \
+} while (0)
+#define ext4_msg(sb, level, fmt, ...) \
+do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __ext4_msg(sb, "", " "); \
+} while (0)
+#define dump_mmp_msg(sb, mmp, msg) \
+ __dump_mmp_msg(sb, mmp, "", 0, "")
+#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \
+do { \
+ no_printk(fmt, ##__VA_ARGS__); \
+ __ext4_grp_locked_error("", 0, sb, grp, ino, block, " "); \
+} while (0)
+
+#endif
+
extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
@@ -2312,6 +2369,7 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
{
struct ext4_group_info ***grp_info;
long indexv, indexh;
+ BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
grp_info = EXT4_SB(sb)->s_group_info;
indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
@@ -2598,8 +2656,7 @@ struct ext4_extent;
extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
-extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
- int chunk);
+extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
extern void ext4_ext_truncate(handle_t *, struct inode *);
@@ -2609,8 +2666,8 @@ extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
-extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
- ssize_t len);
+extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
+ loff_t offset, ssize_t len);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
@@ -2650,12 +2707,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
/* page-io.c */
extern int __init ext4_init_pageio(void);
-extern void ext4_add_complete_io(ext4_io_end_t *io_end);
extern void ext4_exit_pageio(void);
-extern void ext4_ioend_shutdown(struct inode *);
-extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern void ext4_end_io_work(struct work_struct *work);
+extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
+extern int ext4_put_io_end(ext4_io_end_t *io_end);
+extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
+extern void ext4_io_submit_init(struct ext4_io_submit *io,
+ struct writeback_control *wbc);
+extern void ext4_end_io_rsv_work(struct work_struct *work);
+extern void ext4_end_io_unrsv_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
@@ -2668,20 +2728,17 @@ extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp);
extern int ext4_mmp_csum_verify(struct super_block *sb,
struct mmp_struct *mmp);
-/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
+/*
+ * Note that these flags will never ever appear in a buffer_head's state flag.
+ * See EXT4_MAP_... to see where this is used.
+ */
enum ext4_state_bits {
BH_Uninit /* blocks are allocated but uninitialized on disk */
- = BH_JBDPrivateStart,
+ = BH_JBDPrivateStart,
BH_AllocFromCluster, /* allocated blocks were part of already
- * allocated cluster. Note that this flag will
- * never, ever appear in a buffer_head's state
- * flag. See EXT4_MAP_FROM_CLUSTER to see where
- * this is used. */
+ * allocated cluster. */
};
-BUFFER_FNS(Uninit, uninit)
-TAS_BUFFER_FNS(Uninit, uninit)
-
/*
* Add new method to test whether block and inode bitmaps are properly
* initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 451eb4045330..72a3600aedbd 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -38,31 +38,43 @@ static void ext4_put_nojournal(handle_t *handle)
/*
* Wrappers for jbd2_journal_start/end.
*/
-handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
- int type, int nblocks)
+static int ext4_journal_check_start(struct super_block *sb)
{
journal_t *journal;
might_sleep();
-
- trace_ext4_journal_start(sb, nblocks, _RET_IP_);
if (sb->s_flags & MS_RDONLY)
- return ERR_PTR(-EROFS);
-
+ return -EROFS;
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
- if (!journal)
- return ext4_get_nojournal();
/*
* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly.
*/
- if (is_journal_aborted(journal)) {
+ if (journal && is_journal_aborted(journal)) {
ext4_abort(sb, "Detected aborted journal");
- return ERR_PTR(-EROFS);
+ return -EROFS;
}
- return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line);
+ return 0;
+}
+
+handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
+ int type, int blocks, int rsv_blocks)
+{
+ journal_t *journal;
+ int err;
+
+ trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
+ err = ext4_journal_check_start(sb);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ journal = EXT4_SB(sb)->s_journal;
+ if (!journal)
+ return ext4_get_nojournal();
+ return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
+ type, line);
}
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
@@ -86,6 +98,30 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
return err;
}
+handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
+ int type)
+{
+ struct super_block *sb;
+ int err;
+
+ if (!ext4_handle_valid(handle))
+ return ext4_get_nojournal();
+
+ sb = handle->h_journal->j_private;
+ trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
+ _RET_IP_);
+ err = ext4_journal_check_start(sb);
+ if (err < 0) {
+ jbd2_journal_free_reserved(handle);
+ return ERR_PTR(err);
+ }
+
+ err = jbd2_journal_start_reserved(handle, type, line);
+ if (err < 0)
+ return ERR_PTR(err);
+ return handle;
+}
+
void ext4_journal_abort_handle(const char *caller, unsigned int line,
const char *err_fn, struct buffer_head *bh,
handle_t *handle, int err)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index c8c6885406db..2877258d9497 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
#define EXT4_HT_MIGRATE 8
#define EXT4_HT_MOVE_EXTENTS 9
#define EXT4_HT_XATTR 10
-#define EXT4_HT_MAX 11
+#define EXT4_HT_EXT_CONVERT 11
+#define EXT4_HT_MAX 12
/**
* struct ext4_journal_cb_entry - Base structure for callback information.
@@ -265,7 +266,7 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
- int type, int nblocks);
+ int type, int blocks, int rsv_blocks);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -300,21 +301,37 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
}
#define ext4_journal_start_sb(sb, type, nblocks) \
- __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks))
+ __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)
#define ext4_journal_start(inode, type, nblocks) \
- __ext4_journal_start((inode), __LINE__, (type), (nblocks))
+ __ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)
+
+#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \
+ __ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))
static inline handle_t *__ext4_journal_start(struct inode *inode,
unsigned int line, int type,
- int nblocks)
+ int blocks, int rsv_blocks)
{
- return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks);
+ return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
+ rsv_blocks);
}
#define ext4_journal_stop(handle) \
__ext4_journal_stop(__func__, __LINE__, (handle))
+#define ext4_journal_start_reserved(handle, type) \
+ __ext4_journal_start_reserved((handle), __LINE__, (type))
+
+handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
+ int type);
+
+static inline void ext4_journal_free_reserved(handle_t *handle)
+{
+ if (ext4_handle_valid(handle))
+ jbd2_journal_free_reserved(handle);
+}
+
static inline handle_t *ext4_journal_current_handle(void)
{
return journal_current_handle();
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc0f1910b9cf..7097b0f680e6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2125,7 +2125,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
next_del = ext4_find_delayed_extent(inode, &es);
if (!exists && next_del) {
exists = 1;
- flags |= FIEMAP_EXTENT_DELALLOC;
+ flags |= (FIEMAP_EXTENT_DELALLOC |
+ FIEMAP_EXTENT_UNKNOWN);
}
up_read(&EXT4_I(inode)->i_data_sem);
@@ -2328,17 +2329,15 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
}
/*
- * How many index/leaf blocks need to change/allocate to modify nrblocks?
+ * How many index/leaf blocks need to change/allocate to add @extents extents?
*
- * if nrblocks are fit in a single extent (chunk flag is 1), then
- * in the worse case, each tree level index/leaf need to be changed
- * if the tree split due to insert a new extent, then the old tree
- * index/leaf need to be updated too
+ * If we add a single extent, then in the worse case, each tree level
+ * index/leaf need to be changed in case of the tree split.
*
- * If the nrblocks are discontiguous, they could cause
- * the whole tree split more than once, but this is really rare.
+ * If more extents are inserted, they could cause the whole tree split more
+ * than once, but this is really rare.
*/
-int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
{
int index;
int depth;
@@ -2349,7 +2348,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
depth = ext_depth(inode);
- if (chunk)
+ if (extents <= 1)
index = depth * 2;
else
index = depth * 3;
@@ -2357,20 +2356,24 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
return index;
}
+static inline int get_default_free_blocks_flags(struct inode *inode)
+{
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+ else if (ext4_should_journal_data(inode))
+ return EXT4_FREE_BLOCKS_FORGET;
+ return 0;
+}
+
static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *ex,
- ext4_fsblk_t *partial_cluster,
+ long long *partial_cluster,
ext4_lblk_t from, ext4_lblk_t to)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned short ee_len = ext4_ext_get_actual_len(ex);
ext4_fsblk_t pblk;
- int flags = 0;
-
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
- else if (ext4_should_journal_data(inode))
- flags |= EXT4_FREE_BLOCKS_FORGET;
+ int flags = get_default_free_blocks_flags(inode);
/*
* For bigalloc file systems, we never free a partial cluster
@@ -2388,7 +2391,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* partial cluster here.
*/
pblk = ext4_ext_pblock(ex) + ee_len - 1;
- if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
+ if ((*partial_cluster > 0) &&
+ (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
ext4_free_blocks(handle, inode, NULL,
EXT4_C2B(sbi, *partial_cluster),
sbi->s_cluster_ratio, flags);
@@ -2414,41 +2418,46 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
&& to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
/* tail removal */
ext4_lblk_t num;
+ unsigned int unaligned;
num = le32_to_cpu(ex->ee_block) + ee_len - from;
pblk = ext4_ext_pblock(ex) + ee_len - num;
- ext_debug("free last %u blocks starting %llu\n", num, pblk);
+ /*
+ * Usually we want to free partial cluster at the end of the
+ * extent, except for the situation when the cluster is still
+ * used by any other extent (partial_cluster is negative).
+ */
+ if (*partial_cluster < 0 &&
+ -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
+ flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
+
+ ext_debug("free last %u blocks starting %llu partial %lld\n",
+ num, pblk, *partial_cluster);
ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
/*
* If the block range to be freed didn't start at the
* beginning of a cluster, and we removed the entire
- * extent, save the partial cluster here, since we
- * might need to delete if we determine that the
- * truncate operation has removed all of the blocks in
- * the cluster.
+ * extent and the cluster is not used by any other extent,
+ * save the partial cluster here, since we might need to
+ * delete if we determine that the truncate operation has
+ * removed all of the blocks in the cluster.
+ *
+ * On the other hand, if we did not manage to free the whole
+ * extent, we have to mark the cluster as used (store negative
+ * cluster number in partial_cluster).
*/
- if (pblk & (sbi->s_cluster_ratio - 1) &&
- (ee_len == num))
+ unaligned = pblk & (sbi->s_cluster_ratio - 1);
+ if (unaligned && (ee_len == num) &&
+ (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
*partial_cluster = EXT4_B2C(sbi, pblk);
- else
+ else if (unaligned)
+ *partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
+ else if (*partial_cluster > 0)
*partial_cluster = 0;
- } else if (from == le32_to_cpu(ex->ee_block)
- && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
- /* head removal */
- ext4_lblk_t num;
- ext4_fsblk_t start;
-
- num = to - from;
- start = ext4_ext_pblock(ex);
-
- ext_debug("free first %u blocks starting %llu\n", num, start);
- ext4_free_blocks(handle, inode, NULL, start, num, flags);
-
- } else {
- printk(KERN_INFO "strange request: removal(2) "
- "%u-%u from %u:%u\n",
- from, to, le32_to_cpu(ex->ee_block), ee_len);
- }
+ } else
+ ext4_error(sbi->s_sb, "strange request: removal(2) "
+ "%u-%u from %u:%u\n",
+ from, to, le32_to_cpu(ex->ee_block), ee_len);
return 0;
}
@@ -2461,12 +2470,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* @handle: The journal handle
* @inode: The files inode
* @path: The path to the leaf
+ * @partial_cluster: The cluster which we'll have to free if all extents
+ * has been released from it. It gets negative in case
+ * that the cluster is still used.
* @start: The first block to remove
* @end: The last block to remove
*/
static int
ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
+ struct ext4_ext_path *path,
+ long long *partial_cluster,
ext4_lblk_t start, ext4_lblk_t end)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2479,6 +2492,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
unsigned short ex_ee_len;
unsigned uninitialized = 0;
struct ext4_extent *ex;
+ ext4_fsblk_t pblk;
/* the header must be checked already in ext4_ext_remove_space() */
ext_debug("truncate since %u in leaf to %u\n", start, end);
@@ -2490,7 +2504,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
return -EIO;
}
/* find where to start removing */
- ex = EXT_LAST_EXTENT(eh);
+ ex = path[depth].p_ext;
+ if (!ex)
+ ex = EXT_LAST_EXTENT(eh);
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2517,6 +2533,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* If this extent is beyond the end of the hole, skip it */
if (end < ex_ee_block) {
+ /*
+ * We're going to skip this extent and move to another,
+ * so if this extent is not cluster aligned we have
+ * to mark the current cluster as used to avoid
+ * accidentally freeing it later on
+ */
+ pblk = ext4_ext_pblock(ex);
+ if (pblk & (sbi->s_cluster_ratio - 1))
+ *partial_cluster =
+ -((long long)EXT4_B2C(sbi, pblk));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2592,7 +2618,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
sizeof(struct ext4_extent));
}
le16_add_cpu(&eh->eh_entries, -1);
- } else
+ } else if (*partial_cluster > 0)
*partial_cluster = 0;
err = ext4_ext_dirty(handle, inode, path + depth);
@@ -2610,17 +2636,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
err = ext4_ext_correct_indexes(handle, inode, path);
/*
- * If there is still a entry in the leaf node, check to see if
- * it references the partial cluster. This is the only place
- * where it could; if it doesn't, we can free the cluster.
+ * Free the partial cluster only if the current extent does not
+ * reference it. Otherwise we might free used cluster.
*/
- if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
+ if (*partial_cluster > 0 &&
(EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
*partial_cluster)) {
- int flags = EXT4_FREE_BLOCKS_FORGET;
-
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
+ int flags = get_default_free_blocks_flags(inode);
ext4_free_blocks(handle, inode, NULL,
EXT4_C2B(sbi, *partial_cluster),
@