From aaddea812cb0a2dc38b55ba557b68999bc2f6203 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Wed, 16 Jan 2013 20:21:26 -0500 Subject: ext4: add tracepoint in punching hole This patch adds a tracepoint in ext4_punch_hole. CC: Lukas Czerner Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 7e8c36bc7082..6080ea1380b8 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1324,6 +1324,31 @@ TRACE_EVENT(ext4_fallocate_exit, __entry->ret) ); +TRACE_EVENT(ext4_punch_hole, + TP_PROTO(struct inode *inode, loff_t offset, loff_t len), + + TP_ARGS(inode, offset, len), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( loff_t, offset ) + __field( loff_t, len ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->offset = offset; + __entry->len = len; + ), + + TP_printk("dev %d,%d ino %lu offset %lld len %lld", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + __entry->offset, __entry->len) +); + TRACE_EVENT(ext4_unlink_enter, TP_PROTO(struct inode *parent, struct dentry *dentry), -- cgit v1.2.3 From c3ad83d9efdfe6a86efd44945a781f00c879b7b4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 24 Jan 2013 23:24:56 -0500 Subject: quota: autoload the quota_v2 module for QFMT_VFS_V1 quota format Otherwise, ext4 file systems with the quota featured enable will get a very confusing "No such process" error message if the quota code is built as a module and the quota_v2 module has not been loaded. Signed-off-by: "Theodore Ts'o" Reviewed-by: Carlos Maiolino Acked-by: Jan Kara Cc: stable@vger.kernel.org --- include/linux/quota.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 58fdef125252..d13371134c59 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -405,6 +405,7 @@ struct quota_module_name { #define INIT_QUOTA_MODULE_NAMES {\ {QFMT_VFS_OLD, "quota_v1"},\ {QFMT_VFS_V0, "quota_v2"},\ + {QFMT_VFS_V1, "quota_v2"},\ {0, NULL}} #endif /* _QUOTA_ */ -- cgit v1.2.3 From 9fff24aa2c5c504aadead1ff9599e813604c2e53 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 6 Feb 2013 22:30:23 -0500 Subject: jbd2: track request delay statistics Track the delay between when we first request that the commit begin and when it actually begins, so we can see how much of a gap exists. In theory, this should just be the remaining scheduling quantuum of the thread which requested the commit (assuming it was not a synchronous operation which triggered the commit request) plus scheduling overhead; however, it's possible that real time processes might get in the way of letting the kjournald thread from executing. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 7 +++++++ include/trace/events/jbd2.h | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e30b66346942..e0aafc46064f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -580,6 +580,11 @@ struct transaction_s */ unsigned long t_start; + /* + * When commit was requested + */ + unsigned long t_requested; + /* * Checkpointing stats [j_checkpoint_sem] */ @@ -637,6 +642,7 @@ struct transaction_s struct transaction_run_stats_s { unsigned long rs_wait; + unsigned long rs_request_delay; unsigned long rs_running; unsigned long rs_locked; unsigned long rs_flushing; @@ -649,6 +655,7 @@ struct transaction_run_stats_s { struct transaction_stats_s { unsigned long ts_tid; + unsigned long ts_requested; struct transaction_run_stats_s run; }; diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 127993dbf322..5419f57beb1f 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -142,6 +142,7 @@ TRACE_EVENT(jbd2_run_stats, __field( dev_t, dev ) __field( unsigned long, tid ) __field( unsigned long, wait ) + __field( unsigned long, request_delay ) __field( unsigned long, running ) __field( unsigned long, locked ) __field( unsigned long, flushing ) @@ -155,6 +156,7 @@ TRACE_EVENT(jbd2_run_stats, __entry->dev = dev; __entry->tid = tid; __entry->wait = stats->rs_wait; + __entry->request_delay = stats->rs_request_delay; __entry->running = stats->rs_running; __entry->locked = stats->rs_locked; __entry->flushing = stats->rs_flushing; @@ -164,10 +166,12 @@ TRACE_EVENT(jbd2_run_stats, __entry->blocks_logged = stats->rs_blocks_logged; ), - TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u " - "logging %u handle_count %u blocks %u blocks_logged %u", + TP_printk("dev %d,%d tid %lu wait %u request_delay %u running %u " + "locked %u flushing %u logging %u handle_count %u " + "blocks %u blocks_logged %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->wait), + jiffies_to_msecs(__entry->request_delay), jiffies_to_msecs(__entry->running), jiffies_to_msecs(__entry->locked), jiffies_to_msecs(__entry->flushing), -- cgit v1.2.3 From 078d5039a13dedbd2ed14153a6d764fd75baae07 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 7 Feb 2013 00:02:15 -0500 Subject: jbd2: revert "jbd2: add COW fields to struct jbd2_journal_handle" This reverts commit 93737456d68ddcb86232f669b83da673dd12e351. The cow-snapshots effort is no longer active, so remove these extra fields to shrink down the handle structure. Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- include/linux/jbd2.h | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e0aafc46064f..24db7256a5ff 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -397,35 +397,13 @@ struct jbd2_journal_handle int h_err; /* Flags [no locking] */ - unsigned int h_sync:1; /* sync-on-close */ - unsigned int h_jdata:1; /* force data journaling */ - unsigned int h_aborted:1; /* fatal error on handle */ - unsigned int h_cowing:1; /* COWing block to snapshot */ - - /* Number of buffers requested by user: - * (before adding the COW credits factor) */ - unsigned int h_base_credits:14; - - /* Number of buffers the user is allowed to dirty: - * (counts only buffers dirtied when !h_cowing) */ - unsigned int h_user_credits:14; - + unsigned int h_sync: 1; /* sync-on-close */ + unsigned int h_jdata: 1; /* force data journaling */ + unsigned int h_aborted: 1; /* fatal error on handle */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map h_lockdep_map; #endif - -#ifdef CONFIG_JBD2_DEBUG - /* COW debugging counters: */ - unsigned int h_cow_moved; /* blocks moved to snapshot */ - unsigned int h_cow_copied; /* blocks copied to snapshot */ - unsigned int h_cow_ok_jh; /* blocks already COWed during current - transaction */ - unsigned int h_cow_ok_bitmap; /* blocks not set in COW bitmap */ - unsigned int h_cow_ok_mapped;/* blocks already mapped in snapshot */ - unsigned int h_cow_bitmaps; /* COW bitmaps created */ - unsigned int h_cow_excluded; /* blocks set in exclude bitmap */ -#endif }; -- cgit v1.2.3 From 343d9c283c9847da043fda3e76e3197f27b667dd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 8 Feb 2013 13:00:22 -0500 Subject: jbd2: add tracepoints which provide per-handle statistics Handles which stay open a long time are problematic when it comes time to close down a transaction so it can be committed. These tracepoints will help us determine which ones are the problematic ones, and to validate whether changes makes things better or worse. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 8 +++- include/trace/events/jbd2.h | 98 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 24db7256a5ff..fa5fea17b619 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -400,6 +400,11 @@ struct jbd2_journal_handle unsigned int h_sync: 1; /* sync-on-close */ unsigned int h_jdata: 1; /* force data journaling */ unsigned int h_aborted: 1; /* fatal error on handle */ + unsigned int h_type: 8; /* for handle statistics */ + unsigned int h_line_no: 16; /* for handle statistics */ + + unsigned long h_start_jiffies; + unsigned int h_requested_credits; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map h_lockdep_map; @@ -1071,7 +1076,8 @@ static inline handle_t *journal_current_handle(void) */ extern handle_t *jbd2_journal_start(journal_t *, int nblocks); -extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask); +extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask, + unsigned int type, unsigned int line_no); extern int jbd2_journal_restart(handle_t *, int nblocks); extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask); extern int jbd2_journal_extend (handle_t *, int nblocks); diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 5419f57beb1f..070df49e4a1d 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -132,6 +132,104 @@ TRACE_EVENT(jbd2_submit_inode_data, (unsigned long) __entry->ino) ); +TRACE_EVENT(jbd2_handle_start, + TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, + unsigned int line_no, int requested_blocks), + + TP_ARGS(dev, tid, type, line_no, requested_blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( unsigned long, tid ) + __field( unsigned int, type ) + __field( unsigned int, line_no ) + __field( int, requested_blocks) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->tid = tid; + __entry->type = type; + __entry->line_no = line_no; + __entry->requested_blocks = requested_blocks; + ), + + TP_printk("dev %d,%d tid %lu type %u line_no %u " + "requested_blocks %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + __entry->type, __entry->line_no, __entry->requested_blocks) +); + +TRACE_EVENT(jbd2_handle_extend, + TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, + unsigned int line_no, int buffer_credits, + int requested_blocks), + + TP_ARGS(dev, tid, type, line_no, buffer_credits, requested_blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( unsigned long, tid ) + __field( unsigned int, type ) + __field( unsigned int, line_no ) + __field( int, buffer_credits ) + __field( int, requested_blocks) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->tid = tid; + __entry->type = type; + __entry->line_no = line_no; + __entry->buffer_credits = buffer_credits; + __entry->requested_blocks = requested_blocks; + ), + + TP_printk("dev %d,%d tid %lu type %u line_no %u " + "buffer_credits %d requested_blocks %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + __entry->type, __entry->line_no, __entry->buffer_credits, + __entry->requested_blocks) +); + +TRACE_EVENT(jbd2_handle_stats, + TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, + unsigned int line_no, int interval, int sync, + int requested_blocks, int dirtied_blocks), + + TP_ARGS(dev, tid, type, line_no, interval, sync, + requested_blocks, dirtied_blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( unsigned long, tid ) + __field( unsigned int, type ) + __field( unsigned int, line_no ) + __field( int, interval ) + __field( int, sync ) + __field( int, requested_blocks) + __field( int, dirtied_blocks ) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->tid = tid; + __entry->type = type; + __entry->line_no = line_no; + __entry->interval = interval; + __entry->sync = sync; + __entry->requested_blocks = requested_blocks; + __entry->dirtied_blocks = dirtied_blocks; + ), + + TP_printk("dev %d,%d tid %lu type %u line_no %u interval %d " + "sync %d requested_blocks %d dirtied_blocks %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + __entry->type, __entry->line_no, __entry->interval, + __entry->sync, __entry->requested_blocks, + __entry->dirtied_blocks) +); + TRACE_EVENT(jbd2_run_stats, TP_PROTO(dev_t dev, unsigned long tid, struct transaction_run_stats_s *stats), -- cgit v1.2.3 From b6e96d0067d81f6a300bedee661b5ece8164e210 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 9 Feb 2013 16:29:20 -0500 Subject: jbd2: use module parameters instead of debugfs for jbd_debug There are multiple reasons to move away from debugfs. First of all, we are only using it for a single parameter, and it is much more complicated to set up (some 30 lines of code compared to 3), and one more thing that might fail while loading the jbd2 module. Secondly, as a module paramter it can be specified as a boot option if jbd2 is built into the kernel, or as a parameter when the module is loaded, and it can also be manipulated dynamically under /sys/module/jbd2/parameters/jbd2_debug. So it is more flexible. Ultimately we want to move away from using jbd_debug() towards tracepoints, but for now this is still a useful simplification of the code base. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fa5fea17b619..50e5a5e6a712 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -20,7 +20,6 @@ #ifndef __KERNEL__ #include "jfs_compat.h" #define JBD2_DEBUG -#define jfs_debug jbd_debug #else #include @@ -57,7 +56,7 @@ * CONFIG_JBD2_DEBUG is on. */ #define JBD2_EXPENSIVE_CHECKING -extern u8 jbd2_journal_enable_debug; +extern ushort jbd2_journal_enable_debug; #define jbd_debug(n, f, a...) \ do { \ -- cgit v1.2.3 From 06b0c886214a223dde7b21cbfc3008fd20a8ce16 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 18 Feb 2013 00:26:51 -0500 Subject: ext4: refine extent status tree This commit refines the extent status tree code. 1) A prefix 'es_' is added to to the extent status tree structure members. 2) Refactored es_remove_extent() so that __es_remove_extent() can be used by es_insert_extent() to remove the old extent entry(-ies) before inserting a new one. 3) Rename extent_status_end() to ext4_es_end() 4) ext4_es_can_be_merged() is define to check whether two extents can be merged or not. 5) Update and clarified comments. Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- include/trace/events/ext4.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6080ea1380b8..52c923851959 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2093,75 +2093,75 @@ TRACE_EVENT(ext4_ext_remove_space_done, ); TRACE_EVENT(ext4_es_insert_extent, - TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t len), + TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), - TP_ARGS(inode, start, len), + TP_ARGS(inode, lblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( loff_t, start ) + __field( loff_t, lblk ) __field( loff_t, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->start = start; + __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu es [%lld/%lld)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->start, __entry->len) + __entry->lblk, __entry->len) ); TRACE_EVENT(ext4_es_remove_extent, - TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t len), + TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), - TP_ARGS(inode, start, len), + TP_ARGS(inode, lblk, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( loff_t, start ) + __field( loff_t, lblk ) __field( loff_t, len ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->start = start; + __entry->lblk = lblk; __entry->len = len; ), TP_printk("dev %d,%d ino %lu es [%lld/%lld)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->start, __entry->len) + __entry->lblk, __entry->len) ); TRACE_EVENT(ext4_es_find_extent_enter, - TP_PROTO(struct inode *inode, ext4_lblk_t start), + TP_PROTO(struct inode *inode, ext4_lblk_t lblk), - TP_ARGS(inode, start), + TP_ARGS(inode, lblk), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( ext4_lblk_t, start ) + __field( ext4_lblk_t, lblk ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->start = start; + __entry->lblk = lblk; ), - TP_printk("dev %d,%d ino %lu start %u", + TP_printk("dev %d,%d ino %lu lblk %u", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->start) + (unsigned long) __entry->ino, __entry->lblk) ); TRACE_EVENT(ext4_es_find_extent_exit, @@ -2173,7 +2173,7 @@ TRACE_EVENT(ext4_es_find_extent_exit, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( ext4_lblk_t, start ) + __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) __field( ext4_lblk_t, ret ) ), @@ -2181,15 +2181,15 @@ TRACE_EVENT(ext4_es_find_extent_exit, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->start = es->start; - __entry->len = es->len; + __entry->lblk = es->es_lblk; + __entry->len = es->es_len; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->start, __entry->len, __entry->ret) + __entry->lblk, __entry->len, __entry->ret) ); #endif /* _TRACE_EXT4_H */ -- cgit v1.2.3 From fdc0212e86ca15c5cfed77088af7cc5eb79ccbc7 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 18 Feb 2013 00:26:51 -0500 Subject: ext4: add physical block and status member into extent status tree This commit adds two members in extent_status structure to let it record physical block and extent status. Here es_pblk is used to record both of them because physical block only has 48 bits. So extent status could be stashed into it so that we can save some memory. Now written, unwritten, delayed and hole are defined as status. Due to new member is added into extent status tree, all interfaces need to be adjusted. Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- include/trace/events/ext4.h | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 52c923851959..0ee507ff216d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2093,28 +2093,33 @@ TRACE_EVENT(ext4_ext_remove_space_done, ); TRACE_EVENT(ext4_es_insert_extent, - TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), + TP_PROTO(struct inode *inode, struct extent_status *es), - TP_ARGS(inode, lblk, len), + TP_ARGS(inode, es), TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, lblk ) - __field( loff_t, len ) + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( ext4_lblk_t, lblk ) + __field( ext4_lblk_t, len ) + __field( ext4_fsblk_t, pblk ) + __field( unsigned long long, status ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->lblk = lblk; - __entry->len = len; + __entry->lblk = es->es_lblk; + __entry->len = es->es_len; + __entry->pblk = ext4_es_pblock(es); + __entry->status = ext4_es_status(es); ), - TP_printk("dev %d,%d ino %lu es [%lld/%lld)", + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->lblk, __entry->len) + __entry->lblk, __entry->len, + __entry->pblk, __entry->status) ); TRACE_EVENT(ext4_es_remove_extent, @@ -2175,6 +2180,8 @@ TRACE_EVENT(ext4_es_find_extent_exit, __field( ino_t, ino ) __field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, len ) + __field( ext4_fsblk_t, pblk ) + __field( unsigned long long, status ) __field( ext4_lblk_t, ret ) ), @@ -2183,13 +2190,16 @@ TRACE_EVENT(ext4_es_find_extent_exit, __entry->ino = inode->i_ino; __entry->lblk = es->es_lblk; __entry->len = es->es_len; + __entry->pblk = ext4_es_pblock(es); + __entry->status = ext4_es_status(es); __entry->ret = ret; ), - TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u", + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx ret %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->lblk, __entry->len, __entry->ret) + __entry->lblk, __entry->len, + __entry->pblk, __entry->status, __entry->ret) ); #endif /* _TRACE_EXT4_H */ -- cgit v1.2.3 From be401363ac5ec652c706263a59b0bd0acc3612e8 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 18 Feb 2013 00:27:26 -0500 Subject: ext4: rename and improbe ext4_es_find_extent() This commit renames ext4_es_find_extent with ext4_es_find_delayed_extent and improve this function. First, we split input and output parameter. Second, this function never return the first block of the next delayed extent after 'es'. Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Jan kara --- include/trace/events/ext4.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 0ee507ff216d..c121cdf55ab3 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2147,7 +2147,7 @@ TRACE_EVENT(ext4_es_remove_extent, __entry->lblk, __entry->len) ); -TRACE_EVENT(ext4_es_find_extent_enter, +TRACE_EVENT(ext4_es_find_delayed_extent_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), @@ -2169,11 +2169,10 @@ TRACE_EVENT(ext4_es_find_extent_enter, (unsigned long) __entry->ino, __entry->lblk) ); -TRACE_EVENT(ext4_es_find_extent_exit, - TP_PROTO(struct inode *inode, struct extent_status *es, - ext4_lblk_t ret), +TRACE_EVENT(ext4_es_find_delayed_extent_exit, + TP_PROTO(struct inode *inode, struct extent_status *es), - TP_ARGS(inode, es, ret), + TP_ARGS(inode, es), TP_STRUCT__entry( __field( dev_t, dev ) @@ -2182,7 +2181,6 @@ TRACE_EVENT(ext4_es_find_extent_exit, __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( unsigned long long, status ) - __field( ext4_lblk_t, ret ) ), TP_fast_assign( @@ -2192,14 +2190,13 @@ TRACE_EVENT(ext4_es_find_extent_exit, __entry->len = es->es_len; __entry->pblk = ext4_es_pblock(es); __entry->status = ext4_es_status(es); - __entry->ret = ret; ), - TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx ret %u", + TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, - __entry->pblk, __entry->status, __entry->ret) + __entry->pblk, __entry->status) ); #endif /* _TRACE_EXT4_H */ -- cgit v1.2.3 From d100eef2440fea13e4f09e88b1c8bcbca64beb9f Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 18 Feb 2013 00:29:59 -0500 Subject: ext4: lookup block mapping in extent status tree After tracking all extent status, we already have a extent cache in memory. Every time we want to lookup a block mapping, we can first try to lookup it in extent status tree to avoid a potential disk I/O. A new function called ext4_es_lookup_extent is defined to finish this work. When we try to lookup a block mapping, we always call ext4_map_blocks and/or ext4_da_map_blocks. So in these functions we first try to lookup a block mapping in extent status tree. A new flag EXT4_GET_BLOCKS_NO_PUT_HOLE is used in ext4_da_map_blocks in order not to put a hole into extent status tree because this hole will be converted to delayed extent in the tree immediately. Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Jan kara --- include/trace/events/ext4.h | 56 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index c121cdf55ab3..1e590b68cec4 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2199,6 +2199,62 @@ TRACE_EVENT(ext4_es_find_delayed_extent_exit, __entry->pblk, __entry->status) ); +TRACE_EVENT(ext4_es_lookup_extent_enter, + TP_PROTO(struct inode *inode, ext4_lblk_t lblk), + + TP_ARGS(inode, lblk), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( ext4_lblk_t, lblk ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->lblk = lblk; + ), + + TP_printk("dev %d,%d ino %lu lblk %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, __entry->lblk) +); + +TRACE_EVENT(ext4_es_lookup_extent_exit, + TP_PROTO(struct inode *inode, struct extent_status *es, + int found), + + TP_ARGS(inode, es, found), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( ext4_lblk_t, lblk ) + __field( ext4_lblk_t, len ) + __field( ext4_fsblk_t, pblk ) + __field( unsigned long long, status ) + __field( int, found ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->lblk = es->es_lblk; + __entry->len = es->es_len; + __entry->pblk = ext4_es_pblock(es); + __entry->status = ext4_es_status(es); + __entry->found = found; + ), + + TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, __entry->found, + __entry->lblk, __entry->len, + __entry->found ? __entry->pblk : 0, + __entry->found ? __entry->status : 0) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 74cd15cd02708c7188581f279f33a98b2ae8d322 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 18 Feb 2013 00:32:55 -0500 Subject: ext4: reclaim extents from extent status tree Although extent status is loaded on-demand, we also need to reclaim extent from the tree when we are under a heavy memory pressure because in some cases fragmented extent tree causes status tree costs too much memory. Here we maintain a lru list in super_block. When the extent status of an inode is accessed and changed, this inode will be move to the tail of the list. The inode will be dropped from this list when it is cleared. In the inode, a counter is added to count the number of cached objects in extent status tree. Here only written/unwritten/hole extent is counted because delayed extent doesn't be reclaimed due to fiemap, bigalloc and seek_data/hole need it. The counter will be increased as a new extent is allocated, and it will be decreased as a extent is freed. In this commit we use normal shrinker framework to reclaim memory from the status tree. ext4_es_reclaim_extents_count() traverses the lru list to count the number of reclaimable extents. ext4_es_shrink() tries to reclaim written/unwritten/hole extents from extent status tree. The inode that has been shrunk is moved to the tail of lru list. Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Jan kara --- include/trace/events/ext4.h | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 1e590b68cec4..c0457c0d1a68 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2255,6 +2255,66 @@ TRACE_EVENT(ext4_es_lookup_extent_exit, __entry->found ? __entry->status : 0) ); +TRACE_EVENT(ext4_es_reclaim_extents_count, + TP_PROTO(struct super_block *sb, int nr_cached), + + TP_ARGS(sb, nr_cached), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, nr_cached ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nr_cached = nr_cached; + ), + + TP_printk("dev %d,%d cached objects nr %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nr_cached) +); + +TRACE_EVENT(ext4_es_shrink_enter, + TP_PROTO(struct super_block *sb, int nr_to_scan), + + TP_ARGS(sb, nr_to_scan), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, nr_to_scan ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nr_to_scan = nr_to_scan; + ), + + TP_printk("dev %d,%d nr to scan %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nr_to_scan) +); + +TRACE_EVENT(ext4_es_shrink_exit, + TP_PROTO(struct super_block *sb, int shrunk_nr), + + TP_ARGS(sb, shrunk_nr), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, shrunk_nr ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->shrunk_nr = shrunk_nr; + ), + + TP_printk("dev %d,%d nr to scan %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->shrunk_nr) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3