summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChao Yu <yuchao0@huawei.com>2020-08-04 21:14:49 +0800
committerJaegeuk Kim <jaegeuk@kernel.org>2020-09-11 11:11:15 -0700
commit093749e296e29a4b0162eb925a6701a01e8c9a98 (patch)
treea2bea08c28618555ef57fb6974c1121e20cd72ba
parent568d2a1e37b2fd5a16f7d5c19d9ef0267c47e9af (diff)
f2fs: support age threshold based garbage collection
There are several issues in the current background GC algorithm: - the valid block count is one of the key factors in the cost calculation, so if a segment has few valid blocks, the CB algorithm will still choose it as a victim even if it is young or located in a hot segment, which is not appropriate. - GCed data/node blocks go to the existing logs regardless of whether the data already there has the same update frequency, so hot and cold data may be mixed again. - the GC allocator mainly uses LFS type segments, so it consumes free segments more quickly. This patch introduces a new algorithm named age threshold based garbage collection to solve the above issues. There are three main steps: 1. select a source victim: - set an age threshold, and select candidates based on the threshold: e.g. 0 means youngest, 100 means oldest; if we set the age threshold to 80, then dirty segments whose age is in the range [80, 100] are selected as candidates; - set a candidate_ratio threshold, and select candidates based on the ratio, so that we can shrink the candidates to the oldest segments; - select the target segment with the fewest valid blocks in order to migrate blocks with minimum cost; 2. select a target victim: - select candidates based on the age threshold; - set a candidate_radius threshold, and search for candidates whose age is around the source victim's; the search radius should be less than the radius threshold; - select the target segment with the most valid blocks in order to avoid migrating the current target segment. 3. merge valid blocks from the source victim into the target victim with the SSR allocator.
Test steps: - create 160 dirty segments: * half of them have 128 valid blocks per segment * the rest of them have 384 valid blocks per segment - run background GC Benefit: GC count and block movement count both decrease significantly: - Before: - Valid: 86 - Dirty: 1 - Prefree: 11 - Free: 6001 (6001) GC calls: 162 (BG: 220) - data segments : 160 (160) - node segments : 2 (2) Try to move 41454 blocks (BG: 41454) - data blocks : 40960 (40960) - node blocks : 494 (494) IPU: 0 blocks SSR: 0 blocks in 0 segments LFS: 41364 blocks in 81 segments - After: - Valid: 87 - Dirty: 0 - Prefree: 4 - Free: 6008 (6008) GC calls: 75 (BG: 76) - data segments : 74 (74) - node segments : 1 (1) Try to move 12813 blocks (BG: 12813) - data blocks : 12544 (12544) - node blocks : 269 (269) IPU: 0 blocks SSR: 12032 blocks in 77 segments LFS: 855 blocks in 2 segments Signed-off-by: Chao Yu <yuchao0@huawei.com> [Jaegeuk Kim: fix a bug along with pinfile in-mem segment & clean up] Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs3
-rw-r--r--Documentation/filesystems/f2fs.rst2
-rw-r--r--fs/f2fs/checkpoint.c4
-rw-r--r--fs/f2fs/data.c2
-rw-r--r--fs/f2fs/debug.c4
-rw-r--r--fs/f2fs/f2fs.h29
-rw-r--r--fs/f2fs/gc.c380
-rw-r--r--fs/f2fs/gc.h25
-rw-r--r--fs/f2fs/segment.c177
-rw-r--r--fs/f2fs/segment.h25
-rw-r--r--fs/f2fs/super.c26
-rw-r--r--fs/f2fs/sysfs.c11
-rw-r--r--include/trace/events/f2fs.h8
13 files changed, 632 insertions, 64 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 7f730c4c8df2..834d0becae6d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -22,7 +22,8 @@ Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
Description: Controls the victim selection policy for garbage collection.
Setting gc_idle = 0(default) will disable this option. Setting
gc_idle = 1 will select the Cost Benefit approach & setting
- gc_idle = 2 will select the greedy approach.
+ gc_idle = 2 will select the greedy approach & setting
+ gc_idle = 3 will select the age-threshold based approach.
What: /sys/fs/f2fs/<disk>/reclaim_segments
Date: October 2013
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 2b3aef2f5fa1..a2425c999586 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -266,6 +266,8 @@ inlinecrypt When possible, encrypt/decrypt the contents of encrypted
inline encryption hardware. The on-disk format is
unaffected. For more details, see
Documentation/block/inline-encryption.rst.
+atgc Enable age-threshold garbage collection, it provides high
+ effectiveness and efficiency on background GC.
======================== ============================================================
Debugfs Entries
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 9e30ff6414b8..6059ce3758d8 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1620,7 +1620,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_sit_entries(sbi, cpc);
/* save inmem log status */
- f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+ f2fs_save_inmem_curseg(sbi);
err = do_checkpoint(sbi, cpc);
if (err)
@@ -1628,7 +1628,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
f2fs_clear_prefree_segments(sbi, cpc);
- f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+ f2fs_restore_inmem_curseg(sbi);
stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 011aea665ae4..6a20f2e664c4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1416,7 +1416,7 @@ alloc:
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
old_blkaddr = dn->data_blkaddr;
f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
- &sum, seg_type, NULL, false);
+ &sum, seg_type, NULL);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 41a91aa8c262..cb679561f44d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -397,6 +397,10 @@ static int stat_show(struct seq_file *s, void *v)
si->curseg[CURSEG_COLD_DATA_PINNED],
si->cursec[CURSEG_COLD_DATA_PINNED],
si->curzone[CURSEG_COLD_DATA_PINNED]);
+ seq_printf(s, " - ATGC data: %8d %8d %8d\n",
+ si->curseg[CURSEG_ALL_DATA_ATGC],
+ si->cursec[CURSEG_ALL_DATA_ATGC],
+ si->curzone[CURSEG_ALL_DATA_ATGC]);
seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
si->main_area_segs - si->dirty_count -
si->prefree_count - si->free_segs,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7446ca639140..eedfea9503f8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -98,6 +98,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_MOUNT_NORECOVERY 0x04000000
+#define F2FS_MOUNT_ATGC 0x08000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -978,7 +979,7 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
*/
#define NR_CURSEG_DATA_TYPE (3)
#define NR_CURSEG_NODE_TYPE (3)
-#define NR_CURSEG_INMEM_TYPE (1)
+#define NR_CURSEG_INMEM_TYPE (2)
#define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
#define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
@@ -992,6 +993,7 @@ enum {
NR_PERSISTENT_LOG, /* number of persistent log */
CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
/* pinned file that needs consecutive block address */
+ CURSEG_ALL_DATA_ATGC, /* SSR alloctor in hot/warm/cold data area */
NO_CHECK_TYPE, /* number of persistent & inmem log */
};
@@ -1238,6 +1240,18 @@ struct inode_management {
unsigned long ino_num; /* number of entries */
};
+/* for GC_AT */
+struct atgc_management {
+ bool atgc_enabled; /* ATGC is enabled or not */
+ struct rb_root_cached root; /* root of victim rb-tree */
+ struct list_head victim_list; /* linked with all victim entries */
+ unsigned int victim_count; /* victim count in rb-tree */
+ unsigned int candidate_ratio; /* candidate ratio */
+ unsigned int max_candidate_count; /* max candidate count */
+ unsigned int age_weight; /* age weight, vblock_weight = 100 - age_weight */
+ unsigned long long age_threshold; /* age threshold */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -1270,6 +1284,7 @@ enum {
GC_NORMAL,
GC_IDLE_CB,
GC_IDLE_GREEDY,
+ GC_IDLE_AT,
GC_URGENT_HIGH,
GC_URGENT_LOW,
};
@@ -1521,6 +1536,7 @@ struct f2fs_sb_info {
* race between GC and GC or CP
*/
struct f2fs_gc_kthread *gc_thread; /* GC thread */
+ struct atgc_management am; /* atgc management */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
@@ -3338,8 +3354,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
-void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
-void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
+ unsigned int *newseg, bool new_sec, int dir);
void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
@@ -3367,7 +3386,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
- struct f2fs_io_info *fio, bool from_gc);
+ struct f2fs_io_info *fio);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3506,6 +3525,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
+int __init f2fs_create_garbage_collection_cache(void);
+void f2fs_destroy_garbage_collection_cache(void);
/*
* recovery.c
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 697908c333e2..84b9dac942e3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -21,6 +21,8 @@
#include "gc.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *victim_entry_slab;
+
static unsigned int count_bits(const unsigned long *addr,
unsigned int offset, unsigned int len);
@@ -169,7 +171,16 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
- int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+ int gc_mode;
+
+ if (gc_type == BG_GC) {
+ if (sbi->am.atgc_enabled)
+ gc_mode = GC_AT;
+ else
+ gc_mode = GC_CB;
+ } else {
+ gc_mode = GC_GREEDY;
+ }
switch (sbi->gc_mode) {
case GC_IDLE_CB:
@@ -179,7 +190,11 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
case GC_URGENT_HIGH:
gc_mode = GC_GREEDY;
break;
+ case GC_IDLE_AT:
+ gc_mode = GC_AT;
+ break;
}
+
return gc_mode;
}
@@ -193,6 +208,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->dirty_bitmap = dirty_i->dirty_segmap[type];
p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
+ } else if (p->alloc_mode == AT_SSR) {
+ p->gc_mode = GC_GREEDY;
+ p->dirty_bitmap = dirty_i->dirty_segmap[type];
+ p->max_search = dirty_i->nr_dirty[type];
+ p->ofs_unit = 1;
} else {
p->gc_mode = select_gc_type(sbi, gc_type);
p->ofs_unit = sbi->segs_per_sec;
@@ -212,6 +232,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
*/
if (gc_type != FG_GC &&
(sbi->gc_mode != GC_URGENT_HIGH) &&
+ (p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
@@ -229,10 +250,16 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
return sbi->blocks_per_seg;
+ else if (p->alloc_mode == AT_SSR)
+ return UINT_MAX;
+
+ /* LFS */
if (p->gc_mode == GC_GREEDY)
return 2 * sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
+ else if (p->gc_mode == GC_AT)
+ return UINT_MAX;
else /* No other gc_mode */
return 0;
}
@@ -298,8 +325,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, true);
- else
+ else if (p->gc_mode == GC_CB)
return get_cb_cost(sbi, segno);
+
+ f2fs_bug_on(sbi, 1);
+ return 0;
}
static unsigned int count_bits(const unsigned long *addr,
@@ -314,6 +344,273 @@ static unsigned int count_bits(const unsigned long *addr,
return sum;
}
+static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno,
+ struct rb_node *parent, struct rb_node **p,
+ bool left_most)
+{
+ struct atgc_management *am = &sbi->am;
+ struct victim_entry *ve;
+
+ ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS);
+
+ ve->mtime = mtime;
+ ve->segno = segno;
+
+ rb_link_node(&ve->rb_node, parent, p);
+ rb_insert_color_cached(&ve->rb_node, &am->root, left_most);
+
+ list_add_tail(&ve->list, &am->victim_list);
+
+ am->victim_count++;
+
+ return ve;
+}
+
+static void insert_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ bool left_most = true;
+
+ p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most);
+ attach_victim_entry(sbi, mtime, segno, parent, p, left_most);
+}
+
+static void add_victim_entry(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p, unsigned int segno)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
+ unsigned long long mtime = 0;
+ unsigned int i;
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (p->gc_mode == GC_AT &&
+ get_valid_blocks(sbi, segno, true) == 0)
+ return;
+
+ if (p->alloc_mode == AT_SSR &&
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks == 0)
+ return;
+ }
+
+ for (i = 0; i < sbi->segs_per_sec; i++)
+ mtime += get_seg_entry(sbi, start + i)->mtime;
+ mtime = div_u64(mtime, sbi->segs_per_sec);
+
+ /* Handle if the system time has changed by the user */
+ if (mtime < sit_i->min_mtime)
+ sit_i->min_mtime = mtime;
+ if (mtime > sit_i->max_mtime)
+ sit_i->max_mtime = mtime;
+ if (mtime < sit_i->dirty_min_mtime)
+ sit_i->dirty_min_mtime = mtime;
+ if (mtime > sit_i->dirty_max_mtime)
+ sit_i->dirty_max_mtime = mtime;
+
+ /* don't choose young section as candidate */
+ if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
+ return;
+
+ insert_victim_entry(sbi, mtime, segno);
+}
+
+static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *parent = NULL;
+ bool left_most;
+
+ f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most);
+
+ return parent;
+}
+
+static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_root_cached *root = &am->root;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long total_time;
+ unsigned long long age, u, accu;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int sec_blocks = BLKS_PER_SEC(sbi);
+ unsigned int vblocks;
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int age_weight = am->age_weight;
+ unsigned int cost;
+ unsigned int iter = 0;
+
+ if (max_mtime < min_mtime)
+ return;
+
+ max_mtime += 1;
+ total_time = max_mtime - min_mtime;
+
+ accu = div64_u64(ULLONG_MAX, total_time);
+ accu = min_t(unsigned long long, div_u64(accu, 100),
+ DEFAULT_ACCURACY_CLASS);
+
+ node = rb_first_cached(root);
+next:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re)
+ return;
+
+ ve = (struct victim_entry *)re;
+
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip;
+
+ /* age = 10000 * x% * 60 */
+ age = div64_u64(accu * (max_mtime - ve->mtime), total_time) *
+ age_weight;
+
+ vblocks = get_valid_blocks(sbi, ve->segno, true);
+ f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks);
+
+ /* u = 10000 * x% * 40 */
+ u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) *
+ (100 - age_weight);
+
+ f2fs_bug_on(sbi, age + u >= UINT_MAX);
+
+ cost = UINT_MAX - (age + u);
+ iter++;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip:
+ if (iter < dirty_threshold) {
+ node = rb_next(node);
+ goto next;
+ }
+}
+
+/*
+ * select candidates around source section in range of
+ * [target - dirty_threshold, target + dirty_threshold]
+ */
+static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long age;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int seg_blocks = sbi->blocks_per_seg;
+ unsigned int vblocks;
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int cost;
+ unsigned int iter = 0;
+ int stage = 0;
+
+ if (max_mtime < min_mtime)
+ return;
+ max_mtime += 1;
+next_stage:
+ node = lookup_central_victim(sbi, p);
+next_node:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re) {
+ if (stage == 0)
+ goto skip_stage;
+ return;
+ }
+
+ ve = (struct victim_entry *)re;
+
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip_node;
+
+ age = max_mtime - ve->mtime;
+
+ vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks;
+ f2fs_bug_on(sbi, !vblocks);
+
+ /* rare case */
+ if (vblocks == seg_blocks)
+ goto skip_node;
+
+ iter++;
+
+ age = max_mtime - abs(p->age - age);
+ cost = UINT_MAX - vblocks;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip_node:
+ if (iter < dirty_threshold) {
+ if (stage == 0)
+ node = rb_prev(node);
+ else if (stage == 1)
+ node = rb_next(node);
+ goto next_node;
+ }
+skip_stage:
+ if (stage < 1) {
+ stage++;
+ iter = 0;
+ goto next_stage;
+ }
+}
+static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &sbi->am.root, true));
+
+ if (p->gc_mode == GC_AT)
+ atgc_lookup_victim(sbi, p);
+ else if (p->alloc_mode == AT_SSR)
+ atssr_lookup_victim(sbi, p);
+ else
+ f2fs_bug_on(sbi, 1);
+}
+
+static void release_victim_entry(struct f2fs_sb_info *sbi)
+{
+ struct atgc_management *am = &sbi->am;
+ struct victim_entry *ve, *tmp;
+
+ list_for_each_entry_safe(ve, tmp, &am->victim_list, list) {
+ list_del(&ve->list);
+ kmem_cache_free(victim_entry_slab, ve);
+ am->victim_count--;
+ }
+
+ am->root = RB_ROOT_CACHED;
+
+ f2fs_bug_on(sbi, am->victim_count);
+ f2fs_bug_on(sbi, !list_empty(&am->victim_list));
+}
+
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
@@ -323,25 +620,37 @@ static unsigned int count_bits(const unsigned long *addr,
* which has minimum valid blocks and removes it from dirty seglist.
*/
static int get_victim_by_default(struct f2fs_sb_info *sbi,
- unsigned int *result, int gc_type, int type, char alloc_mode)
+ unsigned int *result, int gc_type, int type,
+ char alloc_mode, unsigned long long age)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment;
- unsigned int nsearched = 0;
+ unsigned int nsearched;
+ bool is_atgc;
int ret = 0;
mutex_lock(&dirty_i->seglist_lock);
last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
- select_policy(sbi, gc_type, type, &p);
+ p.age = age;
+ p.age_threshold = sbi->am.age_threshold;
+retry:
+ select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
+ p.oldest_age = 0;
p.min_cost = get_max_cost(sbi, &p);
+ is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR);
+ nsearched = 0;
+
+ if (is_atgc)
+ SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX;
+
if (*result != NULL_SEGNO) {
if (!get_valid_blocks(sbi, *result, false)) {
ret = -ENODATA;
@@ -422,11 +731,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
/* Don't touch checkpointed data */
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
get_ckpt_valid_blocks(sbi, segno) &&
- p.alloc_mode != SSR))
+ p.alloc_mode == LFS))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
+ if (is_atgc) {
+ add_victim_entry(sbi, &p, segno);
+ goto next;
+ }
+
cost = get_gc_cost(sbi, segno, &p);
if (p.min_cost > cost) {
@@ -445,6 +759,19 @@ next:
break;
}
}
+
+ /* get victim for GC_AT/AT_SSR */
+ if (is_atgc) {
+ lookup_victim_by_age(sbi, &p);
+ release_victim_entry(sbi);
+ }
+
+ if (is_atgc && p.min_segno == NULL_SEGNO &&
+ sm->elapsed_time < p.age_threshold) {
+ p.age_threshold = 0;
+ goto retry;
+ }
+
if (p.min_segno != NULL_SEGNO) {
got_it:
*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
@@ -793,6 +1120,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
block_t newaddr;
int err = 0;
bool lfs_mode = f2fs_lfs_mode(fio.sbi);
+ int type = fio.sbi->am.atgc_enabled ?
+ CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
/* do not read out */
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
@@ -879,7 +1208,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
}
f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
- &sum, CURSEG_COLD_DATA, NULL, true);
+ &sum, type, NULL);
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -1185,7 +1514,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
down_write(&sit_i->sentry_lock);
ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
- NO_CHECK_TYPE, LFS);
+ NO_CHECK_TYPE, LFS, 0);
up_write(&sit_i->sentry_lock);
return ret;
}
@@ -1216,6 +1545,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
end_segno -= sbi->segs_per_sec -
f2fs_usable_segs_in_sec(sbi, segno);
+ sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
+
/* readahead multi ssa blocks those have contiguous address */
if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
@@ -1426,6 +1757,37 @@ stop:
return ret;
}
+int __init f2fs_create_garbage_collection_cache(void)
+{
+ victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
+ sizeof(struct victim_entry));
+ if (!victim_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_garbage_collection_cache(void)
+{
+ kmem_cache_destroy(victim_entry_slab);
+}
+
+static void init_atgc_management(struct f2fs_sb_info *sbi)
+{
+ struct atgc_management *am = &sbi->am;
+
+ if (test_opt(sbi, ATGC) &&
+ SIT_I(sbi)->elapsed_time >= DEF_GC_THREAD_AGE_THRESHOLD)
+ am->atgc_enabled = true;
+
+ am->root = RB_ROOT_CACHED;
+ INIT_LIST_HEAD(&am->victim_list);
+ am->victim_count = 0;
+
+ am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO;
+ am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
+ am->age_weight = DEF_GC_THREAD_AGE_WEIGHT;
+}
+
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
DIRTY_I(sbi)->v_ops = &default_v_ops;
@@ -1436,6 +1798,8 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
+
+ init_atgc_management(sbi);
}
static int free_segment_range(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index ee5d7f30a1f8..0c8dae12dc51 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -14,6 +14,14 @@
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
+
+/* choose candidates from sections which has age of more than 7 days */
+#define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7)
+#define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */
+#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */
+#define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */
+#define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */
+
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
@@ -41,6 +49,23 @@ struct gc_inode_list {
struct radix_tree_root iroot;
};
+struct victim_info {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* section No. */
+};
+
+struct victim_entry {
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ union {
+ struct {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* segment No. */
+ };
+ struct victim_info vi; /* victim info */
+ };
+ struct list_head list;
+};
+
/*
* inline functions
*/
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8aa36aa25177..6335523090e9 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2424,9 +2424,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
-static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
+static int is_next_segment_free(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg, int type)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2530,6 +2530,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
struct summary_footer *sum_footer;
+ unsigned short seg_type = curseg->seg_type;
curseg->inited = true;
curseg->segno = curseg->next_segno;
@@ -2539,16 +2540,22 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
sum_footer = &(curseg->sum_blk->footer);
memset(sum_footer, 0, sizeof(struct summary_footer));
- if (IS_DATASEG(curseg->seg_type))
+
+ sanity_check_seg_type(sbi, seg_type);
+
+ if (IS_DATASEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
- if (IS_NODESEG(curseg->seg_type))
+ if (IS_NODESEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
- __set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
+ __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
}
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
+
+ sanity_check_seg_type(sbi, seg_type);
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
@@ -2562,8 +2569,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
return 0;
if (test_opt(sbi, NOHEAP) &&
- (curseg->seg_type == CURSEG_HOT_DATA ||
- IS_NODESEG(curseg->seg_type)))
+ (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2639,7 +2645,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2647,8 +2653,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
struct f2fs_summary_block *sum_node;
struct page *sum_page;
- write_sum_page(sbi, curseg->sum_blk,
- GET_SUM_BLOCK(sbi, curseg->segno));
+ if (flush)
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+
__set_test_and_inuse(sbi, new_segno);
mutex_lock(&dirty_i->seglist_lock);
@@ -2667,7 +2675,56 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
f2fs_put_page(sum_page, 1);
}
-void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age);
+
+static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+ int target_type, int alloc_mode,
+ unsigned long long age)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ curseg->seg_type = target_type;
+
+ if (get_ssr_segment(sbi, type, alloc_mode, age)) {
+ struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
+
+ curseg->seg_type = se->type;
+ change_curseg(sbi, type, true);
+ } else {
+ /* allocate cold segment by default */
+ curseg->seg_type = CURSEG_COLD_DATA;
+ new_curseg(sbi, type, true);
+ }
+ stat_inc_seg_type(sbi, curseg);
+}
+
+static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+
+ if (!sbi->am.atgc_enabled)
+ return;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+
+ up_write(&SIT_I(sbi)->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ up_read(&SM_I(sbi)->curseg_lock);
+
+}
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_init_atgc_curseg(sbi);
+}
+
+static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2687,7 +2744,15 @@ out:
mutex_unlock(&curseg->curseg_mutex);
}
-void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2704,23 +2769,35 @@ out:
mutex_unlock(&curseg->curseg_mutex);
}
-static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
unsigned segno = NULL_SEGNO;
+ unsigned short seg_type = curseg->seg_type;
int i, cnt;
bool reversed = false;
+ sanity_check_seg_type(sbi, seg_type);
+
/* f2fs_need_SSR() already forces to do this */
- if (!v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
/* For node segments, let's do SSR more intensively */