summaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-08 12:24:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-08 12:24:17 -0700
commit70ef8f0d37573079e093305214d0cc9eb71100f7 (patch)
treee45ad8b3948b5b015e3bce5bf3e1d944da781779 /fs/f2fs
parent677375cef8cb7763ef620e007873117b9bad72a0 (diff)
parente9cdd307704b5a8f685fa3fff4403691fbf64f97 (diff)
Merge tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this round, we've focused on enhancing performance with regards to block allocation, GC, and discard/in-place-update IO controls. There are a bunch of clean-ups as well as minor bug fixes. Enhancements: - disable heap-based allocation by default - issue small-sized discard commands by default - change the policy of data hotness for logging - distinguish IOs in terms of size and wbc type - start SSR earlier to avoid foreground GC - enhance data structures managing discard commands - enhance in-place update flow - add some more fault injection routines - secure one more xattr entry Bug fixes: - calculate victim cost for GC correctly - remain correct victim segment number for GC - race condition in nid allocator and initializer - stale pointer produced by atomic_writes - fix missing REQ_SYNC for flush commands - handle missing errors in more corner cases" * tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (111 commits) f2fs: fix a mount fail for wrong next_scan_nid f2fs: enhance scalability of trace macro f2fs: relocate inode_{,un}lock in F2FS_IOC_SETFLAGS f2fs: Make flush bios explicitely sync f2fs: show available_nids in f2fs/status f2fs: flush dirty nats periodically f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard f2fs: allow cpc->reason to indicate more than one reason f2fs: release cp and dnode lock before IPU f2fs: shrink size of struct discard_cmd f2fs: don't hold cmd_lock during waiting discard command f2fs: nullify fio->encrypted_page for each writes f2fs: sanity check segment count f2fs: introduce valid_ipu_blkaddr to clean up f2fs: lookup extent cache first under IPU scenario f2fs: reconstruct code to write a data page f2fs: introduce __wait_discard_cmd f2fs: introduce __issue_discard_cmd f2fs: enable small discard by default f2fs: delay awaking discard thread ...
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/checkpoint.c77
-rw-r--r--fs/f2fs/data.c200
-rw-r--r--fs/f2fs/debug.c62
-rw-r--r--fs/f2fs/dir.c46
-rw-r--r--fs/f2fs/extent_cache.c326
-rw-r--r--fs/f2fs/f2fs.h320
-rw-r--r--fs/f2fs/file.c173
-rw-r--r--fs/f2fs/gc.c93
-rw-r--r--fs/f2fs/inline.c34
-rw-r--r--fs/f2fs/inode.c23
-rw-r--r--fs/f2fs/namei.c53
-rw-r--r--fs/f2fs/node.c144
-rw-r--r--fs/f2fs/node.h31
-rw-r--r--fs/f2fs/recovery.c8
-rw-r--r--fs/f2fs/segment.c776
-rw-r--r--fs/f2fs/segment.h139
-rw-r--r--fs/f2fs/super.c48
-rw-r--r--fs/f2fs/trace.c4
-rw-r--r--fs/f2fs/xattr.c31
-rw-r--r--fs/f2fs/xattr.h8
20 files changed, 1694 insertions, 902 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0339daf4ca02..ea9c317b5916 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -275,10 +275,11 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
goto skip_write;
- trace_f2fs_writepages(mapping->host, wbc, META);
+ /* if locked failed, cp will flush dirty pages instead */
+ if (!mutex_trylock(&sbi->cp_mutex))
+ goto skip_write;
- /* if mounting is failed, skip writing node pages */
- mutex_lock(&sbi->cp_mutex);
+ trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = sync_meta_pages(sbi, META, wbc->nr_to_write);
mutex_unlock(&sbi->cp_mutex);
@@ -567,7 +568,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
if (ni.blk_addr != NULL_ADDR) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: orphan failed (ino=%x), run fsck to fix.",
+ "%s: orphan failed (ino=%x) by kernel, retry mount.",
__func__, ino);
return -EIO;
}
@@ -677,7 +678,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
- if (crc_offset >= blk_size) {
+ if (crc_offset > (blk_size - sizeof(__le32))) {
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
@@ -816,7 +817,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type)
return;
set_inode_flag(inode, flag);
- list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
+ if (!f2fs_is_volatile_file(inode))
+ list_add_tail(&F2FS_I(inode)->dirty_list,
+ &sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
@@ -941,6 +944,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
return 0;
}
+static void __prepare_cp_block(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ nid_t last_nid = nm_i->next_scan_nid;
+
+ next_free_nid(sbi, &last_nid);
+ ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
+ ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
+ ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
+ ckpt->next_free_nid = cpu_to_le32(last_nid);
+}
+
/*
* Freeze all the FS-operations for checkpoint.
*/
@@ -964,21 +980,26 @@ retry_flush_dents:
err = sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
+ cond_resched();
goto retry_flush_dents;
}
+ /*
+ * POR: we should ensure that there are no dirty node pages
+ * until finishing nat/sit flush. inode->i_blocks can be updated.
+ */
+ down_write(&sbi->node_change);
+
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
+ up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
+ cond_resched();
goto retry_flush_dents;
}
- /*
- * POR: we should ensure that there are no dirty node pages
- * until finishing nat/sit flush.
- */
retry_flush_nodes:
down_write(&sbi->node_write);
@@ -986,11 +1007,20 @@ retry_flush_nodes:
up_write(&sbi->node_write);
err = sync_node_pages(sbi, &wbc);
if (err) {
+ up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
goto out;
}
+ cond_resched();
goto retry_flush_nodes;
}
+
+ /*
+ * sbi->node_change is used only for AIO write_begin path which produces
+ * dirty node blocks and some checkpoint values by block allocation.
+ */
+ __prepare_cp_block(sbi);
+ up_write(&sbi->node_change);
out:
blk_finish_plug(&plug);
return err;
@@ -1024,16 +1054,20 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_lock(&sbi->cp_lock);
- if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
+ if ((cpc->reason & CP_UMOUNT) &&
+ le32_to_cpu(ckpt->cp_pack_total_block_count) >
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
- if (cpc->reason == CP_UMOUNT)
+ if (cpc->reason & CP_TRIMMED)
+ __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
+
+ if (cpc->reason & CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
- if (cpc->reason == CP_FASTBOOT)
+ if (cpc->reason & CP_FASTBOOT)
__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
@@ -1057,7 +1091,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
- nid_t last_nid = nm_i->next_scan_nid;
block_t start_blk;
unsigned int data_sum_blocks, orphan_blocks;
__u32 crc32 = 0;
@@ -1074,14 +1107,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return -EIO;
}
- next_free_nid(sbi, &last_nid);
-
/*
* modify checkpoint
* version number is already updated
*/
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
- ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
ckpt->cur_node_segno[i] =
@@ -1100,10 +1130,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
}
- ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
- ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
- ckpt->next_free_nid = cpu_to_le32(last_nid);
-
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
spin_lock(&sbi->cp_lock);
@@ -1143,7 +1169,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* write nat bits */
if (enabled_nat_bits(sbi, cpc)) {
__u64 cp_ver = cur_cp_version(ckpt);
- unsigned int i;
block_t blk;
cp_ver |= ((__u64)crc32 << 32);
@@ -1250,8 +1275,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
- (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
- (cpc->reason == CP_DISCARD && !sbi->discard_blks)))
+ ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
+ ((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
goto out;
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1273,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_merged_bios(sbi);
/* this is the case of multiple fstrims without any changes */
- if (cpc->reason == CP_DISCARD) {
+ if (cpc->reason & CP_DISCARD) {
if (!exist_trim_candidates(sbi, cpc)) {
unblock_operations(sbi);
goto out;
@@ -1311,7 +1336,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
- if (cpc->reason == CP_RECOVERY)
+ if (cpc->reason & CP_RECOVERY)
f2fs_msg(sbi->sb, KERN_NOTICE,
"checkpoint: version = %llx", ckpt_ver);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 1602b4bccae6..7c0f6bdf817d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -309,7 +309,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
io->fio.op = REQ_OP_WRITE;
- io->fio.op_flags = REQ_META | REQ_PRIO;
+ io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
if (!test_opt(sbi, NOBARRIER))
io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
}
@@ -341,7 +341,7 @@ void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
/*
* Fill the locked page with data located in the block address.
- * Return unlocked page.
+ * A caller needs to unlock the page on failure.
*/
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
@@ -362,6 +362,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
bio_set_op_attrs(bio, fio->op, fio->op_flags);
__submit_bio(fio->sbi, bio, fio->type);
+
+ if (!is_read_io(fio->op))
+ inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
return 0;
}
@@ -787,6 +790,21 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
return err;
}
+static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+{
+ if (flag == F2FS_GET_BLOCK_PRE_AIO) {
+ if (lock)
+ down_read(&sbi->node_change);
+ else
+ up_read(&sbi->node_change);
+ } else {
+ if (lock)
+ f2fs_lock_op(sbi);
+ else
+ f2fs_unlock_op(sbi);
+ }
+}
+
/*
* f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
* f2fs_map_blocks structure.
@@ -829,7 +847,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
next_dnode:
if (create)
- f2fs_lock_op(sbi);
+ __do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -939,7 +957,7 @@ skip:
f2fs_put_dnode(&dn);
if (create) {
- f2fs_unlock_op(sbi);
+ __do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
goto next_dnode;
@@ -948,7 +966,7 @@ sync_out:
f2fs_put_dnode(&dn);
unlock_out:
if (create) {
- f2fs_unlock_op(sbi);
+ __do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
out:
@@ -1151,9 +1169,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
- prefetchw(&page->flags);
if (pages) {
page = list_last_entry(pages, struct page, lru);
+
+ prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
page->index,
@@ -1283,17 +1302,83 @@ static int f2fs_read_data_pages(struct file *file,
return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
}
+static int encrypt_one_page(struct f2fs_io_info *fio)
+{
+ struct inode *inode = fio->page->mapping->host;
+ gfp_t gfp_flags = GFP_NOFS;
+
+ if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
+ return 0;
+
+ /* wait for GCed encrypted page writeback */
+ f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr);
+
+retry_encrypt:
+ fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
+ PAGE_SIZE, 0, fio->page->index, gfp_flags);
+ if (!IS_ERR(fio->encrypted_page))
+ return 0;
+
+ /* flush pending IOs and wait for a while in the ENOMEM case */
+ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
+ f2fs_flush_merged_bios(fio->sbi);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
+ return PTR_ERR(fio->encrypted_page);
+}
+
+static inline bool need_inplace_update(struct f2fs_io_info *fio)
+{
+ struct inode *inode = fio->page->mapping->host;
+
+ if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
+ return false;
+ if (is_cold_data(fio->page))
+ return false;
+ if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
+ return false;
+
+ return need_inplace_update_policy(inode, fio);
+}
+
+static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
+{
+ if (fio->old_blkaddr == NEW_ADDR)
+ return false;
+ if (fio->old_blkaddr == NULL_ADDR)
+ return false;
+ return true;
+}
+
int do_write_data_page(struct f2fs_io_info *fio)
{
struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
+ struct extent_info ei = {0,0,0};
+ bool ipu_force = false;
int err = 0;
set_new_dnode(&dn, inode, NULL, NULL, 0);
+ if (need_inplace_update(fio) &&
+ f2fs_lookup_extent_cache(inode, page->index, &ei)) {
+ fio->old_blkaddr = ei.blk + page->index - ei.fofs;
+
+ if (valid_ipu_blkaddr(fio)) {
+ ipu_force = true;
+ fio->need_lock = false;
+ goto got_it;
+ }
+ }
+
+ if (fio->need_lock)
+ f2fs_lock_op(fio->sbi);
+
err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
if (err)
- return err;
+ goto out;
fio->old_blkaddr = dn.data_blkaddr;
@@ -1302,31 +1387,10 @@ int do_write_data_page(struct f2fs_io_info *fio)
ClearPageUptodate(page);
goto out_writepage;
}
-
- if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
- gfp_t gfp_flags = GFP_NOFS;
-
- /* wait for GCed encrypted page writeback */
- f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
- fio->old_blkaddr);
-retry_encrypt:
- fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
- PAGE_SIZE, 0,
- fio->page->index,
- gfp_flags);
- if (IS_ERR(fio->encrypted_page)) {
- err = PTR_ERR(fio->encrypted_page);
- if (err == -ENOMEM) {
- /* flush pending ios and wait for a while */
- f2fs_flush_merged_bios(F2FS_I_SB(inode));
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- gfp_flags |= __GFP_NOFAIL;
- err = 0;
- goto retry_encrypt;
- }
- goto out_writepage;
- }
- }
+got_it:
+ err = encrypt_one_page(fio);
+ if (err)
+ goto out_writepage;
set_page_writeback(page);
@@ -1334,22 +1398,27 @@ retry_encrypt:
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (unlikely(fio->old_blkaddr != NEW_ADDR &&
- !is_cold_data(page) &&
- !IS_ATOMIC_WRITTEN_PAGE(page) &&
- need_inplace_update(inode))) {
- rewrite_data_page(fio);
+ if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+ f2fs_put_dnode(&dn);
+ if (fio->need_lock)
+ f2fs_unlock_op(fio->sbi);
+ err = rewrite_data_page(fio);
+ trace_f2fs_do_write_data_page(fio->page, IPU);
set_inode_flag(inode, FI_UPDATE_WRITE);
- trace_f2fs_do_write_data_page(page, IPU);
- } else {
- write_data_page(&dn, fio);
- trace_f2fs_do_write_data_page(page, OPU);
- set_inode_flag(inode, FI_APPEND_WRITE);
- if (page->index == 0)
- set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ return err;
}
+
+ /* LFS mode write path */
+ write_data_page(&dn, fio);
+ trace_f2fs_do_write_data_page(page, OPU);
+ set_inode_flag(inode, FI_APPEND_WRITE);
+ if (page->index == 0)
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
f2fs_put_dnode(&dn);
+out:
+ if (fio->need_lock)
+ f2fs_unlock_op(fio->sbi);
return err;
}
@@ -1370,9 +1439,11 @@ static int __write_data_page(struct page *page, bool *submitted,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
+ .old_blkaddr = NULL_ADDR,
.page = page,
.encrypted_page = NULL,
.submitted = false,
+ .need_lock = true,
};
trace_f2fs_writepage(page, DATA);
@@ -1408,6 +1479,7 @@ write:
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
+ fio.need_lock = false;
err = do_write_data_page(&fio);
goto done;
}
@@ -1416,6 +1488,8 @@ write:
need_balance_fs = true;
else if (has_not_enough_free_secs(sbi, 0, 0))
goto redirty_out;
+ else
+ set_inode_flag(inode, FI_HOT_DATA);
err = -EAGAIN;
if (f2fs_has_inline_data(inode)) {
@@ -1423,12 +1497,12 @@ write:
if (!err)
goto out;
}
- f2fs_lock_op(sbi);
+
if (err == -EAGAIN)
err = do_write_data_page(&fio);
if (F2FS_I(inode)->last_disk_size < psize)
F2FS_I(inode)->last_disk_size = psize;
- f2fs_unlock_op(sbi);
+
done:
if (err && err != -ENOENT)
goto redirty_out;
@@ -1441,12 +1515,14 @@ out:
if (wbc->for_reclaim) {
f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
DATA, WRITE);
+ clear_inode_flag(inode, FI_HOT_DATA);
remove_dirty_inode(inode);
submitted = NULL;
}
unlock_page(page);
- f2fs_balance_fs(sbi, need_balance_fs);
+ if (!S_ISDIR(inode->i_mode))
+ f2fs_balance_fs(sbi, need_balance_fs);
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
@@ -1495,6 +1571,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pagevec_init(&pvec, 0);
+ if (get_dirty_pages(mapping->host) <=
+ SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
+ set_inode_flag(mapping->host, FI_HOT_DATA);
+ else
+ clear_inode_flag(mapping->host, FI_HOT_DATA);
+
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
@@ -1580,8 +1662,10 @@ continue_unlock:
last_idx = page->index;
}
- if (--wbc->nr_to_write <= 0 &&
- wbc->sync_mode == WB_SYNC_NONE) {
+ /* give a priority to WB_SYNC threads */
+ if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+ --wbc->nr_to_write <= 0) &&
+ wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -1637,9 +1721,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,
trace_f2fs_writepages(mapping->host, wbc, DATA);
+ /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_inc(&sbi->wb_sync_req);
+ else if (atomic_read(&sbi->wb_sync_req))
+ goto skip_write;
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug(&plug);
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_dec(&sbi->wb_sync_req);
/*
* if some pages were truncated, we cannot guarantee its mapping->host
* to detect pending bios.
@@ -1687,7 +1780,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- f2fs_lock_op(sbi);
+ __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
locked = true;
}
restart:
@@ -1723,7 +1816,8 @@ restart:
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err || dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn);
- f2fs_lock_op(sbi);
+ __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
+ true);
locked = true;
goto restart;
}
@@ -1737,7 +1831,7 @@ out:
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- f2fs_unlock_op(sbi);
+ __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
return err;
}
@@ -1951,7 +2045,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
/* This is atomic written page, keep Private */
if (IS_ATOMIC_WRITTEN_PAGE(page))
- return;
+ return drop_inmem_page(inode, page);
set_page_private(page, 0);
ClearPagePrivate(page);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index ee2d0a485fc3..87f449845f5f 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -51,15 +51,26 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
+ si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
+ si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
- if (SM_I(sbi) && SM_I(sbi)->fcc_info)
- si->nr_flush =
- atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
- if (SM_I(sbi) && SM_I(sbi)->dcc_info)
- si->nr_discard =
- atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
+ if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
+ si->nr_flushed =
+ atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
+ si->nr_flushing =
+ atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ }
+ if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
+ si->nr_discarded =
+ atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
+ si->nr_discarding =
+ atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
+ si->nr_discard_cmd =
+ atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
+ si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
+ }
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@@ -86,6 +97,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
+ si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
@@ -99,8 +111,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
- si->cursec[i] = curseg->segno / sbi->segs_per_sec;
- si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
+ si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
+ si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
}
for (i = 0; i < 2; i++) {
@@ -124,10 +136,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
- blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
+ blks_per_sec = BLKS_PER_SEC(sbi);
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
- vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+ vblocks = get_valid_blocks(sbi, segno, true);
dist = abs(vblocks - hblks_per_sec);
bimodal += dist * dist;
@@ -156,7 +168,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
if (si->base_mem)
goto get_cache;
- si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
+ /* build stat */
+ si->base_mem = sizeof(struct f2fs_stat_info);
+
+ /* build superblock */
+ si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
si->base_mem += 2 * sizeof(struct f2fs_inode_info);
si->base_mem += sizeof(*sbi->ckpt);
si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
@@ -208,8 +224,11 @@ get_cache:
/* build merge flush thread */
if (SM_I(sbi)->fcc_info)
si->cache_mem += sizeof(struct flush_cmd_control);
- if (SM_I(sbi)->dcc_info)
+ if (SM_I(sbi)->dcc_info) {
si->cache_mem += sizeof(struct discard_cmd_control);
+ si->cache_mem += sizeof(struct discard_cmd) *
+ atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
+ }
/* free nids */
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
@@ -330,11 +349,16 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
+ seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
+ "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
- si->nr_flush, si->nr_discard);
- seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
- si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
+ si->nr_flushing, si->nr_flushed,
+ si->nr_discarding, si->nr_discarded,
+ si->nr_discard_cmd, si->undiscard_blks);
+ seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
+ "volatile IO: %4d (Max. %4d)\n",
+ si->inmem_pages, si->aw_cnt, si->max_aw_cnt,
+ si->vw_cnt, si->max_vw_cnt);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -347,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
- seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n",
- si->free_nids, si->alloc_nids);
+ seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n",
+ si->free_nids, si->avail_nids, si->alloc_nids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
@@ -434,7 +458,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inplace_count, 0);
atomic_set(&sbi->aw_cnt, 0);
+ atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
+ atomic_set(&sbi->max_vw_cnt, 0);
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index e64087052834..94756f55a97e 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
- make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+ make_dentry_ptr_block(NULL, &d, dentry_blk);
de = find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
@@ -192,13 +192,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_put_page(dentry_page, 0);
}
- /* This is to increase the speed of f2fs_create */
- if (!de && room) {
- F2FS_I(dir)->task = current;
- if (F2FS_I(dir)->chash != namehash) {
- F2FS_I(dir)->chash = namehash;
- F2FS_I(dir)->clevel = level;
- }
+ if (!de && room && F2FS_I(dir)->chash != namehash) {
+ F2FS_I(dir)->chash = namehash;
+ F2FS_I(dir)->clevel = level;
}
return de;
@@ -239,6 +235,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
break;
}
out:
+ /* This is to increase the speed of f2fs_create */
+ if (!de)
+ F2FS_I(dir)->task = current;
return de;
}
@@ -322,24 +321,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
set_page_dirty(ipage);
}
-int update_dent_inode(struct inode *inode, struct inode *to,
- const struct qstr *name)
-{
- struct page *page;
-
- if (file_enc_name(to))
- return 0;
-
- page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
- if (IS_ERR(page))
- return PTR_ERR(page);
-
- init_dent_inode(name, page);
- f2fs_put_page(page, 1);
-
- return 0;
-}
-
void do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
@@ -369,7 +350,7 @@ static int make_empty_dir(struct inode *inode,
dentry_blk = kmap_atomic(dentry_page);
- make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+ make_dentry_ptr_block(NULL, &d, dentry_blk);
do_make_empty_dir(inode, parent, &d);
kunmap_atomic(dentry_blk);
@@ -423,8 +404,11 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
set_cold_node(inode, page);
}
- if (new_name)
+ if (new_name) {
init_dent_inode(new_name, page);
+ if (f2fs_encrypted_inode(dir))
+ file_set_enc_name(inode);
+ }
/*
* This file should be checkpointed during fsync.
@@ -584,11 +568,9 @@ add_dentry:
err = PTR_ERR(page);
goto fail;
}
- if (f2fs_encrypted_inode(dir))
- file_set_enc_name(inode);
}
- make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
+ make_dentry_ptr_block(NULL, &d, dentry_blk);
f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
set_page_dirty(dentry_page);
@@ -896,7 +878,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
dentry_blk = kmap(dentry_page);
- make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
+ make_dentry_ptr_block(inode, &d, dentry_blk);
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index c6934f014e0f..2f98d7039701 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -18,6 +18,179 @@
#include "node.h"
#include <trace/events/f2fs.h>
+static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
+ unsigned int ofs)
+{
+ if (cached_re) {
+ if (cached_re->ofs <= ofs &&
+ cached_re->ofs + cached_re->len > ofs) {
+ return cached_re;
+ }
+ }
+ return NULL;
+}
+
+static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
+ unsigned int ofs)
+{
+ struct rb_node *node = root->rb_node;
+ struct rb_entry *re;
+
+ while (node) {
+ re = rb_entry(node, struct rb_entry, rb_node);
+
+ if (ofs < re->ofs)
+ node = node->rb_left;
+ else if (ofs >= re->ofs + re->len)
+ node = no