diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-08 12:24:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-08 12:24:17 -0700 |
commit | 70ef8f0d37573079e093305214d0cc9eb71100f7 (patch) | |
tree | e45ad8b3948b5b015e3bce5bf3e1d944da781779 /fs/f2fs | |
parent | 677375cef8cb7763ef620e007873117b9bad72a0 (diff) | |
parent | e9cdd307704b5a8f685fa3fff4403691fbf64f97 (diff) |
Merge tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"In this round, we've focused on enhancing performance with regards to
block allocation, GC, and discard/in-place-update IO controls. There
are a bunch of clean-ups as well as minor bug fixes.
Enhancements:
- disable heap-based allocation by default
- issue small-sized discard commands by default
- change the policy of data hotness for logging
- distinguish IOs in terms of size and wbc type
- start SSR earlier to avoid foreground GC
- enhance data structures managing discard commands
- enhance in-place update flow
- add some more fault injection routines
- secure one more xattr entry
Bug fixes:
- calculate victim cost for GC correctly
- remain correct victim segment number for GC
- race condition in nid allocator and initializer
- stale pointer produced by atomic_writes
- fix missing REQ_SYNC for flush commands
- handle missing errors in more corner cases"
* tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (111 commits)
f2fs: fix a mount fail for wrong next_scan_nid
f2fs: enhance scalability of trace macro
f2fs: relocate inode_{,un}lock in F2FS_IOC_SETFLAGS
f2fs: Make flush bios explicitely sync
f2fs: show available_nids in f2fs/status
f2fs: flush dirty nats periodically
f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard
f2fs: allow cpc->reason to indicate more than one reason
f2fs: release cp and dnode lock before IPU
f2fs: shrink size of struct discard_cmd
f2fs: don't hold cmd_lock during waiting discard command
f2fs: nullify fio->encrypted_page for each writes
f2fs: sanity check segment count
f2fs: introduce valid_ipu_blkaddr to clean up
f2fs: lookup extent cache first under IPU scenario
f2fs: reconstruct code to write a data page
f2fs: introduce __wait_discard_cmd
f2fs: introduce __issue_discard_cmd
f2fs: enable small discard by default
f2fs: delay awaking discard thread
...
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/checkpoint.c | 77 | ||||
-rw-r--r-- | fs/f2fs/data.c | 200 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 62 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 46 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 326 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 320 | ||||
-rw-r--r-- | fs/f2fs/file.c | 173 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 93 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 34 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 23 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 53 | ||||
-rw-r--r-- | fs/f2fs/node.c | 144 | ||||
-rw-r--r-- | fs/f2fs/node.h | 31 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 8 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 776 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 139 | ||||
-rw-r--r-- | fs/f2fs/super.c | 48 | ||||
-rw-r--r-- | fs/f2fs/trace.c | 4 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 31 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 8 |
20 files changed, 1694 insertions, 902 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0339daf4ca02..ea9c317b5916 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -275,10 +275,11 @@ static int f2fs_write_meta_pages(struct address_space *mapping, get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) goto skip_write; - trace_f2fs_writepages(mapping->host, wbc, META); + /* if locked failed, cp will flush dirty pages instead */ + if (!mutex_trylock(&sbi->cp_mutex)) + goto skip_write; - /* if mounting is failed, skip writing node pages */ - mutex_lock(&sbi->cp_mutex); + trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = sync_meta_pages(sbi, META, wbc->nr_to_write); mutex_unlock(&sbi->cp_mutex); @@ -567,7 +568,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) if (ni.blk_addr != NULL_ADDR) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", + "%s: orphan failed (ino=%x) by kernel, retry mount.", __func__, ino); return -EIO; } @@ -677,7 +678,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset >= blk_size) { + if (crc_offset > (blk_size - sizeof(__le32))) { f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; @@ -816,7 +817,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type) return; set_inode_flag(inode, flag); - list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]); + if (!f2fs_is_volatile_file(inode)) + list_add_tail(&F2FS_I(inode)->dirty_list, + &sbi->inode_list[type]); stat_inc_dirty_inode(sbi, type); } @@ -941,6 +944,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) return 0; } +static void __prepare_cp_block(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + nid_t last_nid = nm_i->next_scan_nid; + + next_free_nid(sbi, &last_nid); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + ckpt->next_free_nid = cpu_to_le32(last_nid); +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -964,21 +980,26 @@ retry_flush_dents: err = sync_dirty_inodes(sbi, DIR_INODE); if (err) goto out; + cond_resched(); goto retry_flush_dents; } + /* + * POR: we should ensure that there are no dirty node pages + * until finishing nat/sit flush. inode->i_blocks can be updated. + */ + down_write(&sbi->node_change); + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + up_write(&sbi->node_change); f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) goto out; + cond_resched(); goto retry_flush_dents; } - /* - * POR: we should ensure that there are no dirty node pages - * until finishing nat/sit flush. - */ retry_flush_nodes: down_write(&sbi->node_write); @@ -986,11 +1007,20 @@ retry_flush_nodes: up_write(&sbi->node_write); err = sync_node_pages(sbi, &wbc); if (err) { + up_write(&sbi->node_change); f2fs_unlock_all(sbi); goto out; } + cond_resched(); goto retry_flush_nodes; } + + /* + * sbi->node_change is used only for AIO write_begin path which produces + * dirty node blocks and some checkpoint values by block allocation. + */ + __prepare_cp_block(sbi); + up_write(&sbi->node_change); out: blk_finish_plug(&plug); return err; @@ -1024,16 +1054,20 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_lock(&sbi->cp_lock); - if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count > + if ((cpc->reason & CP_UMOUNT) && + le32_to_cpu(ckpt->cp_pack_total_block_count) > sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) disable_nat_bits(sbi, false); - if (cpc->reason == CP_UMOUNT) + if (cpc->reason & CP_TRIMMED) + __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + + if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); else __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - if (cpc->reason == CP_FASTBOOT) + if (cpc->reason & CP_FASTBOOT) __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); else __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); @@ -1057,7 +1091,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; - nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; @@ -1074,14 +1107,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return -EIO; } - next_free_nid(sbi, &last_nid); - /* * modify checkpoint * version number is already updated */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); - ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = @@ -1100,10 +1130,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); } - ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); - ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); - ckpt->next_free_nid = cpu_to_le32(last_nid); - /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi, false); spin_lock(&sbi->cp_lock); @@ -1143,7 +1169,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* write nat bits */ if (enabled_nat_bits(sbi, cpc)) { __u64 cp_ver = cur_cp_version(ckpt); - unsigned int i; block_t blk; cp_ver |= ((__u64)crc32 << 32); @@ -1250,8 +1275,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || - (cpc->reason == CP_DISCARD && !sbi->discard_blks))) + ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || + ((cpc->reason & CP_DISCARD) && !sbi->discard_blks))) goto out; if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1273,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_merged_bios(sbi); /* this is the case of multiple fstrims without any changes */ - if (cpc->reason == CP_DISCARD) { + if (cpc->reason & CP_DISCARD) { if (!exist_trim_candidates(sbi, cpc)) { unblock_operations(sbi); goto out; @@ -1311,7 +1336,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); - if (cpc->reason == CP_RECOVERY) + if (cpc->reason & CP_RECOVERY) f2fs_msg(sbi->sb, KERN_NOTICE, "checkpoint: version = %llx", ckpt_ver); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1602b4bccae6..7c0f6bdf817d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -309,7 +309,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - io->fio.op_flags = REQ_META | REQ_PRIO; + io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC; if (!test_opt(sbi, NOBARRIER)) io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } @@ -341,7 +341,7 @@ void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) /* * Fill the locked page with data located in the block address. - * Return unlocked page. + * A caller needs to unlock the page on failure. */ int f2fs_submit_page_bio(struct f2fs_io_info *fio) { @@ -362,6 +362,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_set_op_attrs(bio, fio->op, fio->op_flags); __submit_bio(fio->sbi, bio, fio->type); + + if (!is_read_io(fio->op)) + inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); return 0; } @@ -787,6 +790,21 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } +static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +{ + if (flag == F2FS_GET_BLOCK_PRE_AIO) { + if (lock) + down_read(&sbi->node_change); + else + up_read(&sbi->node_change); + } else { + if (lock) + f2fs_lock_op(sbi); + else + f2fs_unlock_op(sbi); + } +} + /* * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with * f2fs_map_blocks structure. @@ -829,7 +847,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, next_dnode: if (create) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -939,7 +957,7 @@ skip: f2fs_put_dnode(&dn); if (create) { - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; @@ -948,7 +966,7 @@ sync_out: f2fs_put_dnode(&dn); unlock_out: if (create) { - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } out: @@ -1151,9 +1169,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping, for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { - prefetchw(&page->flags); if (pages) { page = list_last_entry(pages, struct page, lru); + + prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, @@ -1283,17 +1302,83 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } +static int encrypt_one_page(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + gfp_t gfp_flags = GFP_NOFS; + + if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) + return 0; + + /* wait for GCed encrypted page writeback */ + f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr); + +retry_encrypt: + fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, + PAGE_SIZE, 0, fio->page->index, gfp_flags); + if (!IS_ERR(fio->encrypted_page)) + return 0; + + /* flush pending IOs and wait for a while in the ENOMEM case */ + if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { + f2fs_flush_merged_bios(fio->sbi); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } + return PTR_ERR(fio->encrypted_page); +} + +static inline bool need_inplace_update(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) + return false; + if (is_cold_data(fio->page)) + return false; + if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + return false; + + return need_inplace_update_policy(inode, fio); +} + +static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) +{ + if (fio->old_blkaddr == NEW_ADDR) + return false; + if (fio->old_blkaddr == NULL_ADDR) + return false; + return true; +} + int do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; + struct extent_info ei = {0,0,0}; + bool ipu_force = false; int err = 0; set_new_dnode(&dn, inode, NULL, NULL, 0); + if (need_inplace_update(fio) && + f2fs_lookup_extent_cache(inode, page->index, &ei)) { + fio->old_blkaddr = ei.blk + page->index - ei.fofs; + + if (valid_ipu_blkaddr(fio)) { + ipu_force = true; + fio->need_lock = false; + goto got_it; + } + } + + if (fio->need_lock) + f2fs_lock_op(fio->sbi); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) - return err; + goto out; fio->old_blkaddr = dn.data_blkaddr; @@ -1302,31 +1387,10 @@ int do_write_data_page(struct f2fs_io_info *fio) ClearPageUptodate(page); goto out_writepage; } - - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - gfp_t gfp_flags = GFP_NOFS; - - /* wait for GCed encrypted page writeback */ - f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), - fio->old_blkaddr); -retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, - PAGE_SIZE, 0, - fio->page->index, - gfp_flags); - if (IS_ERR(fio->encrypted_page)) { - err = PTR_ERR(fio->encrypted_page); - if (err == -ENOMEM) { - /* flush pending ios and wait for a while */ - f2fs_flush_merged_bios(F2FS_I_SB(inode)); - congestion_wait(BLK_RW_ASYNC, HZ/50); - gfp_flags |= __GFP_NOFAIL; - err = 0; - goto retry_encrypt; - } - goto out_writepage; - } - } +got_it: + err = encrypt_one_page(fio); + if (err) + goto out_writepage; set_page_writeback(page); @@ -1334,22 +1398,27 @@ retry_encrypt: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(fio->old_blkaddr != NEW_ADDR && - !is_cold_data(page) && - !IS_ATOMIC_WRITTEN_PAGE(page) && - need_inplace_update(inode))) { - rewrite_data_page(fio); + if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + f2fs_put_dnode(&dn); + if (fio->need_lock) + f2fs_unlock_op(fio->sbi); + err = rewrite_data_page(fio); + trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); - trace_f2fs_do_write_data_page(page, IPU); - } else { - write_data_page(&dn, fio); - trace_f2fs_do_write_data_page(page, OPU); - set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + return err; } + + /* LFS mode write path */ + write_data_page(&dn, fio); + trace_f2fs_do_write_data_page(page, OPU); + set_inode_flag(inode, FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); out_writepage: f2fs_put_dnode(&dn); +out: + if (fio->need_lock) + f2fs_unlock_op(fio->sbi); return err; } @@ -1370,9 +1439,11 @@ static int __write_data_page(struct page *page, bool *submitted, .type = DATA, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), + .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, .submitted = false, + .need_lock = true, }; trace_f2fs_writepage(page, DATA); @@ -1408,6 +1479,7 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { + fio.need_lock = false; err = do_write_data_page(&fio); goto done; } @@ -1416,6 +1488,8 @@ write: need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0, 0)) goto redirty_out; + else + set_inode_flag(inode, FI_HOT_DATA); err = -EAGAIN; if (f2fs_has_inline_data(inode)) { @@ -1423,12 +1497,12 @@ write: if (!err) goto out; } - f2fs_lock_op(sbi); + if (err == -EAGAIN) err = do_write_data_page(&fio); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - f2fs_unlock_op(sbi); + done: if (err && err != -ENOENT) goto redirty_out; @@ -1441,12 +1515,14 @@ out: if (wbc->for_reclaim) { f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index, DATA, WRITE); + clear_inode_flag(inode, FI_HOT_DATA); remove_dirty_inode(inode); submitted = NULL; } unlock_page(page); - f2fs_balance_fs(sbi, need_balance_fs); + if (!S_ISDIR(inode->i_mode)) + f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); @@ -1495,6 +1571,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pagevec_init(&pvec, 0); + if (get_dirty_pages(mapping->host) <= + SM_I(F2FS_M_SB(mapping))->min_hot_blocks) + set_inode_flag(mapping->host, FI_HOT_DATA); + else + clear_inode_flag(mapping->host, FI_HOT_DATA); + if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; @@ -1580,8 +1662,10 @@ continue_unlock: last_idx = page->index; } - if (--wbc->nr_to_write <= 0 && - wbc->sync_mode == WB_SYNC_NONE) { + /* give a priority to WB_SYNC threads */ + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) || + --wbc->nr_to_write <= 0) && + wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } @@ -1637,9 +1721,18 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_inc(&sbi->wb_sync_req); + else if (atomic_read(&sbi->wb_sync_req)) + goto skip_write; + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc); blk_finish_plug(&plug); + + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_dec(&sbi->wb_sync_req); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. @@ -1687,7 +1780,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); locked = true; } restart: @@ -1723,7 +1816,8 @@ restart: err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, + true); locked = true; goto restart; } @@ -1737,7 +1831,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); return err; } @@ -1951,7 +2045,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) - return; + return drop_inmem_page(inode, page); set_page_private(page, 0); ClearPagePrivate(page); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ee2d0a485fc3..87f449845f5f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -51,15 +51,26 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); + si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); - if (SM_I(sbi) && SM_I(sbi)->fcc_info) - si->nr_flush = - atomic_read(&SM_I(sbi)->fcc_info->submit_flush); - if (SM_I(sbi) && SM_I(sbi)->dcc_info) - si->nr_discard = - atomic_read(&SM_I(sbi)->dcc_info->submit_discard); + if (SM_I(sbi) && SM_I(sbi)->fcc_info) { + si->nr_flushed = + atomic_read(&SM_I(sbi)->fcc_info->issued_flush); + si->nr_flushing = + atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + } + if (SM_I(sbi) && SM_I(sbi)->dcc_info) { + si->nr_discarded = + atomic_read(&SM_I(sbi)->dcc_info->issued_discard); + si->nr_discarding = + atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + si->nr_discard_cmd = + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; + } si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -86,6 +97,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; + si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -99,8 +111,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); si->curseg[i] = curseg->segno; - si->cursec[i] = curseg->segno / sbi->segs_per_sec; - si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno); + si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } for (i = 0; i < 2; i++) { @@ -124,10 +136,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; + blks_per_sec = BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + vblocks = get_valid_blocks(sbi, segno, true); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; @@ -156,7 +168,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi) if (si->base_mem) goto get_cache; - si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + /* build stat */ + si->base_mem = sizeof(struct f2fs_stat_info); + + /* build superblock */ + si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; si->base_mem += 2 * sizeof(struct f2fs_inode_info); si->base_mem += sizeof(*sbi->ckpt); si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; @@ -208,8 +224,11 @@ get_cache: /* build merge flush thread */ if (SM_I(sbi)->fcc_info) si->cache_mem += sizeof(struct flush_cmd_control); - if (SM_I(sbi)->dcc_info) + if (SM_I(sbi)->dcc_info) { si->cache_mem += sizeof(struct discard_cmd_control); + si->cache_mem += sizeof(struct discard_cmd) * + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + } /* free nids */ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + @@ -330,11 +349,16 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n", + seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " + "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, - si->nr_flush, si->nr_discard); - seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n", - si->inmem_pages, si->aw_cnt, si->max_aw_cnt); + si->nr_flushing, si->nr_flushed, + si->nr_discarding, si->nr_discarded, + si->nr_discard_cmd, si->undiscard_blks); + seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " + "volatile IO: %4d (Max. %4d)\n", + si->inmem_pages, si->aw_cnt, si->max_aw_cnt, + si->vw_cnt, si->max_vw_cnt); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", @@ -347,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_imeta); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); - seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n", - si->free_nids, si->alloc_nids); + seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", + si->free_nids, si->avail_nids, si->alloc_nids); seq_puts(s, "\nDistribution of User Blocks:"); seq_puts(s, " [ valid | invalid | free ]\n"); seq_puts(s, " ["); @@ -434,7 +458,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inplace_count, 0); atomic_set(&sbi->aw_cnt, 0); + atomic_set(&sbi->vw_cnt, 0); atomic_set(&sbi->max_aw_cnt, 0); + atomic_set(&sbi->max_vw_cnt, 0); mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e64087052834..94756f55a97e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); de = find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; @@ -192,13 +192,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - /* This is to increase the speed of f2fs_create */ - if (!de && room) { - F2FS_I(dir)->task = current; - if (F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; - } + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; } return de; @@ -239,6 +235,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, break; } out: + /* This is to increase the speed of f2fs_create */ + if (!de) + F2FS_I(dir)->task = current; return de; } @@ -322,24 +321,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -int update_dent_inode(struct inode *inode, struct inode *to, - const struct qstr *name) -{ - struct page *page; - - if (file_enc_name(to)) - return 0; - - page = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(page)) - return PTR_ERR(page); - - init_dent_inode(name, page); - f2fs_put_page(page, 1); - - return 0; -} - void do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { @@ -369,7 +350,7 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); @@ -423,8 +404,11 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, set_cold_node(inode, page); } - if (new_name) + if (new_name) { init_dent_inode(new_name, page); + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); + } /* * This file should be checkpointed during fsync. @@ -584,11 +568,9 @@ add_dentry: err = PTR_ERR(page); goto fail; } - if (f2fs_encrypted_inode(dir)) - file_set_enc_name(inode); } - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -896,7 +878,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) dentry_blk = kmap(dentry_page); - make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index c6934f014e0f..2f98d7039701 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -18,6 +18,179 @@ #include "node.h" #include <trace/events/f2fs.h> +static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, + unsigned int ofs) +{ + if (cached_re) { + if (cached_re->ofs <= ofs && + cached_re->ofs + cached_re->len > ofs) { + return cached_re; + } + } + return NULL; +} + +static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, + unsigned int ofs) +{ + struct rb_node *node = root->rb_node; + struct rb_entry *re; + + while (node) { + re = rb_entry(node, struct rb_entry, rb_node); + + if (ofs < re->ofs) + node = node->rb_left; + else if (ofs >= re->ofs + re->len) + node = no |