diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-fs-f2fs | 6 | ||||
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 6 | ||||
-rw-r--r-- | fs/f2fs/Kconfig | 10 | ||||
-rw-r--r-- | fs/f2fs/Makefile | 1 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 6 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 95 | ||||
-rw-r--r-- | fs/f2fs/data.c | 218 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 59 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 3 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 120 | ||||
-rw-r--r-- | fs/f2fs/file.c | 100 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 38 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 33 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 32 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 37 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 2 | ||||
-rw-r--r-- | fs/f2fs/node.c | 154 | ||||
-rw-r--r-- | fs/f2fs/node.h | 45 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 11 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 194 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 29 | ||||
-rw-r--r-- | fs/f2fs/super.c | 75 | ||||
-rw-r--r-- | fs/f2fs/trace.c | 159 | ||||
-rw-r--r-- | fs/f2fs/trace.h | 46 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 7 | ||||
-rw-r--r-- | include/trace/events/f2fs.h | 148 |
26 files changed, 1075 insertions, 559 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 6f9157f16725..2c4cc42006e8 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -74,3 +74,9 @@ Date: March 2014 Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> Description: Controls the memory footprint used by f2fs. + +What: /sys/fs/f2fs/<disk>/trim_sections +Date: February 2015 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Controls the trimming rate in batch mode. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index e0950c483c22..dac11d7fef27 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -106,6 +106,8 @@ background_gc=%s Turn on/off cleaning operations, namely garbage Default value for this option is on. So garbage collection is on by default. disable_roll_forward Disable the roll-forward recovery routine +norecovery Disable the roll-forward recovery routine, mounted read- + only (i.e., -o ro,disable_roll_forward) discard Issue discard/TRIM commands when a segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while @@ -197,6 +199,10 @@ Files in /sys/fs/f2fs/<devname> checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. + trim_sections This parameter controls the number of sections + to be trimmed out in batch mode when FITRIM + conducts. 32 sections is set by default. + ipu_policy This parameter controls the policy of in-place updates in f2fs. There are five policies: 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 736a348509f7..94e2d2ffabe1 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -71,3 +71,13 @@ config F2FS_CHECK_FS Enables BUG_ONs which check the filesystem consistency in runtime. If you want to improve the performance, say N. + +config F2FS_IO_TRACE + bool "F2FS IO tracer" + depends on F2FS_FS + depends on FUNCTION_TRACER + help + F2FS IO trace is based on a function trace, which gathers process + information and block IO patterns in the filesystem level. + + If unsure, say N. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 2e35da12d292..d92397731db8 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -5,3 +5,4 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o +f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 1ccb26bc2a0b..742202779bd5 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) int i; f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * - sizeof(struct f2fs_acl_entry), GFP_KERNEL); + sizeof(struct f2fs_acl_entry), GFP_NOFS); if (!f2fs_acl) return ERR_PTR(-ENOMEM); @@ -396,7 +396,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, posix_acl_release(default_acl); } if (acl) { - if (error) + if (!error) error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e6c271fefaca..7f794b72b3b7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -20,10 +20,11 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include <trace/events/f2fs.h> static struct kmem_cache *ino_entry_slab; -static struct kmem_cache *inode_entry_slab; +struct kmem_cache *inode_entry_slab; /* * We guarantee no failure on the returned page. @@ -50,6 +51,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); struct page *page; + struct f2fs_io_info fio = { + .type = META, + .rw = READ_SYNC | REQ_META | REQ_PRIO, + .blk_addr = index, + }; repeat: page = grab_cache_page(mapping, index); if (!page) { @@ -59,8 +65,7 @@ repeat: if (PageUptodate(page)) goto out; - if (f2fs_submit_page_bio(sbi, page, index, - READ_SYNC | REQ_META | REQ_PRIO)) + if (f2fs_submit_page_bio(sbi, page, &fio)) goto repeat; lock_page(page); @@ -112,14 +117,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; for (; nrpages-- > 0; blkno++) { - block_t blk_addr; if (!is_valid_blkaddr(sbi, blkno, type)) goto out; @@ -130,27 +133,27 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; /* get nat block addr */ - blk_addr = current_nat_addr(sbi, + fio.blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - blk_addr = current_sit_addr(sbi, + fio.blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); - if (blkno != start && prev_blk_addr + 1 != blk_addr) + if (blkno != start && prev_blk_addr + 1 != fio.blk_addr) goto out; - prev_blk_addr = blk_addr; + prev_blk_addr = fio.blk_addr; break; case META_SSA: case META_CP: case META_POR: - blk_addr = blkno; + fio.blk_addr = blkno; break; default: BUG(); } - page = grab_cache_page(META_MAPPING(sbi), blk_addr); + page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr); if (!page) continue; if (PageUptodate(page)) { @@ -158,7 +161,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type continue; } - f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); + f2fs_submit_page_mbio(sbi, page, &fio); f2fs_put_page(page, 0); } out: @@ -187,7 +190,7 @@ static int f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; @@ -299,6 +302,8 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); + SetPagePrivate(page); + f2fs_trace_pid(page); return 1; } return 0; @@ -308,6 +313,8 @@ const struct address_space_operations f2fs_meta_aops = { .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .set_page_dirty = f2fs_set_meta_page_dirty, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -462,7 +469,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return; - sbi->por_doing = true; + set_sbi_flag(sbi, SBI_POR_DOING); start_blk = __start_cp_addr(sbi) + 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); @@ -483,7 +490,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) } /* clear Orphan Flag */ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); return; } @@ -567,7 +574,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp1; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; @@ -582,7 +589,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp2; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; @@ -669,7 +676,7 @@ fail_no_cp: return -EINVAL; } -static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -686,7 +693,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new; + struct inode_entry *new; int ret = 0; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) @@ -710,12 +717,13 @@ void update_dirty_page(struct inode *inode, struct page *page) kmem_cache_free(inode_entry_slab, new); out: SetPagePrivate(page); + f2fs_trace_pid(page); } void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new = + struct inode_entry *new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); int ret = 0; @@ -733,7 +741,7 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *entry; + struct inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; @@ -763,7 +771,7 @@ void remove_dirty_dir_inode(struct inode *inode) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; - struct dir_inode_entry *entry; + struct inode_entry *entry; struct inode *inode; retry: if (unlikely(f2fs_cp_error(sbi))) @@ -776,7 +784,7 @@ retry: spin_unlock(&sbi->dir_inode_lock); return; } - entry = list_entry(head->next, struct dir_inode_entry, list); + entry = list_entry(head->next, struct inode_entry, list); inode = igrab(entry->inode); spin_unlock(&sbi->dir_inode_lock); if (inode) { @@ -922,7 +930,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->next_free_nid = cpu_to_le32(last_nid); /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi); + data_sum_blocks = npages_for_summary_flush(sbi, false); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else @@ -932,24 +940,31 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); - if (cpc->reason == CP_UMOUNT) { - set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + if (__remain_node_summaries(cpc->reason)) ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); - } else { - clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); - } + + if (cpc->reason == CP_UMOUNT) + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else + clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + + if (cpc->reason == CP_FASTBOOT) + set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + else + clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (sbi->need_fsck) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) set_ckpt_flags(ckpt, CP_FSCK_FLAG); /* update SIT/NAT bitmap */ @@ -966,15 +981,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* write out checkpoint buffer at block 0 */ cp_page = grab_meta_page(sbi, start_blk++); kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + memcpy(kaddr, ckpt, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); for (i = 1; i < 1 + cp_payload_blks; i++) { cp_page = grab_meta_page(sbi, start_blk++); kaddr = page_address(cp_page); - memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, - (1 << sbi->log_blocksize)); + memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); } @@ -986,7 +1000,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; - if (cpc->reason == CP_UMOUNT) { + if (__remain_node_summaries(cpc->reason)) { write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } @@ -994,7 +1008,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* writeout checkpoint block */ cp_page = grab_meta_page(sbi, start_blk); kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + memcpy(kaddr, ckpt, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); @@ -1023,7 +1037,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return; clear_prefree_segments(sbi); - F2FS_RESET_SB_DIRT(sbi); + clear_sbi_flag(sbi, SBI_IS_DIRTY); } /* @@ -1038,10 +1052,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); - if (!sbi->s_dirty && cpc->reason != CP_DISCARD) + if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && + cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; + if (f2fs_readonly(sbi->sb)) + goto out; if (block_operations(sbi)) goto out; @@ -1102,8 +1119,8 @@ int __init create_checkpoint_caches(void) sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", - sizeof(struct dir_inode_entry)); + inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + sizeof(struct inode_entry)); if (!inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7ec697b37f19..985ed023a750 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -22,6 +22,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include <trace/events/f2fs.h> static void f2fs_read_end_io(struct bio *bio, int err) @@ -95,11 +96,9 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) return; if (is_read_io(fio->rw)) - trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); else - trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); submit_bio(fio->rw, io->bio); io->bio = NULL; @@ -132,14 +131,15 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, * Return unlocked page. */ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, int rw) + struct f2fs_io_info *fio) { struct bio *bio; - trace_f2fs_submit_page_bio(page, blk_addr, rw); + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(page, fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); + bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); @@ -147,12 +147,12 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, return -EFAULT; } - submit_bio(rw, bio); + submit_bio(fio->rw, bio); return 0; } void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, struct f2fs_io_info *fio) + struct f2fs_io_info *fio) { enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; @@ -160,21 +160,21 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, io = is_read ? &sbi->read_io : &sbi->write_io[btype]; - verify_block_addr(sbi, blk_addr); + verify_block_addr(sbi, fio->blk_addr); down_write(&io->io_rwsem); if (!is_read) inc_page_count(sbi, F2FS_WRITEBACK); - if (io->bio && (io->last_block_in_bio != blk_addr - 1 || + if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 || io->fio.rw != fio->rw)) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { int bio_blocks = MAX_BIO_BLOCKS(sbi); - io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); + io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read); io->fio = *fio; } @@ -184,10 +184,11 @@ alloc_new: goto alloc_new; } - io->last_block_in_bio = blk_addr; + io->last_block_in_bio = fio->blk_addr; + f2fs_trace_ios(page, fio, 0); up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); + trace_f2fs_submit_page_mbio(page, fio); } /* @@ -196,7 +197,7 @@ alloc_new: * ->node_page * update block addresses in the node page */ -static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +static void __set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn; __le32 *addr_array; @@ -209,7 +210,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); - addr_array[ofs_in_node] = cpu_to_le32(new_addr); + addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); set_page_dirty(node_page); } @@ -224,8 +225,8 @@ int reserve_new_block(struct dnode_of_data *dn) trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); - __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + __set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; @@ -273,7 +274,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, unsigned int blkbits = inode->i_sb->s_blocksize_bits; size_t count; - clear_buffer_new(bh_result); + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, start_blkaddr + pgofs - start_fofs); count = end_fofs - pgofs + 1; @@ -290,23 +291,24 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } -void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +void update_extent_cache(struct dnode_of_data *dn) { struct f2fs_inode_info *fi = F2FS_I(dn->inode); pgoff_t fofs, start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); /* Update the page address in the parent node */ - __set_data_blkaddr(dn, blk_addr); + __set_data_blkaddr(dn); if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + write_lock(&fi->ext.ext_lock); start_fofs = fi->ext.fofs; @@ -320,16 +322,16 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) /* Initial extent */ if (fi->ext.len == 0) { - if (blk_addr != NULL_ADDR) { + if (dn->data_blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk_addr = blk_addr; + fi->ext.blk_addr = dn->data_blkaddr; fi->ext.len = 1; } goto end_update; } /* Front merge */ - if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { fi->ext.fofs--; fi->ext.blk_addr--; fi->ext.len++; @@ -337,7 +339,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) } /* Back merge */ - if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { fi->ext.len++; goto end_update; } @@ -376,6 +378,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct dnode_of_data dn; struct page *page; int err; + struct f2fs_io_info fio = { + .type = DATA, + .rw = sync ? READ_SYNC : READA, + }; page = find_get_page(mapping, index); if (page && PageUptodate(page)) @@ -404,8 +410,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return page; } - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, - sync ? READ_SYNC : READA); + fio.blk_addr = dn.data_blkaddr; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) return ERR_PTR(err); @@ -430,7 +436,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct dnode_of_data dn; struct page *page; int err; - + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + }; repeat: page = grab_cache_page(mapping, index); if (!page) @@ -464,8 +473,8 @@ repeat: return page; } - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); + fio.blk_addr = dn.data_blkaddr; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) return ERR_PTR(err); @@ -515,8 +524,12 @@ repeat: zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + .blk_addr = dn.data_blkaddr, + }; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) goto put_err; @@ -550,30 +563,25 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; - block_t new_blkaddr; struct node_info ni; + int seg = CURSEG_WARM_DATA; pgoff_t fofs; - int type; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; - __set_data_blkaddr(dn, NEW_ADDR); - dn->data_blkaddr = NEW_ADDR; - get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - type = CURSEG_WARM_DATA; + if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) + seg = CURSEG_DIRECT_IO; - allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); + allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ - set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); - update_extent_cache(new_blkaddr, dn); - clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + __set_data_blkaddr(dn); /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -581,10 +589,59 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); - dn->data_blkaddr = new_blkaddr; return 0; } +static void __allocate_data_blocks(struct inode *inode, loff_t offset, + size_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + u64 start = F2FS_BYTES_TO_BLK(offset); + u64 len = F2FS_BYTES_TO_BLK(count); + bool allocated; + u64 end_offset; + + while (len) { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&dn, start, ALLOC_NODE)) + goto out; + + allocated = false; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + + while (dn.ofs_in_node < end_offset && len) { + if (dn.data_blkaddr == NULL_ADDR) { + if (__allocate_data_block(&dn)) + goto sync_out; + allocated = true; + } + len--; + start++; + dn.ofs_in_node++; + } + + if (allocated) + sync_inode_page(&dn); + + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + } + return; + +sync_out: + if (allocated) + sync_inode_page(&dn); + f2fs_put_dnode(&dn); +out: + f2fs_unlock_op(sbi); + return; +} + /* * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. * If original data blocks are allocated, then give them to blockdev. @@ -610,10 +667,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (check_extent_cache(inode, pgofs, bh_result)) goto out; - if (create) { - f2fs_balance_fs(F2FS_I_SB(inode)); + if (create) f2fs_lock_op(F2FS_I_SB(inode)); - } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -627,12 +682,14 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else if (create) { err = __allocate_data_block(&dn); if (err) goto put_out; allocated = true; + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else { goto put_out; @@ -745,7 +802,6 @@ static int f2fs_read_data_pages(struct file *file, int do_write_data_page(struct page *page, struct f2fs_io_info *fio) { struct inode *inode = page->mapping->host; - block_t old_blkaddr, new_blkaddr; struct dnode_of_data dn; int err = 0; @@ -754,10 +810,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) if (err) return err; - old_blkaddr = dn.data_blkaddr; + fio->blk_addr = dn.data_blkaddr; /* This page is already truncated */ - if (old_blkaddr == NULL_ADDR) + if (fio->blk_addr == NULL_ADDR) goto out_writepage; set_page_writeback(page); @@ -766,14 +822,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(old_blkaddr != NEW_ADDR && + if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && need_inplace_update(inode))) { - rewrite_data_page(page, old_blkaddr, fio); + rewrite_data_page(page, fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { - write_data_page(page, &dn, &new_blkaddr, fio); - update_extent_cache(new_blkaddr, &dn); + write_data_page(page, &dn, fio); + update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: @@ -812,7 +868,12 @@ static int f2fs_write_data_page(struct page *page, zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto redirty_out; + if (f2fs_is_drop_cache(inode)) + goto out; + if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && + available_free_memory(sbi, BASE_CHECK)) goto redirty_out; /* Dentry blocks are controlled by checkpoint */ @@ -826,7 +887,6 @@ write: /* we should bypass data pages to proceed the kworkder jobs */ if (unlikely(f2fs_cp_error(sbi))) { SetPageError(page); - unlock_page(page); goto out; } @@ -1002,8 +1062,12 @@ put_next: if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + .blk_addr = dn.data_blkaddr, + }; + err = f2fs_submit_page_bio(sbi, page, &fio); if (err) goto fail; @@ -1092,6 +1156,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, trace_f2fs_direct_IO_enter(inode, offset, count, rw); + if (rw & WRITE) + __allocate_data_blocks(inode, offset, count); + err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); if (err < 0 && (rw & WRITE)) f2fs_write_failed(mapping, offset + count); @@ -1101,24 +1168,33 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } -static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, - unsigned int length) +void f2fs_invalidate_page(struct page *page, unsigned int offset, + unsigned int length) { struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) + if (inode->i_ino >= F2FS_ROOT_INO(sbi) && + (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))< |