summaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-12 20:05:58 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-12 20:05:58 -0700
commit6d8ef53e8b2fed8b0f91df0c6da7cc92747d934a (patch)
treefabe39021985b631ea5d457126e9ed3d53e910f0 /fs/f2fs
parentcdb897e3279ad1677138d6bdf1cfaf1393718a08 (diff)
parente6c6de18f010d9a7d592f4044d2c30213cb3a7bc (diff)
Merge tag 'f2fs-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this round, we've mostly tuned f2fs to provide better user experience for Android. Especially, we've worked on atomic write feature again with SQLite community in order to support it officially. And we added or modified several facilities to analyze and enhance IO behaviors. Major changes include: - add app/fs io stat - add inode checksum feature - support project/journalled quota - enhance atomic write with new ioctl() which exposes feature set - enhance background gc/discard/fstrim flows with new gc_urgent mode - add F2FS_IOC_FS{GET,SET}XATTR - fix some quota flows" * tag 'f2fs-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (63 commits) f2fs: hurry up to issue discard after io interruption f2fs: fix to show correct discard_granularity in sysfs f2fs: detect dirty inode in evict_inode f2fs: clear radix tree dirty tag of pages whose dirty flag is cleared f2fs: speed up gc_urgent mode with SSR f2fs: better to wait for fstrim completion f2fs: avoid race in between read xattr & write xattr f2fs: make get_lock_data_page to handle encrypted inode f2fs: use generic terms used for encrypted block management f2fs: introduce f2fs_encrypted_file for clean-up Revert "f2fs: add a new function get_ssr_cost" f2fs: constify super_operations f2fs: fix to wake up all sleeping flusher f2fs: avoid race in between atomic_read & atomic_inc f2fs: remove unneeded parameter of change_curseg f2fs: update i_flags correctly f2fs: don't check inode's checksum if it was dirtied or writebacked f2fs: don't need to update inode checksum for recovery f2fs: trigger fdatasync for non-atomic_write file f2fs: fix to avoid race in between aio and gc ...
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/acl.c5
-rw-r--r--fs/f2fs/checkpoint.c60
-rw-r--r--fs/f2fs/data.c177
-rw-r--r--fs/f2fs/dir.c7
-rw-r--r--fs/f2fs/f2fs.h285
-rw-r--r--fs/f2fs/file.c360
-rw-r--r--fs/f2fs/gc.c115
-rw-r--r--fs/f2fs/gc.h27
-rw-r--r--fs/f2fs/inline.c142
-rw-r--r--fs/f2fs/inode.c132
-rw-r--r--fs/f2fs/namei.c43
-rw-r--r--fs/f2fs/node.c79
-rw-r--r--fs/f2fs/recovery.c83
-rw-r--r--fs/f2fs/segment.c292
-rw-r--r--fs/f2fs/segment.h47
-rw-r--r--fs/f2fs/super.c433
-rw-r--r--fs/f2fs/sysfs.c251
-rw-r--r--fs/f2fs/xattr.c8
18 files changed, 2062 insertions, 484 deletions
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index b4b8438c42ef..436b3a1464d9 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -207,15 +207,16 @@ static int __f2fs_set_acl(struct inode *inode, int type,
void *value = NULL;
size_t size = 0;
int error;
+ umode_t mode = inode->i_mode;
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl && !ipage) {
- error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ error = posix_acl_update_mode(inode, &mode, &acl);
if (error)
return error;
- set_acl_inode(inode, inode->i_mode);
+ set_acl_inode(inode, mode);
}
break;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 5b876f6d3f6b..04fe1df052b2 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -230,8 +230,9 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
-static int f2fs_write_meta_page(struct page *page,
- struct writeback_control *wbc)
+static int __f2fs_write_meta_page(struct page *page,
+ struct writeback_control *wbc,
+ enum iostat_type io_type)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
@@ -244,7 +245,7 @@ static int f2fs_write_meta_page(struct page *page,
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
- write_meta_page(sbi, page);
+ write_meta_page(sbi, page, io_type);
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
@@ -263,6 +264,12 @@ redirty_out:
return AOP_WRITEPAGE_ACTIVATE;
}
+static int f2fs_write_meta_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ return __f2fs_write_meta_page(page, wbc, FS_META_IO);
+}
+
static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -283,7 +290,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
- written = sync_meta_pages(sbi, META, wbc->nr_to_write);
+ written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -295,7 +302,7 @@ skip_write:
}
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
- long nr_to_write)
+ long nr_to_write, enum iostat_type io_type)
{
struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
@@ -346,7 +353,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- if (mapping->a_ops->writepage(page, &wbc)) {
+ if (__f2fs_write_meta_page(page, &wbc, io_type)) {
unlock_page(page);
break;
}
@@ -581,11 +588,24 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blocks, i, j;
- int err;
+ unsigned int s_flags = sbi->sb->s_flags;
+ int err = 0;
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
+ if (s_flags & MS_RDONLY) {
+ f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ sbi->sb->s_flags &= ~MS_RDONLY;
+ }
+
+#ifdef CONFIG_QUOTA
+ /* Needed for iput() to work correctly and not trash data */
+ sbi->sb->s_flags |= MS_ACTIVE;
+ /* Turn on quotas so that they are updated correctly */
+ f2fs_enable_quota_files(sbi);
+#endif
+
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
@@ -601,14 +621,21 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
err = recover_orphan_inode(sbi, ino);
if (err) {
f2fs_put_page(page, 1);
- return err;
+ goto out;
}
}
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
- return 0;
+out:
+#ifdef CONFIG_QUOTA
+ /* Turn quotas off */
+ f2fs_quota_off_umount(sbi->sb);
+#endif
+ sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+
+ return err;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
@@ -904,7 +931,14 @@ retry:
if (inode) {
unsigned long cur_ino = inode->i_ino;
+ if (is_dir)
+ F2FS_I(inode)->cp_task = current;
+
filemap_fdatawrite(inode->i_mapping);
+
+ if (is_dir)
+ F2FS_I(inode)->cp_task = NULL;
+
iput(inode);
/* We need to give cpu to another writers. */
if (ino == cur_ino) {
@@ -1017,7 +1051,7 @@ retry_flush_nodes:
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
- err = sync_node_pages(sbi, &wbc);
+ err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
@@ -1115,7 +1149,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
- sync_meta_pages(sbi, META, LONG_MAX);
+ sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
@@ -1194,7 +1228,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT BITS pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
- sync_meta_pages(sbi, META, LONG_MAX);
+ sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
@@ -1249,7 +1283,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
/* Here, we only have one bio having CP pack */
- sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
+ sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index fb96bb71da00..36b535207c88 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -457,14 +457,65 @@ out_fail:
return err;
}
+static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
+ unsigned nr_pages)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct fscrypt_ctx *ctx = NULL;
+ struct bio *bio;
+
+ if (f2fs_encrypted_file(inode)) {
+ ctx = fscrypt_get_ctx(inode, GFP_NOFS);
+ if (IS_ERR(ctx))
+ return ERR_CAST(ctx);
+
+ /* wait the page to be moved by cleaning */
+ f2fs_wait_on_block_writeback(sbi, blkaddr);
+ }
+
+ bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES));
+ if (!bio) {
+ if (ctx)
+ fscrypt_release_ctx(ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+ f2fs_target_device(sbi, blkaddr, bio);
+ bio->bi_end_io = f2fs_read_end_io;
+ bio->bi_private = ctx;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+ return bio;
+}
+
+/* This can handle encryption stuffs */
+static int f2fs_submit_page_read(struct inode *inode, struct page *page,
+ block_t blkaddr)
+{
+ struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
+
+ if (IS_ERR(bio))
+ return PTR_ERR(bio);
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ bio_put(bio);
+ return -EFAULT;
+ }
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ return 0;
+}
+
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
struct f2fs_node *rn = F2FS_NODE(dn->node_page);
__le32 *addr_array;
+ int base = 0;
+
+ if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
+ base = get_extra_isize(dn->inode);
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
- addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
+ addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}
/*
@@ -508,8 +559,8 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
for (; count > 0; dn->ofs_in_node++) {
- block_t blkaddr =
- datablock_addr(dn->node_page, dn->ofs_in_node);
+ block_t blkaddr = datablock_addr(dn->inode,
+ dn->node_page, dn->ofs_in_node);
if (blkaddr == NULL_ADDR) {
dn->data_blkaddr = NEW_ADDR;
__set_data_blkaddr(dn);
@@ -570,16 +621,6 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
struct page *page;
struct extent_info ei = {0,0,0};
int err;
- struct f2fs_io_info fio = {
- .sbi = F2FS_I_SB(inode),
- .type = DATA,
- .op = REQ_OP_READ,
- .op_flags = op_flags,
- .encrypted_page = NULL,
- };
-
- if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
- return read_mapping_page(mapping, index, NULL);
page = f2fs_grab_cache_page(mapping, index, for_write);
if (!page)
@@ -620,9 +661,7 @@ got_it:
return page;
}
- fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
- fio.page = page;
- err = f2fs_submit_page_bio(&fio);
+ err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
if (err)
goto put_err;
return page;
@@ -756,7 +795,8 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
+ dn->data_blkaddr = datablock_addr(dn->inode,
+ dn->node_page, dn->ofs_in_node);
if (dn->data_blkaddr == NEW_ADDR)
goto alloc;
@@ -782,7 +822,7 @@ alloc:
static inline bool __force_buffered_io(struct inode *inode, int rw)
{
- return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
+ return (f2fs_encrypted_file(inode) ||
(rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
F2FS_I_SB(inode)->s_ndevs);
}
@@ -814,7 +854,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO);
}
- if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
+ if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
@@ -903,7 +943,7 @@ next_dnode:
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
next_block:
- blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
if (create) {
@@ -1040,7 +1080,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL);
+ F2FS_GET_BLOCK_DEFAULT, NULL);
}
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -1146,35 +1186,6 @@ out:
return ret;
}
-static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages)
-{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct fscrypt_ctx *ctx = NULL;
- struct bio *bio;
-
- if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
- ctx = fscrypt_get_ctx(inode, GFP_NOFS);
- if (IS_ERR(ctx))
- return ERR_CAST(ctx);
-
- /* wait the page to be moved by cleaning */
- f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
- }
-
- bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES));
- if (!bio) {
- if (ctx)
- fscrypt_release_ctx(ctx);
- return ERR_PTR(-ENOMEM);
- }
- f2fs_target_device(sbi, blkaddr, bio);
- bio->bi_end_io = f2fs_read_end_io;
- bio->bi_private = ctx;
-
- return bio;
-}
-
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
@@ -1240,7 +1251,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
map.m_len = last_block - block_in_file;
if (f2fs_map_blocks(inode, &map, 0,
- F2FS_GET_BLOCK_READ))
+ F2FS_GET_BLOCK_DEFAULT))
goto set_error_page;
}
got_it:
@@ -1271,12 +1282,11 @@ submit_and_realloc:
bio = NULL;
}
if (bio == NULL) {
- bio = f2fs_grab_bio(inode, block_nr, nr_pages);
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
if (IS_ERR(bio)) {
bio = NULL;
goto set_error_page;
}
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
}
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
@@ -1341,11 +1351,11 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
struct inode *inode = fio->page->mapping->host;
gfp_t gfp_flags = GFP_NOFS;
- if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
+ if (!f2fs_encrypted_file(inode))
return 0;
/* wait for GCed encrypted page writeback */
- f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr);
+ f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
@@ -1471,7 +1481,8 @@ out:
}
static int __write_data_page(struct page *page, bool *submitted,
- struct writeback_control *wbc)
+ struct writeback_control *wbc,
+ enum iostat_type io_type)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1492,6 +1503,7 @@ static int __write_data_page(struct page *page, bool *submitted,
.encrypted_page = NULL,
.submitted = false,
.need_lock = LOCK_RETRY,
+ .io_type = io_type,
};
trace_f2fs_writepage(page, DATA);
@@ -1598,7 +1610,7 @@ redirty_out:
static int f2fs_write_data_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_data_page(page, NULL, wbc);
+ return __write_data_page(page, NULL, wbc, FS_DATA_IO);
}
/*
@@ -1607,7 +1619,8 @@ static int f2fs_write_data_page(struct page *page,
* warm/hot data page.
*/
static int f2fs_write_cache_pages(struct address_space *mapping,
- struct writeback_control *wbc)
+ struct writeback_control *wbc,
+ enum iostat_type io_type)
{
int ret = 0;
int done = 0;
@@ -1697,7 +1710,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = __write_data_page(page, &submitted, wbc);
+ ret = __write_data_page(page, &submitted, wbc, io_type);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
@@ -1752,8 +1765,9 @@ continue_unlock:
return ret;
}
-static int f2fs_write_data_pages(struct address_space *mapping,
- struct writeback_control *wbc)
+int __f2fs_write_data_pages(struct address_space *mapping,
+ struct writeback_control *wbc,
+ enum iostat_type io_type)
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1790,7 +1804,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
goto skip_write;
blk_start_plug(&plug);
- ret = f2fs_write_cache_pages(mapping, wbc);
+ ret = f2fs_write_cache_pages(mapping, wbc, io_type);
blk_finish_plug(&plug);
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -1809,6 +1823,16 @@ skip_write:
return 0;
}
+static int f2fs_write_data_pages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+
+ return __f2fs_write_data_pages(mapping, wbc,
+ F2FS_I(inode)->cp_task == current ?
+ FS_CP_DATA_IO : FS_DATA_IO);
+}
+
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
@@ -1858,7 +1882,7 @@ restart:
set_new_dnode(&dn, inode, ipage, ipage, 0);
if (f2fs_has_inline_data(inode)) {
- if (pos + len <= MAX_INLINE_DATA) {
+ if (pos + len <= MAX_INLINE_DATA(inode)) {
read_inline_data(page, ipage);
set_inode_flag(inode, FI_DATA_EXIST);
if (inode->i_nlink)
@@ -1956,8 +1980,8 @@ repeat:
f2fs_wait_on_page_writeback(page, DATA, false);
/* wait for GCed encrypted page writeback */
- if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
- f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
+ if (f2fs_encrypted_file(inode))
+ f2fs_wait_on_block_writeback(sbi, blkaddr);
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
@@ -1971,21 +1995,9 @@ repeat:
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
} else {
- struct bio *bio;
-
- bio = f2fs_grab_bio(inode, blkaddr, 1);
- if (IS_ERR(bio)) {
- err = PTR_ERR(bio);
- goto fail;
- }
- bio->bi_opf = REQ_OP_READ;
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- bio_put(bio);
- err = -EFAULT;
+ err = f2fs_submit_page_read(inode, page, blkaddr);
+ if (err)
goto fail;
- }
-
- __submit_bio(sbi, bio, DATA);
lock_page(page);
if (unlikely(page->mapping != mapping)) {
@@ -2075,10 +2087,13 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
up_read(&F2FS_I(inode)->dio_rwsem[rw]);
if (rw == WRITE) {
- if (err > 0)
+ if (err > 0) {
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
+ err);
set_inode_flag(inode, FI_UPDATE_WRITE);
- else if (err < 0)
+ } else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
+ }
}
trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 37f9c7f55605..c0c933ad43c8 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -705,6 +705,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+ struct address_space *mapping = page_mapping(page);
+ unsigned long flags;
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
@@ -735,6 +737,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) {
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
ClearPageUptodate(page);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 94a88b233e98..9a7c90386947 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -91,6 +91,8 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_LFS 0x00040000
#define F2FS_MOUNT_USRQUOTA 0x00080000
#define F2FS_MOUNT_GRPQUOTA 0x00100000
+#define F2FS_MOUNT_PRJQUOTA 0x00200000
+#define F2FS_MOUNT_QUOTA 0x00400000
#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -110,8 +112,12 @@ struct f2fs_mount_info {
unsigned int opt;
};
-#define F2FS_FEATURE_ENCRYPT 0x0001
-#define F2FS_FEATURE_BLKZONED 0x0002
+#define F2FS_FEATURE_ENCRYPT 0x0001
+#define F2FS_FEATURE_BLKZONED 0x0002
+#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
+#define F2FS_FEATURE_EXTRA_ATTR 0x0008
+#define F2FS_FEATURE_PRJQUOTA 0x0010
+#define F2FS_FEATURE_INODE_CHKSUM 0x0020
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -142,6 +148,8 @@ enum {
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DISCARD_ISSUE_RATE 8
+#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
+#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
@@ -190,11 +198,18 @@ struct discard_entry {
unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */
};
+/* default discard granularity of inner discard thread, unit: block count */
+#define DEFAULT_DISCARD_GRANULARITY 16
+
/* max discard pend list number */
#define MAX_PLIST_NUM 512
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
(MAX_PLIST_NUM - 1) : (blk_num - 1))
+#define P_ACTIVE 0x01
+#define P_TRIM 0x02
+#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM))
+
enum {
D_PREP,
D_SUBMIT,
@@ -230,11 +245,14 @@ struct discard_cmd_control {
struct task_struct *f2fs_issue_discard; /* discard thread */
struct list_head entry_list; /* 4KB discard entry list */
struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
+ unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */
struct list_head wait_list; /* store on-flushing entries */
wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
+ unsigned int discard_wake; /* to wake up discard thread */
struct mutex cmd_lock;
unsigned int nr_discards; /* # of discards in the list */
unsigned int max_discards; /* max. discards to be issued */
+ unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issing discard */
@@ -308,6 +326,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
struct f2fs_flush_device)
#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \
struct f2fs_gc_range)
+#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32)
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -332,6 +351,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION
#endif
+#define F2FS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define F2FS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
+
struct f2fs_gc_range {
u32 sync;
u64 start;
@@ -355,16 +377,36 @@ struct f2fs_flush_device {
u32 segments; /* # of segments to flush */
};
+/* for inline stuff */
+#define DEF_INLINE_RESERVED_SIZE 1
+static inline int get_extra_isize(struct inode *inode);
+#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
+ (CUR_ADDRS_PER_INODE(inode) - \
+ DEF_INLINE_RESERVED_SIZE - \
+ F2FS_INLINE_XATTR_ADDRS))
+
+/* for inline dir */
+#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
+ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
+ BITS_PER_BYTE + 1))
+#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \
+ BITS_PER_BYTE - 1) / BITS_PER_BYTE)
+#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \
+ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
+ NR_INLINE_DENTRY(inode) + \
+ INLINE_DENTRY_BITMAP_SIZE(inode)))
+
/*
* For INODE and NODE manager
*/
/* for directory operations */
struct f2fs_dentry_ptr {
struct inode *inode;
- const void *bitmap;
+ void *bitmap;
struct f2fs_dir_entry *dentry;
__u8 (*filename)[F2FS_SLOT_LEN];
int max;
+ int nr_bitmap;
};
static inline void make_dentry_ptr_block(struct inode *inode,
@@ -372,19 +414,26 @@ static inline void make_dentry_ptr_block(struct inode *inode,
{
d->inode = inode;
d->max = NR_DENTRY_IN_BLOCK;
+ d->nr_bitmap = SIZE_OF_DENTRY_BITMAP;
d->bitmap = &t->dentry_bitmap;
d->dentry = t->dentry;
d->filename = t->filename;
}
static inline void make_dentry_ptr_inline(struct inode *inode,
- struct f2fs_dentry_ptr *d, struct f2fs_inline_dentry *t)
+ struct f2fs_dentry_ptr *d, void *t)
{
+ int entry_cnt = NR_INLINE_DENTRY(inode);
+ int bitmap_size = INLINE_DENTRY_BITMAP_SIZE(inode);
+ int reserved_size = INLINE_RESERVED_SIZE(inode);
+
d->inode = inode;
- d->max = NR_INLINE_DENTRY;
- d->bitmap = &t->dentry_bitmap;
- d->dentry = t->dentry;
- d->filename = t->filename;
+ d->max = entry_cnt;
+ d->nr_bitmap = bitmap_size;
+ d->bitmap = t;
+ d->dentry = t + bitmap_size + reserved_size;
+ d->filename = t + bitmap_size + reserved_size +
+ SIZE_OF_DIR_ENTRY * entry_cnt;
}
/*
@@ -473,12 +522,13 @@ struct f2fs_map_blocks {
};
/* for flag in get_data_block */
-#define F2FS_GET_BLOCK_READ 0
-#define F2FS_GET_BLOCK_DIO 1
-#define F2FS_GET_BLOCK_FIEMAP 2
-#define F2FS_GET_BLOCK_BMAP 3
-#define F2FS_GET_BLOCK_PRE_DIO 4
-#define F2FS_GET_BLOCK_PRE_AIO 5
+enum {
+ F2FS_GET_BLOCK_DEFAULT,
+ F2FS_GET_BLOCK_FIEMAP,
+ F2FS_GET_BLOCK_BMAP,
+ F2FS_GET_BLOCK_PRE_DIO,
+ F2FS_GET_BLOCK_PRE_AIO,
+};
/*
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
@@ -521,6 +571,7 @@ struct f2fs_inode_info {
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
struct task_struct *task; /* lookup and create consistency */
+ struct task_struct *cp_task; /* separate cp/wb IO stats*/
nid_t i_xattr_nid; /* node id that contains xattrs */
loff_t last_disk_size; /* lastly written file size */
@@ -533,10 +584,15 @@ struct f2fs_inode_info {
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
+ struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */
struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
struct rw_semaphore i_mmap_sem;
+ struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
+
+ int i_extra_isize; /* size of extra space located in i_addr */
+ kprojid_t i_projid; /* id for project quota */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -823,6 +879,23 @@ enum need_lock_type {
LOCK_RETRY,
};
+enum iostat_type {
+ APP_DIRECT_IO, /* app direct IOs */
+ APP_BUFFERED_IO, /* app buffered IOs */
+ APP_WRITE_IO, /* app write IOs */
+ APP_MAPPED_IO, /* app mapped IOs */
+ FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */
+ FS_NODE_IO, /* node IOs from kworker/fsync/reclaimer */
+ FS_META_IO, /* meta IOs from kworker/reclaimer */
+ FS_GC_DATA_IO, /* data IOs from forground gc */
+ FS_GC_NODE_IO, /* node IOs from forground gc */
+ FS_CP_DATA_IO, /* data IOs from checkpoint */
+ FS_CP_NODE_IO, /* node IOs from checkpoint */
+ FS_CP_META_IO, /* meta IOs from checkpoint */
+ FS_DISCARD, /* discard */
+ NR_IO_TYPE,
+};
+
struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
@@ -837,6 +910,7 @@ struct f2fs_io_info {
bool submitted; /* indicate IO submission */
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
+ enum iostat_type io_type; /* io type */
};
#define is_read_io(rw) ((rw) == READ)
@@ -1028,6 +1102,11 @@ struct f2fs_sb_info {
#endif
spinlock_t stat_lock; /* lock for stat operations */
+ /* For app/fs IO statistics */
+ spinlock_t iostat_lock;
+ unsigned long long write_iostat[NR_IO_TYPE];
+ bool iostat_enable;
+
/* For sysfs suppport */
struct kobject s_kobj;
struct completion s_kobj_unregister;
@@ -1046,10 +1125,19 @@ struct f2fs_sb_info {
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
+ /* Precomputed FS UUID checksum for seeding other checksums */
+ __u32 s_chksum_seed;
+
/* For fault injection */
#ifdef CONFIG_F2FS_FAULT_INJECTION
struct f2fs_fault_info fault_info;
#endif
+
+#ifdef CONFIG_QUOTA
+ /* Names of quota files with journalled quota */
+ char *s_qf_names[MAXQUOTAS];
+ int s_jquota_fmt; /* Format of quota to use */
+#endif
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -1137,6 +1225,27 @@ static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
return f2fs_crc32(sbi, buf, buf_size) == blk_crc;
}
+static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc,
+ const void *address, unsigned int length)
+{
+ struct {
+ struct shash_desc shash;
+ char ctx[4];
+ } desc;
+ int err;
+
+ BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx));
+
+ desc.shash.tfm = sbi->s_chksum_driver;
+ desc.shash.flags = 0;
+ *(u32 *)desc.ctx = crc;
+
+ err = crypto_shash_update(&desc.shash, address, length);
+ BUG_ON(err);
+
+ return *(u32 *)desc.ctx;
+}
+
static inline struct f2fs_inode_info *F2FS_I(struct inode *inode)
{
return container_of(inode, struct f2fs_inode_info, vfs_inode);
@@ -1760,20 +1869,38 @@ static inline bool IS_INODE(struct page *page)
return RAW_IS_INODE(p);
}
+static inline int offset_in_addr(struct f2fs_inode *i)
+{
+ return (i->i_inline & F2FS_EXTRA_ATTR) ?
+ (le16_to_cpu(i->i_extra_isize) / sizeof(__le32)) : 0;
+}
+
static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
{
return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr;
}