summary | refs | log | tree | commit | diff | stats
path: root/fs/f2fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-11-16 12:10:21 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-11-16 12:10:21 -0800
commit a02cd4229e298aadbe8f5cf286edee8058d87116 (patch)
tree bf22338b0280b9c5d638c9277e9cb8d96d4746f9 /fs/f2fs
parent 487e2c9f44c4b5ea23bfe87bb34679f7297a0bce (diff)
parent ead710b7d82dc9e8184e10871c155a3ed8b3f673 (diff)
Merge tag 'f2fs-for-4.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we introduce sysfile-based quota support which is
  required for Android by default. In addition, we allow that users are
  able to reserve some blocks in runtime to mitigate performance drops
  in low free space.

  Enhancements:
   - assign proper data segments according to write_hints given by user
   - issue cache_flush on dirty devices only among multiple devices
   - exploit cp_error flag and add more faults to enhance fault
     injection test
   - conduct more readaheads during f2fs_readdir
   - add a range for discard commands

  Bug fixes:
   - fix zero stat->st_blocks when inline_data is set
   - drop crypto key and free stale memory pointer while evict_inode is
     failing
   - fix some corner cases in free space and segment management
   - fix wrong last_disk_size

  This series includes lots of clean-ups and code enhancement in terms
  of xattr operations, discard/flush command control. In addition, it
  adds versatile debugfs entries to monitor f2fs status"

* tag 'f2fs-for-4.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (75 commits)
  f2fs: deny accessing encryption policy if encryption is off
  f2fs: inject fault in inc_valid_node_count
  f2fs: fix to clear FI_NO_PREALLOC
  f2fs: expose quota information in debugfs
  f2fs: separate nat entry mem alloc from nat_tree_lock
  f2fs: validate before set/clear free nat bitmap
  f2fs: avoid opened loop codes in __add_ino_entry
  f2fs: apply write hints to select the type of segments for buffered write
  f2fs: introduce scan_curseg_cache for cleanup
  f2fs: optimize the way of traversing free_nid_bitmap
  f2fs: keep scanning until enough free nids are acquired
  f2fs: trace checkpoint reason in fsync()
  f2fs: keep isize once block is reserved cross EOF
  f2fs: avoid race in between GC and block exchange
  f2fs: save a multiplication for last_nid calculation
  f2fs: fix summary info corruption
  f2fs: remove dead code in update_meta_page
  f2fs: remove unneeded semicolon
  f2fs: don't bother with inode->i_version
  f2fs: check curseg space before foreground GC
  ...
Diffstat (limited to 'fs/f2fs')
-rw-r--r--  fs/f2fs/acl.c          3
-rw-r--r--  fs/f2fs/checkpoint.c  64
-rw-r--r--  fs/f2fs/data.c        37
-rw-r--r--  fs/f2fs/debug.c       31
-rw-r--r--  fs/f2fs/dir.c         32
-rw-r--r--  fs/f2fs/f2fs.h       222
-rw-r--r--  fs/f2fs/file.c       123
-rw-r--r--  fs/f2fs/gc.c          37
-rw-r--r--  fs/f2fs/inline.c       1
-rw-r--r--  fs/f2fs/inode.c       26
-rw-r--r--  fs/f2fs/namei.c      101
-rw-r--r--  fs/f2fs/node.c       410
-rw-r--r--  fs/f2fs/node.h        16
-rw-r--r--  fs/f2fs/recovery.c     8
-rw-r--r--  fs/f2fs/segment.c    509
-rw-r--r--  fs/f2fs/segment.h     39
-rw-r--r--  fs/f2fs/shrinker.c     2
-rw-r--r--  fs/f2fs/super.c      219
-rw-r--r--  fs/f2fs/sysfs.c       53
-rw-r--r--  fs/f2fs/xattr.c      174
20 files changed, 1495 insertions, 612 deletions
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 436b3a1464d9..2bb7c9fc5144 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -250,6 +250,9 @@ static int __f2fs_set_acl(struct inode *inode, int type,
int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
return __f2fs_set_acl(inode, type, acl, NULL);
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0bb8e2c022d3..dd2e73e10857 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -29,7 +29,6 @@ struct kmem_cache *inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
set_ckpt_flags(sbi, CP_ERROR_FLAG);
- sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_writes(sbi);
}
@@ -398,24 +397,23 @@ const struct address_space_operations f2fs_meta_aops = {
#endif
};
-static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
{
struct inode_management *im = &sbi->im[type];
struct ino_entry *e, *tmp;
tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
-retry:
+
radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
spin_lock(&im->ino_lock);
e = radix_tree_lookup(&im->ino_root, ino);
if (!e) {
e = tmp;
- if (radix_tree_insert(&im->ino_root, ino, e)) {
- spin_unlock(&im->ino_lock);
- radix_tree_preload_end();
- goto retry;
- }
+ if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
+ f2fs_bug_on(sbi, 1);
+
memset(e, 0, sizeof(struct ino_entry));
e->ino = ino;
@@ -423,6 +421,10 @@ retry:
if (type != ORPHAN_INO)
im->ino_num++;
}
+
+ if (type == FLUSH_INO)
+ f2fs_set_bit(devidx, (char *)&e->dirty_device);
+
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
@@ -451,7 +453,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
- __add_ino_entry(sbi, ino, type);
+ __add_ino_entry(sbi, ino, 0, type);
}
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -477,7 +479,7 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
struct ino_entry *e, *tmp;
int i;
- for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
+ for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
struct inode_management *im = &sbi->im[i];
spin_lock(&im->ino_lock);
@@ -491,6 +493,27 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
}
}
+void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
+{
+ __add_ino_entry(sbi, ino, devidx, type);
+}
+
+bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
+{
+ struct inode_management *im = &sbi->im[type];
+ struct ino_entry *e;
+ bool is_dirty = false;
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
+ is_dirty = true;
+ spin_unlock(&im->ino_lock);
+ return is_dirty;
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
struct inode_management *im = &sbi->im[ORPHAN_INO];
@@ -527,7 +550,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
void add_orphan_inode(struct inode *inode)
{
/* add new orphan ino entry into list */
- __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
+ __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
update_inode_page(inode);
}
@@ -551,7 +574,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
return err;
}
- __add_ino_entry(sbi, ino, ORPHAN_INO);
+ __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -587,6 +610,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
block_t start_blk, orphan_blocks, i, j;
unsigned int s_flags = sbi->sb->s_flags;
int err = 0;
+#ifdef CONFIG_QUOTA
+ int quota_enabled;
+#endif
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
@@ -599,8 +625,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
+
/* Turn on quotas so that they are updated correctly */
- f2fs_enable_quota_files(sbi);
+ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -628,7 +655,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
- f2fs_quota_off_umount(sbi->sb);
+ if (quota_enabled)
+ f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -983,7 +1011,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
update_inode_page(inode);
iput(inode);
}
- };
+ }
return 0;
}
@@ -1143,6 +1171,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct super_block *sb = sbi->sb;
struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
u64 kbytes_written;
+ int err;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -1236,6 +1265,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ /* flush all device cache */
+ err = f2fs_flush_device_cache(sbi);
+ if (err)
+ return err;
+
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7b3ad5d8e2e9..516fa0d3ff9c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -173,7 +173,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
{
struct bio *bio;
- bio = f2fs_bio_alloc(npages);
+ bio = f2fs_bio_alloc(sbi, npages, true);
f2fs_target_device(sbi, blk_addr, bio);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
@@ -418,8 +418,8 @@ next:
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
- /* set submitted = 1 as a return value */
- fio->submitted = 1;
+ /* set submitted = true as a return value */
+ fio->submitted = true;
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
@@ -473,7 +473,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
f2fs_wait_on_block_writeback(sbi, blkaddr);
}
- bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES));
+ bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio) {
if (ctx)
fscrypt_release_ctx(ctx);
@@ -833,6 +833,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
struct f2fs_map_blocks map;
int err = 0;
+ /* convert inline data for Direct I/O*/
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
@@ -845,15 +852,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
- if (iocb->ki_flags & IOCB_DIRECT) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
+ if (iocb->ki_flags & IOCB_DIRECT)
return f2fs_map_blocks(inode, &map, 1,
__force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO);
- }
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -1334,7 +1337,7 @@ static int f2fs_read_data_pages(struct file *file,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = mapping->host;
struct page *page = list_last_entry(pages, struct page, lru);
trace_f2fs_readpages(inode, page, nr_pages);
@@ -1495,6 +1498,7 @@ static int __write_data_page(struct page *page, bool *submitted,
int err = 0;
struct f2fs_io_info fio = {
.sbi = sbi,
+ .ino = inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
@@ -1566,8 +1570,11 @@ write:
err = do_write_data_page(&fio);
}
}
+
+ down_write(&F2FS_I(inode)->i_sem);
if (F2FS_I(inode)->last_disk_size < psize)
F2FS_I(inode)->last_disk_size = psize;
+ up_write(&F2FS_I(inode)->i_sem);
done:
if (err && err != -ENOENT)
@@ -1932,6 +1939,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
trace_f2fs_write_begin(inode, pos, len, flags);
+ if (f2fs_is_atomic_file(inode) &&
+ !available_free_memory(sbi, INMEM_PAGES)) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
@@ -1947,7 +1960,7 @@ repeat:
* Do not use grab_cache_page_write_begin() to avoid deadlock due to
* wait_for_stable_page. Will wait that below with our IO control.
*/
- page = pagecache_get_page(mapping, index,
+ page = f2fs_pagecache_get_page(mapping, index,
FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
if (!page) {
err = -ENOMEM;
@@ -2009,6 +2022,8 @@ repeat:
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
+ if (f2fs_is_atomic_file(inode))
+ drop_inmem_pages_all(sbi);
return err;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 87f449845f5f..ecada8425268 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
+ si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+
+ si->nquota_files = 0;
+ if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (f2fs_qf_ino(sbi->sb, i))
+ si->nquota_files++;
+ }
+ }
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
@@ -61,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ si->flush_list_empty =
+ llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
si->nr_discarded =
@@ -96,9 +107,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
- si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
+ si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
si->avail_nids = NM_I(sbi)->available_nids;
- si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
+ si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
@@ -231,14 +242,14 @@ get_cache:
}
/* free nids */
- si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
- NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) *
+ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
+ NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
- for (i = 0; i <= ORPHAN_INO; i++)
+ for (i = 0; i < MAX_INO_ENTRY; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->cache_mem += atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree);
@@ -262,9 +273,10 @@ static int stat_show(struct seq_file *s, void *v)
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
update_general_status(si->sbi);
- seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n",
+ seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
si->sbi->sb->s_bdev, i++,
- f2fs_readonly(si->sbi->sb) ? "RO": "RW");
+ f2fs_readonly(si->sbi->sb) ? "RO": "RW",
+ f2fs_cp_error(si->sbi) ? "Error": "Good");
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -349,10 +361,11 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
+ seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
+ si->flush_list_empty,
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
@@ -365,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
seq_printf(s, " - datas: %4d in files:%4d\n",
si->ndirty_data, si->ndirty_files);
+ seq_printf(s, " - quota datas: %4d in quota files:%4d\n",
+ si->ndirty_qdata, si->nquota_files);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - imeta: %4d\n",
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c0c933ad43c8..2d98d877c09d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -10,10 +10,12 @@
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/sched/signal.h>
#include "f2fs.h"
#include "node.h"
#include "acl.h"
#include "xattr.h"
+#include <trace/events/f2fs.h>
static unsigned long dir_blocks(struct inode *inode)
{
@@ -847,6 +849,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct f2fs_dentry_block *dentry_blk = NULL;
struct page *dentry_page = NULL;
struct file_ra_state *ra = &file->f_ra;
+ loff_t start_pos = ctx->pos;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
struct f2fs_dentry_ptr d;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
@@ -855,24 +858,32 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
if (f2fs_encrypted_inode(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err && err != -ENOKEY)
- return err;
+ goto out;
err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr);
if (err < 0)
- return err;
+ goto out;
}
if (f2fs_has_inline_dentry(inode)) {
err = f2fs_read_inline_dir(file, ctx, &fstr);
- goto out;
+ goto out_free;
}
- /* readahead for multi pages of dir */
- if (npages - n > 1 && !ra_has_index(ra, n))
- page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+ for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) {
+
+ /* allow readdir() to be interrupted */
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out_free;
+ }
+ cond_resched();
+
+ /* readahead for multi pages of dir */
+ if (npages - n > 1 && !ra_has_index(ra, n))
+ page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
- for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
@@ -880,7 +891,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
err = 0;
continue;
} else {
- goto out;
+ goto out_free;
}
}
@@ -896,12 +907,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
break;
}
- ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
-out:
+out_free:
fscrypt_fname_free_buffer(&fstr);
+out:
+ trace_f2fs_readdir(inode, start_pos, ctx->pos, err);
return err < 0 ? err : 0;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 115204fdefcc..f4e094e816c6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -44,6 +44,8 @@
enum {
FAULT_KMALLOC,
FAULT_PAGE_ALLOC,
+ FAULT_PAGE_GET,
+ FAULT_ALLOC_BIO,
FAULT_ALLOC_NID,
FAULT_ORPHAN,
FAULT_BLOCK,
@@ -91,6 +93,7 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_GRPQUOTA 0x00100000
#define F2FS_MOUNT_PRJQUOTA 0x00200000
#define F2FS_MOUNT_QUOTA 0x00400000
+#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -116,6 +119,8 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_EXTRA_ATTR 0x0008
#define F2FS_FEATURE_PRJQUOTA 0x0010
#define F2FS_FEATURE_INODE_CHKSUM 0x0020
+#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
+#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -145,7 +150,7 @@ enum {
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
-#define DISCARD_ISSUE_RATE 8
+#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_CP_INTERVAL 60 /* 60 secs */
@@ -156,7 +161,6 @@ struct cp_control {
__u64 trim_start;
__u64 trim_end;
__u64 trim_minlen;
- __u64 trimmed;
};
/*
@@ -175,12 +179,14 @@ enum {
ORPHAN_INO, /* for orphan ino list */
APPEND_INO, /* for append ino list */
UPDATE_INO, /* for update ino list */
+ FLUSH_INO, /* for multiple device flushing */
MAX_INO_ENTRY, /* max. list */
};
struct ino_entry {
- struct list_head list; /* list head */
- nid_t ino; /* inode number */
+ struct list_head list; /* list head */
+ nid_t ino; /* inode number */
+ unsigned int dirty_device; /* dirty device bitmap */
};
/* for the list of inodes to be GCed */
@@ -204,10 +210,6 @@ struct discard_entry {
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
(MAX_PLIST_NUM - 1) : (blk_num - 1))
-#define P_ACTIVE 0x01
-#define P_TRIM 0x02
-#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM))
-
enum {
D_PREP,
D_SUBMIT,
@@ -239,12 +241,32 @@ struct discard_cmd {
int error; /* bio error */
};
+enum {
+ DPOLICY_BG,
+ DPOLICY_FORCE,
+ DPOLICY_FSTRIM,
+ DPOLICY_UMOUNT,
+ MAX_DPOLICY,
+};
+
+struct discard_policy {
+ int type; /* type of discard */
+ unsigned int min_interval; /* used for candidates exist */
+ unsigned int max_interval; /* used for candidates not exist */
+ unsigned int max_requests; /* # of discards issued per round */
+ unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
+ bool io_aware; /* issue discard in idle time */
+ bool sync; /* submit discard with REQ_SYNC flag */
+ unsigned int granularity; /* discard granularity */
+};
+
struct discard_cmd_control {
struct task_struct *f2fs_issue_discard; /* discard thread */
struct list_head entry_list; /* 4KB discard entry list */
struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */
struct list_head wait_list; /* store on-flushing entries */
+ struct list_head fstrim_list; /* in-flight discard from fstrim */
wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
unsigned int discard_wake; /* to wake up discard thread */
struct mutex cmd_lock;
@@ -377,11 +399,14 @@ struct f2fs_flush_device {
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
+#define DEF_MIN_INLINE_SIZE 1
static inline int get_extra_isize(struct inode *inode);
-#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
- (CUR_ADDRS_PER_INODE(inode) - \
- DEF_INLINE_RESERVED_SIZE - \
- F2FS_INLINE_XATTR_ADDRS))
+static inline int get_inline_xattr_addrs(struct inode *inode);
+#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode)
+#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
+ (CUR_ADDRS_PER_INODE(inode) - \
+ F2FS_INLINE_XATTR_ADDRS(inode) - \
+ DEF_INLINE_RESERVED_SIZE))
/* for inline dir */
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
@@ -581,6 +606,7 @@ struct f2fs_inode_info {
#endif
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
+ struct list_head inmem_ilist; /* list for inmem inodes */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
@@ -591,6 +617,7 @@ struct f2fs_inode_info {
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
+ int i_inline_xattr_size; /* inline xattr size */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -664,10 +691,13 @@ static inline void __try_update_largest_extent(struct inode *inode,
}
}
-enum nid_list {
- FREE_NID_LIST,
- ALLOC_NID_LIST,
- MAX_NID_LIST,
+/*
+ * For free nid management
+ */
+enum nid_state {
+ FREE_NID, /* newly added to free nid list */
+ PREALLOC_NID, /* it is preallocated */
+ MAX_NID_STATE,
};
struct f2fs_nm_info {
@@ -690,8 +720,8 @@ struct f2fs_nm_info {
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
- struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */
- unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
+ struct list_head free_nid_list; /* list for free nids excluding preallocated nids */
+ unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */
spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
@@ -769,6 +799,7 @@ enum {
struct flush_cmd {
struct completion wait;
struct llist_node llnode;
+ nid_t ino;
int ret;
};
@@ -787,6 +818,8 @@ struct f2fs_sm_info {
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
+ struct rw_semaphore curseg_lock; /* for preventing curseg change */
+
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
@@ -808,6 +841,7 @@ struct f2fs_sm_info {
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
+ unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
/* for flush command control */
struct flush_cmd_control *fcc_info;
@@ -829,6 +863,7 @@ struct f2fs_sm_info {
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
+ F2FS_DIRTY_QDATA,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
@@ -877,6 +912,18 @@ enum need_lock_type {
LOCK_RETRY,
};
+enum cp_reason_type {
+ CP_NO_NEEDED,
+ CP_NON_REGULAR,
+ CP_HARDLINK,
+ CP_SB_NEED_CP,
+ CP_WRONG_PINO,
+ CP_NO_SPC_ROLL,
+ CP_NODE_NEED_CP,
+ CP_FASTBOOT_MODE,
+ CP_SPEC_LOG_NUM,
+};
+
enum iostat_type {
APP_DIRECT_IO, /* app direct IOs */
APP_BUFFERED_IO, /* app buffered IOs */
@@ -896,6 +943,7 @@ enum iostat_type {
struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
+ nid_t ino; /* inode number */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
enum temp_type temp; /* contains HOT/WARM/COLD */
int op; /* contains REQ_OP_ */
@@ -940,6 +988,7 @@ enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
DIRTY_META, /* for all dirtied inode metadata */
+ ATOMIC_FILE, /* for all atomic files */
NR_INODE_TYPE,
};
@@ -1042,12 +1091,15 @@ struct f2fs_sb_info {
loff_t max_file_blocks; /* max block index of file */
int active_logs; /* # of active logs */
int dir_level; /* directory level */
+ int inline_xattr_size; /* inline xattr size */
+ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
block_t discard_blks; /* discard command candidats */
block_t last_valid_block_count; /* for recovery */
block_t reserved_blocks; /* configurable reserved blocks */
+ block_t current_reserved_blocks; /* current reserved blocks */
u32 s_next_generation; /* for NFS support */
@@ -1113,6 +1165,8 @@ struct f2fs_sb_info {
struct list_head s_list;
int s_ndevs; /* number of devices */
struct f2fs_dev_info *devs; /* for device list */
+ unsigned int dirty_device; /* for checkpoint data flush */
+ spinlock_t dev_lock; /* protect dirty_device */
struct mutex umount_mutex;
unsigned int shrinker_run_no;
@@ -1176,8 +1230,7 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
{
- struct timespec ts = {sbi->interval_time[type], 0};
- unsigned long interval = timespec_to_jiffies(&ts);
+ unsigned long interval = sbi->interval_time[type] * HZ;
return time_after(jiffies, sbi->last_time[type] + interval);
}
@@ -1344,6 +1397,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
return le64_to_cpu(cp->checkpoint_ver);
}
+static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type)
+{
+ if (type < F2FS_MAX_QUOTAS)
+ return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]);
+ return 0;
+}
+
static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp)
{
size_t crc_offset = le32_to_cpu(cp->checksum_offset);
@@ -1522,7 +1582,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
sbi->total_valid_block_count += (block_t)(*count);
- avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks;
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
*count -= diff;
@@ -1556,6 +1617,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
+ if (sbi->reserved_blocks &&
+ sbi->current_reserved_blocks < sbi->reserved_blocks)
+ sbi->current_reserved_blocks = min(sbi->reserved_blocks,
+ sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -1576,6 +1641,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode)
atomic_inc(&F2FS_I(inode)->dirty_pages);
inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ if (IS_NOQUOTA(inode))
+ inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1592,6 +1659,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
atomic_dec(&F2FS_I(inode)->dirty_pages);
dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ if (IS_NOQUOTA(inode))
+ dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
}
static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -1699,10 +1768,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return ret;
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_BLOCK)) {
+ f2fs_show_injection_info(FAULT_BLOCK);
+ goto enospc;
+ }
+#endif
+
spin_lock(&sbi->stat_lock);
valid_block_count = sbi->total_valid_block_count + 1;
- if (unlikely(valid_block_count + sbi->reserved_blocks >
+ if (unlikely(valid_block_count + sbi->current_reserved_blocks >
sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
@@ -1745,6 +1821,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
sbi->total_valid_node_count--;