From 5576cd6ca555bd79e76a2d798aec44b28851d369 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Thu, 12 Jun 2014 13:25:01 +0800
Subject: f2fs: avoid unneeded SetPageUptodate in f2fs_write_end

We have already set page update in ->write_begin, so we should remove redundant
SetPageUptodate in ->write_end.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619edb5f..2eb2764173ca 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1016,7 +1016,6 @@ static int f2fs_write_end(struct file *file,
 
 	trace_f2fs_write_end(inode, pos, len, copied);
 
-	SetPageUptodate(page);
 	set_page_dirty(page);
 
 	if (pos + copied > i_size_read(inode)) {
-- 
cgit v1.2.3


From ca0a81b397b5f153ac2a9880c5e85b08b0447e4e Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Thu, 12 Jun 2014 13:31:50 +0800
Subject: f2fs: avoid to truncate non-updated page partially

After we call find_data_page in truncate_partial_data_page, we could not
guarantee this page is updated or not as error may occurred in lower layer.

We'd better check status of the page to avoid this no updated page be
writebacked to device.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d8b96275092..36fa50587bb8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -380,13 +380,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
 		return;
 
 	lock_page(page);
-	if (unlikely(page->mapping != inode->i_mapping)) {
-		f2fs_put_page(page, 1);
-		return;
-	}
+	if (unlikely(!PageUptodate(page) ||
+			page->mapping != inode->i_mapping))
+		goto out;
+
 	f2fs_wait_on_page_writeback(page, DATA);
 	zero_user(page, offset, PAGE_CACHE_SIZE - offset);
 	set_page_dirty(page);
+
+out:
 	f2fs_put_page(page, 1);
 }
 
-- 
cgit v1.2.3


From 50732df02eefb39ab414ef655979c2c9b64ad21c Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Thu, 19 Jun 2014 16:23:19 +0800
Subject: f2fs: support ->tmpfile()

Add function f2fs_tmpfile() to support O_TMPFILE file creation, and modify logic
of init_inode_metadata to enable linkat temp file.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c   | 10 ++++++++-
 fs/f2fs/namei.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a4addd72ebbd..3677f4124541 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -333,10 +333,12 @@ static int make_empty_dir(struct inode *inode,
 static struct page *init_inode_metadata(struct inode *inode,
 		struct inode *dir, const struct qstr *name)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct page *page;
 	int err;
 
-	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
+	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE) &&
+			inode->i_nlink) {
 		page = new_inode_page(inode, name);
 		if (IS_ERR(page))
 			return page;
@@ -370,6 +372,12 @@ static struct page *init_inode_metadata(struct inode *inode,
 	 */
 	if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
 		file_lost_pino(inode);
+		/*
+		 * If link the tmpfile to alias through linkat path,
+		 * we should remove this inode from orphan list.
+		 */
+		if (inode->i_nlink == 0)
+			remove_orphan_inode(sbi, inode->i_ino);
 		inc_nlink(inode);
 	}
 	return page;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a6bdddc33ce2..5c08c546e4c8 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
 #include <linux/pagemap.h>
 #include <linux/sched.h>
 #include <linux/ctype.h>
+#include <linux/dcache.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -487,6 +488,68 @@ out:
 	return err;
 }
 
+static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct super_block *sb = dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *inode;
+	struct page *page;
+	int err;
+
+	inode = f2fs_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &f2fs_file_inode_operations;
+	inode->i_fop = &f2fs_file_operations;
+	inode->i_mapping->a_ops = &f2fs_dblock_aops;
+
+	f2fs_lock_op(sbi);
+	err = acquire_orphan_inode(sbi);
+	if (err)
+		goto out;
+	/*
+	 * add this non-linked tmpfile to orphan list, in this way we could
+	 * remove all unused data of tmpfile after abnormal power-off.
+	 */
+	add_orphan_inode(sbi, inode->i_ino);
+
+	page = new_inode_page(inode, NULL);
+	if (IS_ERR(page)) {
+		err = PTR_ERR(page);
+		goto remove_out;
+	}
+
+	err = f2fs_init_acl(inode, dir, page);
+	if (err)
+		goto unlock_out;
+
+	err = f2fs_init_security(inode, dir, NULL, page);
+	if (err)
+		goto unlock_out;
+
+	f2fs_put_page(page, 1);
+	f2fs_unlock_op(sbi);
+
+	alloc_nid_done(sbi, inode->i_ino);
+	mark_inode_dirty(inode);
+	d_tmpfile(dentry, inode);
+	unlock_new_inode(inode);
+	return 0;
+unlock_out:
+	f2fs_put_page(page, 1);
+remove_out:
+	remove_orphan_inode(sbi, inode->i_ino);
+out:
+	f2fs_unlock_op(sbi);
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+	make_bad_inode(inode);
+	iput(inode);
+	alloc_nid_failed(sbi, inode->i_ino);
+	return err;
+}
+
 const struct inode_operations f2fs_dir_inode_operations = {
 	.create		= f2fs_create,
 	.lookup		= f2fs_lookup,
@@ -497,6 +560,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
 	.rmdir		= f2fs_rmdir,
 	.mknod		= f2fs_mknod,
 	.rename		= f2fs_rename,
+	.tmpfile	= f2fs_tmpfile,
 	.getattr	= f2fs_getattr,
 	.setattr	= f2fs_setattr,
 	.get_acl	= f2fs_get_acl,
-- 
cgit v1.2.3


From b97a9b5da891ab6aff5a6a19c569c9c4c5563d48 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 20 Jun 2014 21:37:02 -0700
Subject: f2fs: introduce f2fs_do_tmpfile for code consistency

This patch adds f2fs_do_tmpfile to eliminate the redundant init_inode_metadata
flow.
Throught this, we can provide the consistent lock usage, e.g., fi->i_sem,  and
this will enable better debugging stuffs.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c   | 27 ++++++++++++++++++++++++---
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/namei.c | 30 ++++++++----------------------
 3 files changed, 33 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 3677f4124541..fb7e01ed47dc 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -337,8 +337,7 @@ static struct page *init_inode_metadata(struct inode *inode,
 	struct page *page;
 	int err;
 
-	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE) &&
-			inode->i_nlink) {
+	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
 		page = new_inode_page(inode, name);
 		if (IS_ERR(page))
 			return page;
@@ -364,7 +363,8 @@ static struct page *init_inode_metadata(struct inode *inode,
 		set_cold_node(inode, page);
 	}
 
-	init_dent_inode(name, page);
+	if (name)
+		init_dent_inode(name, page);
 
 	/*
 	 * This file should be checkpointed during fsync.
@@ -537,6 +537,27 @@ fail:
 	return err;
 }
 
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
+{
+	struct page *page;
+	int err = 0;
+
+	down_write(&F2FS_I(inode)->i_sem);
+	page = init_inode_metadata(inode, dir, NULL);
+	if (IS_ERR(page)) {
+		err = PTR_ERR(page);
+		goto fail;
+	}
+	/* we don't need to mark_inode_dirty now */
+	update_inode(inode, page);
+	f2fs_put_page(page, 1);
+
+	clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
+fail:
+	up_write(&F2FS_I(inode)->i_sem);
+	return err;
+}
+
 /*
  * It only removes the dentry from the dentry page,corresponding name
  * entry in name page does not need to be touched during deletion.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 58df97e174d0..ca421a8e47d9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1136,6 +1136,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
 int update_dent_inode(struct inode *, const struct qstr *);
 int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
 void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+int f2fs_do_tmpfile(struct inode *, struct inode *);
 int f2fs_make_empty(struct inode *, struct inode *);
 bool f2fs_empty_dir(struct inode *);
 
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 5c08c546e4c8..1994d2e449d6 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -493,7 +493,6 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct super_block *sb = dir->i_sb;
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct inode *inode;
-	struct page *page;
 	int err;
 
 	inode = f2fs_new_inode(dir, mode);
@@ -508,38 +507,25 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	err = acquire_orphan_inode(sbi);
 	if (err)
 		goto out;
+
+	err = f2fs_do_tmpfile(inode, dir);
+	if (err)
+		goto release_out;
+
 	/*
 	 * add this non-linked tmpfile to orphan list, in this way we could
 	 * remove all unused data of tmpfile after abnormal power-off.
 	 */
 	add_orphan_inode(sbi, inode->i_ino);
-
-	page = new_inode_page(inode, NULL);
-	if (IS_ERR(page)) {
-		err = PTR_ERR(page);
-		goto remove_out;
-	}
-
-	err = f2fs_init_acl(inode, dir, page);
-	if (err)
-		goto unlock_out;
-
-	err = f2fs_init_security(inode, dir, NULL, page);
-	if (err)
-		goto unlock_out;
-
-	f2fs_put_page(page, 1);
 	f2fs_unlock_op(sbi);
 
 	alloc_nid_done(sbi, inode->i_ino);
-	mark_inode_dirty(inode);
 	d_tmpfile(dentry, inode);
 	unlock_new_inode(inode);
 	return 0;
-unlock_out:
-	f2fs_put_page(page, 1);
-remove_out:
-	remove_orphan_inode(sbi, inode->i_ino);
+
+release_out:
+	release_orphan_inode(sbi);
 out:
 	f2fs_unlock_op(sbi);
 	clear_nlink(inode);
-- 
cgit v1.2.3


From a014e037be26b5c9ee6fb4e49e7804141cf3bb89 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 20 Jun 2014 21:44:02 -0700
Subject: f2fs: clean up an unused parameter and assignment

This patch cleans up simple unnecessary codes.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c   |  2 +-
 fs/f2fs/f2fs.h  |  2 +-
 fs/f2fs/namei.c | 26 +++++++++-----------------
 fs/f2fs/node.c  |  2 +-
 4 files changed, 12 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index fb7e01ed47dc..087b03dbca9b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -338,7 +338,7 @@ static struct page *init_inode_metadata(struct inode *inode,
 	int err;
 
 	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
-		page = new_inode_page(inode, name);
+		page = new_inode_page(inode);
 		if (IS_ERR(page))
 			return page;
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ca421a8e47d9..3f0291b840ef 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1174,7 +1174,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);
 int truncate_xattr_node(struct inode *, struct page *);
 int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
 void remove_inode_page(struct inode *);
-struct page *new_inode_page(struct inode *, const struct qstr *);
+struct page *new_inode_page(struct inode *);
 struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
 void ra_node_page(struct f2fs_sb_info *, nid_t);
 struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1994d2e449d6..89b0bd9420cc 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -23,14 +23,13 @@
 
 static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 {
-	struct super_block *sb = dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	nid_t ino;
 	struct inode *inode;
 	bool nid_free = false;
 	int err;
 
-	inode = new_inode(sb);
+	inode = new_inode(dir->i_sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
@@ -103,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
 static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 						bool excl)
 {
-	struct super_block *sb = dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct inode *inode;
 	nid_t ino = 0;
 	int err;
@@ -147,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 		struct dentry *dentry)
 {
 	struct inode *inode = old_dentry->d_inode;
-	struct super_block *sb = dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	int err;
 
 	f2fs_balance_fs(sbi);
@@ -208,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 
 static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 {
-	struct super_block *sb = dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct inode *inode = dentry->d_inode;
 	struct f2fs_dir_entry *de;
 	struct page *page;
@@ -243,8 +239,7 @@ fail:
 static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 					const char *symname)
 {
-	struct super_block *sb = dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct inode *inode;
 	size_t symlen = strlen(symname) + 1;
 	int err;
@@ -331,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
 static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 				umode_t mode, dev_t rdev)
 {
-	struct super_block *sb = dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct inode *inode;
 	int err = 0;
 
@@ -370,8 +364,7 @@ out:
 static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			struct inode *new_dir, struct dentry *new_dentry)
 {
-	struct super_block *sb = old_dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
 	struct inode *old_inode = old_dentry->d_inode;
 	struct inode *new_inode = new_dentry->d_inode;
 	struct page *old_dir_page;
@@ -490,8 +483,7 @@ out:
 
 static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-	struct super_block *sb = dir->i_sb;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct inode *inode;
 	int err;
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4b697ccc9b0c..de709f0a445e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -845,7 +845,7 @@ void remove_inode_page(struct inode *inode)
 	truncate_node(&dn);
 }
 
-struct page *new_inode_page(struct inode *inode, const struct qstr *name)
+struct page *new_inode_page(struct inode *inode)
 {
 	struct dnode_of_data dn;
 
-- 
cgit v1.2.3


From aec71382c68135261ef6efc3d8a96b7149939446 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 24 Jun 2014 09:18:20 +0800
Subject: f2fs: refactor flush_nat_entries codes for reducing NAT writes

Although building NAT journal in cursum reduce the read/write work for NAT
block, but previous design leave us lower performance when write checkpoint
frequently for these cases:
1. if journal in cursum has already full, it's a bit of waste that we flush all
   nat entries to page for persistence, but not to cache any entries.
2. if journal in cursum is not full, we fill nat entries to journal util
   journal is full, then flush the left dirty entries to disk without merge
   journaled entries, so these journaled entries may be flushed to disk at next
   checkpoint but lost chance to flushed last time.

In this patch we merge dirty entries located in same NAT block to nat entry set,
and linked all set to list, sorted ascending order by entries' count of set.
Later we flush entries in sparse set into journal as many as we can, and then
flush merged entries to disk. In this way we can not only gain in performance,
but also save lifetime of flash device.

In my testing environment, it shows this patch can help to reduce NAT block
writes obviously. In hard disk test case: cost time of fsstress is stablely
reduced by about 5%.

1. virtual machine + hard disk:
fsstress -p 20 -n 200 -l 5
		node num	cp count	nodes/cp
based		4599.6		1803.0		2.551
patched		2714.6		1829.6		1.483

2. virtual machine + 32g micro SD card:
fsstress -p 20 -n 200 -l 1 -w -f chown=0 -f creat=4 -f dwrite=0
-f fdatasync=4 -f fsync=4 -f link=0 -f mkdir=4 -f mknod=4 -f rename=5
-f rmdir=5 -f symlink=0 -f truncate=4 -f unlink=5 -f write=0 -S

		node num	cp count	nodes/cp
based		84.5		43.7		1.933
patched		49.2		40.0		1.23

Our latency of merging op shows not bad when handling extreme case like:
merging a great number of dirty nats:
latency(ns)	dirty nat count
3089219		24922
5129423		27422
4000250		24523

change log from v1:
 o fix wrong logic in add_nat_entry when grab a new nat entry set.
 o swith to create slab cache in create_node_manager_caches.
 o use GFP_ATOMIC instead of GFP_NOFS to avoid potential long latency.

change log from v2:
 o make comment position more appropriate suggested by Jaegeuk Kim.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h |   2 +
 fs/f2fs/node.c | 263 +++++++++++++++++++++++++++++++++++++++------------------
 fs/f2fs/node.h |   7 ++
 3 files changed, 188 insertions(+), 84 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3f0291b840ef..ec480b1a6e33 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -256,6 +256,8 @@ struct f2fs_nm_info {
 	unsigned int nat_cnt;		/* the # of cached nat entries */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
+	struct list_head nat_entry_set;	/* nat entry set list */
+	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
 
 	/* free node ids management */
 	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index de709f0a445e..a90f51d32482 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
 
 static struct kmem_cache *nat_entry_slab;
 static struct kmem_cache *free_nid_slab;
+static struct kmem_cache *nat_entry_set_slab;
 
 bool available_free_memory(struct f2fs_sb_info *sbi, int type)
 {
@@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
 
 	/* get current nat block page with lock */
 	src_page = get_meta_page(sbi, src_off);
-
-	/* Dirty src_page means that it is already the new target NAT page. */
-	if (PageDirty(src_page))
-		return src_page;
-
 	dst_page = grab_meta_page(sbi, dst_off);
+	f2fs_bug_on(PageDirty(src_page));
 
 	src_addr = page_address(src_page);
 	dst_addr = page_address(dst_page);
@@ -1744,7 +1741,90 @@ skip:
 	return err;
 }
 
-static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
+static struct nat_entry_set *grab_nat_entry_set(void)
+{
+	struct nat_entry_set *nes =
+			f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
+
+	nes->entry_cnt = 0;
+	INIT_LIST_HEAD(&nes->set_list);
+	INIT_LIST_HEAD(&nes->entry_list);
+	return nes;
+}
+
+static void release_nat_entry_set(struct nat_entry_set *nes,
+						struct f2fs_nm_info *nm_i)
+{
+	f2fs_bug_on(!list_empty(&nes->entry_list));
+
+	nm_i->dirty_nat_cnt -= nes->entry_cnt;
+	list_del(&nes->set_list);
+	kmem_cache_free(nat_entry_set_slab, nes);
+}
+
+static void adjust_nat_entry_set(struct nat_entry_set *nes,
+						struct list_head *head)
+{
+	struct nat_entry_set *next = nes;
+
+	if (list_is_last(&nes->set_list, head))
+		return;
+
+	list_for_each_entry_continue(next, head, set_list)
+		if (nes->entry_cnt <= next->entry_cnt)
+			break;
+
+	list_move_tail(&nes->set_list, &next->set_list);
+}
+
+static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
+{
+	struct nat_entry_set *nes;
+	nid_t start_nid = START_NID(ne->ni.nid);
+
+	list_for_each_entry(nes, head, set_list) {
+		if (nes->start_nid == start_nid) {
+			list_move_tail(&ne->list, &nes->entry_list);
+			nes->entry_cnt++;
+			adjust_nat_entry_set(nes, head);
+			return;
+		}
+	}
+
+	nes = grab_nat_entry_set();
+
+	nes->start_nid = start_nid;
+	list_move_tail(&ne->list, &nes->entry_list);
+	nes->entry_cnt++;
+	list_add(&nes->set_list, head);
+}
+
+static void merge_nats_in_set(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	struct list_head *dirty_list = &nm_i->dirty_nat_entries;
+	struct list_head *set_list = &nm_i->nat_entry_set;
+	struct nat_entry *ne, *tmp;
+
+	write_lock(&nm_i->nat_tree_lock);
+	list_for_each_entry_safe(ne, tmp, dirty_list, list) {
+		if (nat_get_blkaddr(ne) == NEW_ADDR)
+			continue;
+		add_nat_entry(ne, set_list);
+		nm_i->dirty_nat_cnt++;
+	}
+	write_unlock(&nm_i->nat_tree_lock);
+}
+
+static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
+{
+	if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
+		return true;
+	else
+		return false;
+}
+
+static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1752,12 +1832,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
 	int i;
 
 	mutex_lock(&curseg->curseg_mutex);
-
-	if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
-		mutex_unlock(&curseg->curseg_mutex);
-		return false;
-	}
-
 	for (i = 0; i < nats_in_cursum(sum); i++) {
 		struct nat_entry *ne;
 		struct f2fs_nat_entry raw_ne;
@@ -1767,23 +1841,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
 retry:
 		write_lock(&nm_i->nat_tree_lock);
 		ne = __lookup_nat_cache(nm_i, nid);
-		if (ne) {
-			__set_nat_cache_dirty(nm_i, ne);
-			write_unlock(&nm_i->nat_tree_lock);
-			continue;
-		}
+		if (ne)
+			goto found;
+
 		ne = grab_nat_entry(nm_i, nid);
 		if (!ne) {
 			write_unlock(&nm_i->nat_tree_lock);
 			goto retry;
 		}
 		node_info_from_raw_nat(&ne->ni, &raw_ne);
+found:
 		__set_nat_cache_dirty(nm_i, ne);
 		write_unlock(&nm_i->nat_tree_lock);
 	}
 	update_nats_in_cursum(sum, -i);
 	mutex_unlock(&curseg->curseg_mutex);
-	return true;
 }
 
 /*
@@ -1794,80 +1866,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
 	struct f2fs_summary_block *sum = curseg->sum_blk;
-	struct nat_entry *ne, *cur;
-	struct page *page = NULL;
-	struct f2fs_nat_block *nat_blk = NULL;
-	nid_t start_nid = 0, end_nid = 0;
-	bool flushed;
-
-	flushed = flush_nats_in_journal(sbi);
+	struct nat_entry_set *nes, *tmp;
+	struct list_head *head = &nm_i->nat_entry_set;
+	bool to_journal = true;
 
-	if (!flushed)
-		mutex_lock(&curseg->curseg_mutex);
+	/* merge nat entries of dirty list to nat entry set temporarily */
+	merge_nats_in_set(sbi);
 
-	/* 1) flush dirty nat caches */
-	list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
-		nid_t nid;
-		struct f2fs_nat_entry raw_ne;
-		int offset = -1;
-
-		if (nat_get_blkaddr(ne) == NEW_ADDR)
-			continue;
-
-		nid = nat_get_nid(ne);
+	/*
+	 * if there are no enough space in journal to store dirty nat
+	 * entries, remove all entries from journal and merge them
+	 * into nat entry set.
+	 */
+	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
+		remove_nats_in_journal(sbi);
 
-		if (flushed)
-			goto to_nat_page;
+		/*
+		 * merge nat entries of dirty list to nat entry set temporarily
+		 */
+		merge_nats_in_set(sbi);
+	}
 
-		/* if there is room for nat enries in curseg->sumpage */
-		offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
-		if (offset >= 0) {
-			raw_ne = nat_in_journal(sum, offset);
-			goto flush_now;
-		}
-to_nat_page:
-		if (!page || (start_nid > nid || nid > end_nid)) {
-			if (page) {
-				f2fs_put_page(page, 1);
-				page = NULL;
-			}
-			start_nid = START_NID(nid);
-			end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
+	if (!nm_i->dirty_nat_cnt)
+		return;
 
-			/*
-			 * get nat block with dirty flag, increased reference
-			 * count, mapped and lock
-			 */
+	/*
+	 * there are two steps to flush nat entries:
+	 * #1, flush nat entries to journal in current hot data summary block.
+	 * #2, flush nat entries to nat page.
+	 */
+	list_for_each_entry_safe(nes, tmp, head, set_list) {
+		struct f2fs_nat_block *nat_blk;
+		struct nat_entry *ne, *cur;
+		struct page *page;
+		nid_t start_nid = nes->start_nid;
+
+		if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
+			to_journal = false;
+
+		if (to_journal) {
+			mutex_lock(&curseg->curseg_mutex);
+		} else {
 			page = get_next_nat_page(sbi, start_nid);
 			nat_blk = page_address(page);
+			f2fs_bug_on(!nat_blk);
 		}
 
-		f2fs_bug_on(!nat_blk);
-		raw_ne = nat_blk->entries[nid - start_nid];
-flush_now:
-		raw_nat_from_node_info(&raw_ne, &ne->ni);
-
-		if (offset < 0) {
-			nat_blk->entries[nid - start_nid] = raw_ne;
-		} else {
-			nat_in_journal(sum, offset) = raw_ne;
-			nid_in_journal(sum, offset) = cpu_to_le32(nid);
-		}
+		/* flush dirty nats in nat entry set */
+		list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
+			struct f2fs_nat_entry *raw_ne;
+			nid_t nid = nat_get_nid(ne);
+			int offset;
+
+			if (to_journal) {
+				offset = lookup_journal_in_cursum(sum,
+							NAT_JOURNAL, nid, 1);
+				f2fs_bug_on(offset < 0);
+				raw_ne = &nat_in_journal(sum, offset);
+				nid_in_journal(sum, offset) = cpu_to_le32(nid);
+			} else {
+				raw_ne = &nat_blk->entries[nid - start_nid];
+			}
+			raw_nat_from_node_info(raw_ne, &ne->ni);
 
-		if (nat_get_blkaddr(ne) == NULL_ADDR &&
+			if (nat_get_blkaddr(ne) == NULL_ADDR &&
 				add_free_nid(sbi, nid, false) <= 0) {
-			write_lock(&nm_i->nat_tree_lock);
-			__del_from_nat_cache(nm_i, ne);
-			write_unlock(&nm_i->nat_tree_lock);
-		} else {
-			write_lock(&nm_i->nat_tree_lock);
-			__clear_nat_cache_dirty(nm_i, ne);
-			write_unlock(&nm_i->nat_tree_lock);
+				write_lock(&nm_i->nat_tree_lock);
+				__del_from_nat_cache(nm_i, ne);
+				write_unlock(&nm_i->nat_tree_lock);
+			} else {
+				write_lock(&nm_i->nat_tree_lock);
+				__clear_nat_cache_dirty(nm_i, ne);
+				write_unlock(&nm_i->nat_tree_lock);
+			}
 		}
+
+		if (to_journal)
+			mutex_unlock(&curseg->curseg_mutex);
+		else
+			f2fs_put_page(page, 1);
+
+		release_nat_entry_set(nes, nm_i);
 	}
-	if (!flushed)
-		mutex_unlock(&curseg->curseg_mutex);
-	f2fs_put_page(page, 1);
+
+	f2fs_bug_on(!list_empty(head));
+	f2fs_bug_on(nm_i->dirty_nat_cnt);
 }
 
 static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1896,6 +1979,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
 	INIT_LIST_HEAD(&nm_i->nat_entries);
 	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
+	INIT_LIST_HEAD(&nm_i->nat_entry_set);
 
 	mutex_init(&nm_i->build_lock);
 	spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1976,19 +2060,30 @@ int __init create_node_manager_caches(void)
 	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
 			sizeof(struct nat_entry));
 	if (!nat_entry_slab)
-		return -ENOMEM;
+		goto fail;
 
 	free_nid_slab = f2fs_kmem_cache_create("free_nid",
 			sizeof(struct free_nid));
-	if (!free_nid_slab) {
-		kmem_cache_destroy(nat_entry_slab);
-		return -ENOMEM;
-	}
+	if (!free_nid_slab)
+		goto destory_nat_entry;
+
+	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
+			sizeof(struct nat_entry_set));
+	if (!nat_entry_set_slab)
+		goto destory_free_nid;
 	return 0;
+
+destory_free_nid:
+	kmem_cache_destroy(free_nid_slab);
+destory_nat_entry:
+	kmem_cache_destroy(nat_entry_slab);
+fail:
+	return -ENOMEM;
 }
 
 void destroy_node_manager_caches(void)
 {
+	kmem_cache_destroy(nat_entry_set_slab);
 	kmem_cache_destroy(free_nid_slab);
 	kmem_cache_destroy(nat_entry_slab);
 }
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
 	DIRTY_DENTS	/* indicates dirty dentry pages */
 };
 
+struct nat_entry_set {
+	struct list_head set_list;	/* link with all nat sets */
+	struct list_head entry_list;	/* link with dirty nat entries */
+	nid_t start_nid;		/* start nid of nats in set */
+	unsigned int entry_cnt;		/* the # of nat entries in set */
+};
+
 /*
  * For free nid mangement
  */
-- 
cgit v1.2.3


From b434babf854eab82397759e98425423cd9bc4786 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Mon, 23 Jun 2014 18:39:15 +0200
Subject: f2fs: replace count*size kzalloc by kcalloc

kcalloc manages count*sizeof overflow.

Cc: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Cc: linux-f2fs-devel@lists.sourceforge.net
Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04613df710a..a4f8375f6ae6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1703,7 +1703,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 	struct curseg_info *array;
 	int i;
 
-	array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
+	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
 	if (!array)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 1256010ab1a372c8ffc01fc37ff767d8bc15378b Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 24 Jun 2014 14:16:24 +0800
Subject: f2fs: reduce region of f2fs_lock_op covered for better concurrency

In our rename process, region of f2fs_lock_op covered is too big as some of the
code like f2fs_empty_dir/f2fs_find_entry are not needed to protect by this lock.

So in the extreme case like doing checkpoint when we rename old inode to exist
inode in a large directory could cause lower concurrency.

Let's reduce the region of f2fs_lock_op to fix this.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 89b0bd9420cc..1b3cae03c24f 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -387,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto out_old;
 	}
 
-	f2fs_lock_op(sbi);
-
 	if (new_inode) {
 
 		err = -ENOTEMPTY;
@@ -401,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (!new_entry)
 			goto out_dir;
 
+		f2fs_lock_op(sbi);
+
 		err = acquire_orphan_inode(sbi);
 		if (err)
 			goto put_out_dir;
@@ -429,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		update_inode_page(old_inode);
 		update_inode_page(new_inode);
 	} else {
+		f2fs_lock_op(sbi);
+
 		err = f2fs_add_link(new_dentry, old_inode);
-		if (err)
+		if (err) {
+			f2fs_unlock_op(sbi);
 			goto out_dir;
+		}
 
 		if (old_dir_entry) {
 			inc_nlink(new_dir);
@@ -466,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return 0;
 
 put_out_dir:
+	f2fs_unlock_op(sbi);
 	kunmap(new_page);
 	f2fs_put_page(new_page, 0);
 out_dir:
@@ -473,7 +478,6 @@ out_dir:
 		kunmap(old_dir_page);
 		f2fs_put_page(old_dir_page, 0);
 	}
-	f2fs_unlock_op(sbi);
 out_old:
 	kunmap(old_page);
 	f2fs_put_page(old_page, 0);
-- 
cgit v1.2.3


From 34e6d456da4a61f655655a6d431657da5753913f Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Tue, 24 Jun 2014 18:18:14 +0800
Subject: f2fs: remove the redundant validation check of acl

kernel side(xx_init_acl), the acl is get/cloned from the parent dir's,
which is credible. So remove the redundant validation check of acl
here.

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/acl.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index dbe2141d10ad..83b9b5a8d112 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
 	size_t size = 0;
 	int error;
 
-	if (acl) {
-		error = posix_acl_valid(acl);
-		if (error < 0)
-			return error;
-	}
-
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
-- 
cgit v1.2.3


From 1c3bb97899c91d420562c51882ee81a9d7c35306 Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Fri, 27 Jun 2014 17:57:04 +0800
Subject: f2fs: remove the needless point-cast

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 087b03dbca9b..bcbfbc467f46 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -298,14 +298,13 @@ static int make_empty_dir(struct inode *inode,
 	struct page *dentry_page;
 	struct f2fs_dentry_block *dentry_blk;
 	struct f2fs_dir_entry *de;
-	void *kaddr;
 
 	dentry_page = get_new_data_page(inode, page, 0, true);
 	if (IS_ERR(dentry_page))
 		return PTR_ERR(dentry_page);
 
-	kaddr = kmap_atomic(dentry_page);
-	dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+	dentry_blk = kmap_atomic(dentry_page);
 
 	de = &dentry_blk->dentry[0];
 	de->name_len = cpu_to_le16(1);
@@ -323,7 +322,7 @@ static int make_empty_dir(struct inode *inode,
 
 	test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
 	test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
-	kunmap_atomic(kaddr);
+	kunmap_atomic(dentry_blk);
 
 	set_page_dirty(dentry_page);
 	f2fs_put_page(dentry_page, 1);
@@ -570,14 +569,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	struct address_space *mapping = page->mapping;
 	struct inode *dir = mapping->host;
 	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
-	void *kaddr = page_address(page);
 	int i;
 
 	lock_page(page);
 	f2fs_wait_on_page_writeback(page, DATA);
 
-	dentry_blk = (struct f2fs_dentry_block *)kaddr;
-	bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
+	dentry_blk = page_address(page);
+	bit_pos = dentry - dentry_blk->dentry;
 	for (i = 0; i < slots; i++)
 		test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
 
@@ -632,7 +630,6 @@ bool f2fs_empty_dir(struct inode *dir)
 	unsigned long nblock = dir_blocks(dir);
 
 	for (bidx = 0; bidx < nblock; bidx++) {
-		void *kaddr;
 		dentry_page = get_lock_data_page(dir, bidx);
 		if (IS_ERR(dentry_page)) {
 			if (PTR_ERR(dentry_page) == -ENOENT)
@@ -641,8 +638,8 @@ bool f2fs_empty_dir(struct inode *dir)
 				return false;
 		}
 
-		kaddr = kmap_atomic(dentry_page);
-		dentry_blk = (struct f2fs_dentry_block *)kaddr;
+
+		dentry_blk = kmap_atomic(dentry_page);
 		if (bidx == 0)
 			bit_pos = 2;
 		else
@@ -650,7 +647,7 @@ bool f2fs_empty_dir(struct inode *dir)
 		bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
 						NR_DENTRY_IN_BLOCK,
 						bit_pos);
-		kunmap_atomic(kaddr);
+		kunmap_atomic(dentry_blk);
 
 		f2fs_put_page(dentry_page, 1);
 
-- 
cgit v1.2.3


From eee6160f2e324f0f6a626159fa1120caad31a6be Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Tue, 24 Jun 2014 18:21:23 +0800
Subject: f2fs: arguments cleanup of finding file flow functions

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c  | 28 +++++++++++++---------------
 fs/f2fs/f2fs.h |  2 +-
 fs/f2fs/hash.c |  4 +++-
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index bcbfbc467f46..e84e8807c651 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,
 	return bidx;
 }
 
-static bool early_match_name(const char *name, size_t namelen,
-			f2fs_hash_t namehash, struct f2fs_dir_entry *de)
+static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
+				struct f2fs_dir_entry *de)
 {
 	if (le16_to_cpu(de->name_len) != namelen)
 		return false;
@@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,
 }
 
 static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
-			const char *name, size_t namelen, int *max_slots,
+			struct qstr *name, int *max_slots,
 			f2fs_hash_t namehash, struct page **res_page)
 {
 	struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 			continue;
 		}
 		de = &dentry_blk->dentry[bit_pos];
-		if (early_match_name(name, namelen, namehash, de)) {
+		if (early_match_name(name->len, namehash, de)) {
 			if (!memcmp(dentry_blk->filename[bit_pos],
-							name, namelen)) {
+							name->name,
+							name->len)) {
 				*res_page = dentry_page;
 				goto found;
 			}
@@ -132,10 +133,10 @@ found:
 }
 
 static struct f2fs_dir_entry *find_in_level(struct inode *dir,
-		unsigned int level, const char *name, size_t namelen,
+			unsigned int level, struct qstr *name,
 			f2fs_hash_t namehash, struct page **res_page)
 {
-	int s = GET_DENTRY_SLOTS(namelen);
+	int s = GET_DENTRY_SLOTS(name->len);
 	unsigned int nbucket, nblock;
 	unsigned int bidx, end_block;
 	struct page *dentry_page;
@@ -160,8 +161,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 			continue;
 		}
 
-		de = find_in_block(dentry_page, name, namelen,
-					&max_slots, namehash, res_page);
+		de = find_in_block(dentry_page, name, &max_slots,
+					namehash, res_page);
 		if (de)
 			break;
 
@@ -187,8 +188,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
 			struct qstr *child, struct page **res_page)
 {
-	const char *name = child->name;
-	size_t namelen = child->len;
 	unsigned long npages = dir_blocks(dir);
 	struct f2fs_dir_entry *de = NULL;
 	f2fs_hash_t name_hash;
@@ -200,12 +199,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
 
 	*res_page = NULL;
 
-	name_hash = f2fs_dentry_hash(name, namelen);
+	name_hash = f2fs_dentry_hash(child);
 	max_depth = F2FS_I(dir)->i_current_depth;
 
 	for (level = 0; level < max_depth; level++) {
-		de = find_in_level(dir, level, name,
-				namelen, name_hash, res_page);
+		de = find_in_level(dir, level, child, name_hash, res_page);
 		if (de)
 			break;
 	}
@@ -460,7 +458,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
 	int err = 0;
 	int i;
 
-	dentry_hash = f2fs_dentry_hash(name->name, name->len);
+	dentry_hash = f2fs_dentry_hash(name);
 	level = 0;
 	current_depth = F2FS_I(dir)->i_current_depth;
 	if (F2FS_I(dir)->chash == dentry_hash) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ec480b1a6e33..ae3b4ac7a254 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1158,7 +1158,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
 /*
  * hash.c
  */
-f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
 
 /*
  * node.c
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d269b53b..948d17bf7281 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
 		*buf++ = pad;
 }
 
-f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
 {
 	__u32 hash;
 	f2fs_hash_t f2fs_hash;
 	const char *p;
 	__u32 in[8], buf[4];
+	const char *name = name_info->name;
+	size_t len = name_info->len;
 
 	if ((len <= 2) && (name[0] == '.') &&
 		(name[1] == '.' || name[1] == '\0'))
-- 
cgit v1.2.3


From 3aab8f828ef358ae92545294a14cd52d510cc585 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Wed, 2 Jul 2014 13:25:04 +0800
Subject: f2fs: introduce f2fs_write_failed to handle error case when write

When we fail in ->write_begin()/->direct_IO(), our allocated node block in disk
and page cache are still kept, despite these may not be used again.

This patch introduce f2fs_write_failed() to handle the error case of these two
interfaces, it will truncate page cache and blocks of this file according to
i_size.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2eb2764173ca..05154d6de49a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -914,6 +914,16 @@ skip_write:
 	return 0;
 }
 
+static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+{
+	struct inode *inode = mapping->host;
+
+	if (to > inode->i_size) {
+		truncate_pagecache(inode, inode->i_size);
+		truncate_blocks(inode, inode->i_size);
+	}
+}
+
 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
@@ -931,11 +941,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 repeat:
 	err = f2fs_convert_inline_data(inode, pos + len);
 	if (err)
-		return err;
+		goto fail;
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
+	if (!page) {
+		err = -ENOMEM;
+		goto fail;
+	}
 
 	/* to avoid latency during memory pressure */
 	unlock_page(page);
@@ -949,10 +961,9 @@ repeat:
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = f2fs_reserve_block(&dn, index);
 	f2fs_unlock_op(sbi);
-
 	if (err) {
 		f2fs_put_page(page, 0);
-		return err;
+		goto fail;
 	}
 inline_data:
 	lock_page(page);
@@ -982,19 +993,20 @@ inline_data:
 			err = f2fs_read_inline_data(inode, page);
 			if (err) {
 				page_cache_release(page);
-				return err;
+				goto fail;
 			}
 		} else {
 			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
 							READ_SYNC);
 			if (err)
-				return err;
+				goto fail;
 		}
 
 		lock_page(page);
 		if (unlikely(!PageUptodate(page))) {
 			f2fs_put_page(page, 1);
-			return -EIO;
+			err = -EIO;
+			goto fail;
 		}
 		if (unlikely(page->mapping != mapping)) {
 			f2fs_put_page(page, 1);
@@ -1005,6 +1017,9 @@ out:
 	SetPageUptodate(page);
 	clear_cold_data(page);
 	return 0;
+fail:
+	f2fs_write_failed(mapping, pos + len);
+	return err;
 }
 
 static int f2fs_write_end(struct file *file,
@@ -1049,7 +1064,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	size_t count = iov_iter_count(iter);
+	int err;
 
 	/* Let buffer I/O handle the inline data case. */
 	if (f2fs_has_inline_data(inode))
@@ -1061,8 +1079,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	/* clear fsync mark to recover these blocks */
 	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
 
-	return blockdev_direct_IO(rw, iocb, inode, iter, offset,
-				  get_data_block);
+	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
+	if (err < 0 && (rw & WRITE))
+		f2fs_write_failed(mapping, offset + count);
+	return err;
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
-- 
cgit v1.2.3


From 6b2920a513ec9972f7b80d219fcf6f59130a1f31 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Mon, 7 Jul 2014 11:21:59 +0800
Subject: f2fs: use inner macro and function to clean up codes

In this patch we use below inner macro and function to clean up codes.
1. ADDRS_PER_PAGE
2. SM_I
3. f2fs_readonly

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c    | 3 +--
 fs/f2fs/segment.c | 9 ++++-----
 fs/f2fs/super.c   | 7 +++----
 3 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 36fa50587bb8..7c652b3e1511 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -272,8 +272,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 			}
 		}
 
-		end_offset = IS_INODE(dn.node_page) ?
-			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 
 		/* find data/hole in dnode block */
 		for (; dn.ofs_in_node < end_offset;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a4f8375f6ae6..8a6e57d83c79 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -272,13 +272,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 		return -ENOMEM;
 	spin_lock_init(&fcc->issue_lock);
 	init_waitqueue_head(&fcc->flush_wait_queue);
-	sbi->sm_info->cmd_control_info = fcc;
+	SM_I(sbi)->cmd_control_info = fcc;
 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(fcc->f2fs_issue_flush)) {
 		err = PTR_ERR(fcc->f2fs_issue_flush);
 		kfree(fcc);
-		sbi->sm_info->cmd_control_info = NULL;
+		SM_I(sbi)->cmd_control_info = NULL;
 		return err;
 	}
 
@@ -287,13 +287,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 
 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
 {
-	struct flush_cmd_control *fcc =
-				sbi->sm_info->cmd_control_info;
+	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
 
 	if (fcc && fcc->f2fs_issue_flush)
 		kthread_stop(fcc->f2fs_issue_flush);
 	kfree(fcc);
-	sbi->sm_info->cmd_control_info = NULL;
+	SM_I(sbi)->cmd_control_info = NULL;
 }
 
 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f96d9372ade..870fe199bafb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -615,7 +615,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * Previous and new state of filesystem is RO,
 	 * so skip checking GC and FLUSH_MERGE conditions.
 	 */
-	if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
+	if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
 		goto skip;
 
 	/*
@@ -642,8 +642,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 */
 	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
 		destroy_flush_cmd_control(sbi);
-	} else if (test_opt(sbi, FLUSH_MERGE) &&
-					!sbi->sm_info->cmd_control_info) {
+	} else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
 		err = create_flush_cmd_control(sbi);
 		if (err)
 			goto restore_gc;
@@ -1082,7 +1081,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	 * If filesystem is not mounted as read-only then
 	 * do start the gc_thread.
 	 */
-	if (!(sb->s_flags & MS_RDONLY)) {
+	if (!f2fs_readonly(sb)) {
 		/* After POR, we can run background GC thread.*/
 		err = start_gc_thread(sbi);
 		if (err)
-- 
cgit v1.2.3


From 81e366f87f52c62671fb1d8050572e68dbcf1d22 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Thu, 10 Jul 2014 12:37:46 +0800
Subject: f2fs: check name_len of dir entry to prevent from deadloop

We assume that modification of some special application could result in zeroed
name_len, or it is consciously made by somebody. We will deadloop in
find_in_block when name_len of dir entry is zero.

This patch is added for preventing deadloop in above scenario.

change log from v1:
 o use f2fs_bug_on rather than break out from searching dir entry suggested by
Jaegeuk Kim.

Jaegeuk describe:
"Well, IMO, it would be good to add f2fs_bug_on() here with a specific comment.
In the current phase of f2fs, it is more important to investigate the file
system bugs, rather than workarounds for any corrupted images.
And, definitely it needs to stop the kernel if any corrupted image was mounted,
so that we can figure out where the bugs are occurred."

Suggested-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index e84e8807c651..bcf893c3d903 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -121,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 			*max_slots = max_len;
 			max_len = 0;
 		}
+
+		/*
+		 * For the most part, it should be a bug when name_len is zero.
+		 * We stop here for figuring out where the bugs are occurred.
+		 */
+		f2fs_bug_on(!de->name_len);
+
 		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 	}
 
-- 
cgit v1.2.3


From 7a6c76b1b2f2c9555571647f07fc7ccada0a1b0a Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Fri, 11 Jul 2014 18:35:43 +0800
Subject: f2fs: cleanup the needless return of f2fs_create_root_stats

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12cf5873..3f99266ccc17 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)
 
 	f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
 	if (!f2fs_debugfs_root)
-		goto bail;
+		return;
 
 	file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
 			NULL, &stat_fops);
-	if (!file)
-		goto free_debugfs_dir;
-
-	return;
-
-free_debugfs_dir:
-	debugfs_remove(f2fs_debugfs_root);
-
-bail:
-	f2fs_debugfs_root = NULL;
-	return;
+	if (!file) {
+		debugfs_remove(f2fs_debugfs_root);
+		f2fs_debugfs_root = NULL;
+	}
 }
 
 void f2fs_destroy_root_stats(void)
-- 
cgit v1.2.3


From 4b2868aa4f07eeb64e96c2e0823387c426eca356 Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Fri, 11 Jul 2014 18:35:44 +0800
Subject: f2fs: remove the unused stat_lock

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ae3b4ac7a254..8f507d4c0f71 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1298,7 +1298,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);
 struct f2fs_stat_info {
 	struct list_head stat_list;
 	struct f2fs_sb_info *sbi;
-	struct mutex stat_lock;
 	int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
 	int main_area_segs, main_area_sections, main_area_zones;
 	int hit_ext, total_ext;
-- 
cgit v1.2.3


From f1121ab0ba9ab9c1592049533b511877600f409e Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Mon, 14 Jul 2014 16:45:15 +0800
Subject: f2fs: reduce searching region of segmap when free section

In __set_test_and_free we will check whether all segment are free in one section
When free one segment, in order to set section to free status.
But the searching region of segmap is from start segno to last segno of f2fs,
it's not necessary. So let's just only check all segment bitmap of target
section.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204680f4..ee5c75e08d9c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
 	if (test_and_clear_bit(segno, free_i->free_segmap)) {
 		free_i->free_segments++;
 
-		next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi),
-								start_segno);
+		next = find_next_bit(free_i->free_segmap,
+				start_segno + sbi->segs_per_sec, start_segno);
 		if (next >= start_segno + sbi->segs_per_sec) {
 			if (test_and_clear_bit(secno, free_i->free_secmap))
 				free_i->free_sections++;
-- 
cgit v1.2.3


From 79e35dc3c23dd2ac9f8681361026c82b71a0b006 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Sat, 12 Jul 2014 20:10:00 +0800
Subject: f2fs: add f2fs_balance_fs for direct IO

Otherwise, if a large amount of direct IO writes were done, the
segment allocation may be failed because no enough segments are gced.

Changes:

v2: add f2fs_balance_fs into __get_data_block instead of f2fs_direct_IO.

Signed-off-by: Huang, Ying <ying.huang@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 05154d6de49a..c77c66723d70 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -626,8 +626,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 	if (check_extent_cache(inode, pgofs, bh_result))
 		goto out;
 
-	if (create)
+	if (create) {
+		f2fs_balance_fs(sbi);
 		f2fs_lock_op(sbi);
+	}
 
 	/* When reading holes, we need its node page */
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
-- 
cgit v1.2.3


From 32f9bc25cbda00410e2379c58ae027e88bf24770 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Sat, 12 Jul 2014 19:13:54 +0800
Subject: f2fs: support ->rename2()

Now new interface ->rename2() is added to VFS, here are related description:
https://lkml.org/lkml/2014/2/7/873
https://lkml.org/lkml/2014/2/7/758

This patch adds function f2fs_rename2() to support ->rename2() including
handling both RENAME_EXCHANGE and RENAME_NOREPLACE flag.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)

(limited to 'fs')

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1b3cae03c24f..27b03776ffd2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -485,6 +485,169 @@ out:
 	return err;
 }
 
+static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+			     struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct super_block *sb = old_dir->i_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct inode *old_inode = old_dentry->d_inode;
+	struct inode *new_inode = new_dentry->d_inode;
+	struct page *old_dir_page, *new_dir_page;
+	struct page *old_page, *new_page;
+	struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
+	struct f2fs_dir_entry *old_entry, *new_entry;
+	int old_nlink = 0, new_nlink = 0;
+	int err = -ENOENT;
+
+	f2fs_balance_fs(sbi);
+
+	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+	if (!old_entry)
+		goto out;
+
+	new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
+	if (!new_entry)
+		goto out_old;
+
+	/* prepare for updating ".." directory entry info later */
+	if (old_dir != new_dir) {
+		if (S_ISDIR(old_inode->i_mode)) {
+			err = -EIO;
+			old_dir_entry = f2fs_parent_dir(old_inode,
+							&old_dir_page);
+			if (!old_dir_entry)
+				goto out_new;
+		}
+
+		if (S_ISDIR(new_inode->i_mode)) {
+			err = -EIO;
+			new_dir_entry = f2fs_parent_dir(new_inode,
+							&new_dir_page);
+			if (!new_dir_entry)
+				goto out_old_dir;
+		}
+	}
+
+	/*
+	 * If cross rename between file and directory those are not
+	 * in the same directory, we will inc nlink of file's parent
+	 * later, so we should check upper boundary of its nlink.
+	 */
+	if ((!old_dir_entry || !new_dir_entry) &&
+				old_dir_entry != new_dir_entry) {
+		old_nlink = old_dir_entry ? -1 : 1;
+		new_nlink = -old_nlink;
+		err = -EMLINK;
+		if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
+			(new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
+			goto out_new_dir;
+	}
+
+	f2fs_lock_op(sbi);
+
+	err = update_dent_inode(old_inode, &new_dentry->d_name);
+	if (err)
+		goto out_unlock;
+
+	err = update_dent_inode(new_inode, &old_dentry->d_name);
+	if (err)
+		goto out_undo;
+
+	/* update ".." directory entry info of old dentry */
+	if (old_dir_entry)
+		f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
+
+	/* update ".." directory entry info of new dentry */
+	if (new_dir_entry)
+		f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir);
+
+	/* update directory entry info of old dir inode */
+	f2fs_set_link(old_dir, old_entry, old_page, new_inode);
+
+	down_write(&F2FS_I(old_inode)->i_sem);
+	file_lost_pino(old_inode);
+	up_write(&F2FS_I(old_inode)->i_sem);
+
+	update_inode_page(old_inode);
+
+	old_dir->i_ctime = CURRENT_TIME;
+	if (old_nlink) {
+		down_write(&F2FS_I(old_dir)->i_sem);
+		if (old_nlink < 0)
+			drop_nlink(old_dir);
+		else
+			inc_nlink(old_dir);
+		up_write(&F2FS_I(old_dir)->i_sem);
+	}
+	mark_inode_dirty(old_dir);
+	update_inode_page(old_dir);
+
+	/* update directory entry info of new dir inode */
+	f2fs_set_link(new_dir, new_entry, new_page, old_inode);
+
+	down_write(&F2FS_I(new_inode)->i_sem);
+	file_lost_pino(new_inode);
+	up_write(&F2FS_I(new_inode)->i_sem);
+
+	update_inode_page(new_inode);
+
+	new_dir->i_ctime = CURRENT_TIME;
+	if (new_nlink) {
+		down_write(&F2FS_I(new_dir)->i_sem);
+		if (new_nlink < 0)
+			drop_nlink(new_dir);
+		else
+			inc_nlink(new_dir);
+		up_write(&F2FS_I(new_dir)->i_sem);
+	}
+	mark_inode_dirty(new_dir);
+	update_inode_page(new_dir);
+
+	f2fs_unlock_op(sbi);
+	return 0;
+out_undo:
+	/* Still we may fail to recover name info of f2fs_inode here */
+	update_dent_inode(old_inode, &old_dentry->d_name);
+out_unlock:
+	f2fs_unlock_op(sbi);
+out_new_dir:
+	if (new_dir_entry) {
+		kunmap(new_dir_page);
+		f2fs_put_page(new_dir_page, 0);
+	}
+out_old_dir:
+	if (old_dir_entry) {
+		kunmap(old_dir_page);
+		f2fs_put_page(old_dir_page, 0);
+	}
+out_new:
+	kunmap(new_page);
+	f2fs_put_page(new_page, 0);
+out_old:
+	kunmap(old_page);
+	f2fs_put_page(old_page, 0);
+out:
+	return err;
+}
+
+static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+			struct inode *new_dir, struct dentry *new_dentry,
+			unsigned int flags)
+{
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+		return -EINVAL;
+
+	if (flags & RENAME_EXCHANGE) {
+		return f2fs_cross_rename(old_dir, old_dentry,
+					 new_dir, new_dentry);
+	}
+	/*
+	 * VFS has already handled the new dentry existence case,
+	 * here, we just deal with "RENAME_NOREPLACE" as regular rename.
+	 */
+	return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
 static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
@@ -542,6 +705,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
 	.rmdir		= f2fs_rmdir,
 	.mknod		= f2fs_mknod,
 	.rename		= f2fs_rename,
+	.rename2	= f2fs_rename2,
 	.tmpfile	= f2fs_tmpfile,
 	.getattr	= f2fs_getattr,
 	.setattr	= f2fs_setattr,
-- 
cgit v1.2.3


From dbf20cb259e879e2d939fd3fd5c792732d845195 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Fri, 25 Jul 2014 12:00:57 +0800
Subject: f2fs: avoid use invalid mapping of node_inode when evict meta inode

Andrey Tsyvarev reported:
"Using memory error detector reveals the following use-after-free error
in 3.15.0:

AddressSanitizer: heap-use-after-free in f2fs_evict_inode
Read of size 8 by thread T22279:
  [<ffffffffa02d8702>] f2fs_evict_inode+0x102/0x2e0 [f2fs]
  [<ffffffff812359af>] evict+0x15f/0x290
  [<     inlined    >] iput+0x196/0x280 iput_final
  [<ffffffff812369a6>] iput+0x196/0x280
  [<ffffffffa02dc416>] f2fs_put_super+0xd6/0x170 [f2fs]
  [<ffffffff81210095>] generic_shutdown_super+0xc5/0x1b0
  [<ffffffff812105fd>] kill_block_super+0x4d/0xb0
  [<ffffffff81210a86>] deactivate_locked_super+0x66/0x80
  [<ffffffff81211c98>] deactivate_super+0x68/0x80
  [<ffffffff8123cc88>] mntput_no_expire+0x198/0x250
  [<     inlined    >] SyS_umount+0xe9/0x1a0 SYSC_umount
  [<ffffffff8123f1c9>] SyS_umount+0xe9/0x1a0
  [<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b

Freed by thread T3:
  [<ffffffffa02dc337>] f2fs_i_callback+0x27/0x30 [f2fs]
  [<     inlined    >] rcu_process_callbacks+0x2d6/0x930 __rcu_reclaim
  [<     inlined    >] rcu_process_callbacks+0x2d6/0x930 rcu_do_batch
  [<     inlined    >] rcu_process_callbacks+0x2d6/0x930 invoke_rcu_callbacks
  [<     inlined    >] rcu_process_callbacks+0x2d6/0x930 __rcu_process_callbacks
  [<ffffffff810fd266>] rcu_process_callbacks+0x2d6/0x930
  [<ffffffff8107cce2>] __do_softirq+0x142/0x380
  [<ffffffff8107cf50>] run_ksoftirqd+0x30/0x50
  [<ffffffff810b2a87>] smpboot_thread_fn+0x197/0x280
  [<ffffffff810a8238>] kthread+0x148/0x160
  [<ffffffff81cc8d4c>] ret_from_fork+0x7c/0xb0

Allocated by thread T22276:
  [<ffffffffa02dc7dd>] f2fs_alloc_inode+0x2d/0x170 [f2fs]
  [<ffffffff81235e2a>] iget_locked+0x10a/0x230
  [<ffffffffa02d7495>] f2fs_iget+0x35/0xa80 [f2fs]
  [<ffffffffa02e2393>] f2fs_fill_super+0xb53/0xff0 [f2fs]
  [<ffffffff81211bce>] mount_bdev+0x1de/0x240
  [<ffffffffa02dbce0>] f2fs_mount+0x10/0x20 [f2fs]
  [<ffffffff81212a85>] mount_fs+0x55/0x220
  [<ffffffff8123c026>] vfs_kern_mount+0x66/0x200
  [<     inlined    >] do_mount+0x2b4/0x1120 do_new_mount
  [<ffffffff812400d4>] do_mount+0x2b4/0x1120
  [<     inlined    >] SyS_mount+0xb2/0x110 SYSC_mount
  [<ffffffff812414a2>] SyS_mount+0xb2/0x110
  [<ffffffff81cc8df9>] system_call_fastpath+0x16/0x1b

The buggy address ffff8800587866c8 is located 48 bytes inside
  of 680-byte region [ffff880058786698, ffff880058786940)

Memory state around the buggy address:
  ffff880058786100: ffffffff ffffffff ffffffff ffffffff
  ffff880058786200: ffffffff ffffffff ffffffrr rrrrrrrr
  ffff880058786300: rrrrrrrr rrffffff ffffffff ffffffff
  ffff880058786400: ffffffff ffffffff ffffffff ffffffff
  ffff880058786500: ffffffff ffffffff ffffffff fffffffr
 >ffff880058786600: rrrrrrrr rrrrrrrr rrrfffff ffffffff
                                                ^
  ffff880058786700: ffffffff ffffffff ffffffff ffffffff
  ffff880058786800: ffffffff ffffffff ffffffff ffffffff
  ffff880058786900: ffffffff rrrrrrrr rrrrrrrr rrrr....
  ffff880058786a00: ........ ........ ........ ........
  ffff880058786b00: ........ ........ ........ ........
Legend:
  f - 8 freed bytes
  r - 8 redzone bytes
  . - 8 allocated bytes
  x=1..7 - x allocated bytes + (8-x) redzone bytes

Investigation shows, that f2fs_evict_inode, when called for
'meta_inode', uses invalidate_mapping_pages() for 'node_inode'.
But 'node_inode' is deleted before 'meta_inode' in f2fs_put_super via
iput().

It seems that in common usage scenario this use-after-free is benign,
because 'node_inode' remains partially valid data even after
kmem_cache_free().
But things may change if, while 'meta_inode' is evicted in one f2fs
filesystem, another (mounted) f2fs filesystem requests inode from cache,
and formely
'node_inode' of the first filesystem is returned."

Nids for both meta_inode and node_inode are reservation, so it's not necessary
for us to invalidate pages which will never be allocated.
To fix this issue, let's skipping needlessly invalidating pages for
{meta,node}_inode in f2fs_evict_inode.

Reported-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Tested-by: Andrey Tsyvarev <tsyvarev@ispras.ru>
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2cf6962f6cc8..cafba3c9f8d8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -273,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode)
 
 	if (inode->i_ino == F2FS_NODE_INO(sbi) ||
 			inode->i_ino == F2FS_META_INO(sbi))
-		goto no_delete;
+		goto out_clear;
 
 	f2fs_bug_on(get_dirty_dents(inode));
 	remove_dirty_dir_inode(inode);
@@ -295,6 +295,7 @@ void f2fs_evict_inode(struct inode *inode)
 
 	sb_end_intwrite(inode->i_sb);
 no_delete:
-	clear_inode(inode);
 	invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
+out_clear:
+	clear_inode(inode);
 }
-- 
cgit v1.2.3


From 9d847950770da7102d4efc02d0939ce28e3a7dd0 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Fri, 25 Jul 2014 12:55:09 +0800
Subject: f2fs: fix to put root inode in error path of fill_super

We should put root inode correctly in error path of fill_super, otherwise we
may encounter a leak case of inode resource.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Reviewed-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 870fe199bafb..34649aa66e04 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1033,8 +1033,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_node_inode;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+		iput(root);
 		err = -EINVAL;
-		goto free_root_inode;
+		goto free_node_inode;
 	}
 
 	sb->s_root = d_make_root(root); /* allocate root dentry */
-- 
cgit v1.2.3


From 0f7b2abd188089a44f60e2bf