summaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-09 09:31:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-09 09:31:22 -0700
commitbc2c6421cbb420677c4bb56adaf434414770ce8a (patch)
treedc488ded5d21f28c82ca62acd23dbea299aaa5e1 /fs/ext4
parent58f587cb0b603de3d8869e021d4fa704e065afa8 (diff)
parentff95015648df445999c8483270905f7d3dec51e1 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "The first major feature for ext4 this merge window is the largedir feature, which allows ext4 directories to support over 2 billion directory entries (assuming ~64 byte file names; in practice, users will run into practical performance limits first.) This feature was originally written by the Lustre team, and credit goes to Artem Blagodarenko from Seagate for getting this feature upstream. The second major major feature allows ext4 to support extended attribute values up to 64k. This feature was also originally from Lustre, and has been enhanced by Tahsin Erdogan from Google with a deduplication feature so that if multiple files have the same xattr value (for example, Windows ACL's stored by Samba), only one copy will be stored on disk for encoding and caching efficiency. We also have the usual set of bug fixes, cleanups, and optimizations" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (47 commits) ext4: fix spelling mistake: "prellocated" -> "preallocated" ext4: fix __ext4_new_inode() journal credits calculation ext4: skip ext4_init_security() and encryption on ea_inodes fs: generic_block_bmap(): initialize all of the fields in the temp bh ext4: change fast symlink test to not rely on i_blocks ext4: require key for truncate(2) of encrypted file ext4: don't bother checking for encryption key in ->mmap() ext4: check return value of kstrtoull correctly in reserved_clusters_store ext4: fix off-by-one fsmap error on 1k block filesystems ext4: return EFSBADCRC if a bad checksum error is found in ext4_find_entry() ext4: return EIO on read error in ext4_find_entry ext4: forbid encrypting root directory ext4: send parallel discards on commit completions ext4: avoid unnecessary stalls in ext4_evict_inode() ext4: add nombcache mount option ext4: strong binding of xattr inode references ext4: eliminate xattr entry e_hash recalculation for removes ext4: reserve space for xattr entries/names quota: add get_inode_usage callback to transfer multi-inode charges ext4: xattr inode deduplication ...
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/acl.c21
-rw-r--r--fs/ext4/ext4.h63
-rw-r--r--fs/ext4/ext4_jbd2.h23
-rw-r--r--fs/ext4/extents.c3
-rw-r--r--fs/ext4/file.c7
-rw-r--r--fs/ext4/fsmap.c4
-rw-r--r--fs/ext4/ialloc.c76
-rw-r--r--fs/ext4/indirect.c3
-rw-r--r--fs/ext4/inline.c2
-rw-r--r--fs/ext4/inode.c92
-rw-r--r--fs/ext4/ioctl.c10
-rw-r--r--fs/ext4/mballoc.c145
-rw-r--r--fs/ext4/mballoc.h6
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/namei.c131
-rw-r--r--fs/ext4/super.c109
-rw-r--r--fs/ext4/sysfs.c2
-rw-r--r--fs/ext4/xattr.c1699
-rw-r--r--fs/ext4/xattr.h35
20 files changed, 1994 insertions, 441 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3ec0e46de95f..09441ae07a5b 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type)
*/
static int
__ext4_set_acl(handle_t *handle, struct inode *inode, int type,
- struct posix_acl *acl)
+ struct posix_acl *acl, int xattr_flags)
{
int name_index;
void *value = NULL;
@@ -218,7 +218,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
}
error = ext4_xattr_set_handle(handle, inode, name_index, "",
- value, size, 0);
+ value, size, xattr_flags);
kfree(value);
if (!error)
@@ -231,18 +231,23 @@ int
ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
handle_t *handle;
- int error, retries = 0;
+ int error, credits, retries = 0;
+ size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0;
error = dquot_initialize(inode);
if (error)
return error;
retry:
- handle = ext4_journal_start(inode, EXT4_HT_XATTR,
- ext4_jbd2_credits_xattr(inode));
+ error = ext4_xattr_set_credits(inode, acl_size, false /* is_create */,
+ &credits);
+ if (error)
+ return error;
+
+ handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
- error = __ext4_set_acl(handle, inode, type, acl);
+ error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
ext4_journal_stop(handle);
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -267,13 +272,13 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
if (default_acl) {
error = __ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT,
- default_acl);
+ default_acl, XATTR_CREATE);
posix_acl_release(default_acl);
}
if (acl) {
if (!error)
error = __ext4_set_acl(handle, inode, ACL_TYPE_ACCESS,
- acl);
+ acl, XATTR_CREATE);
posix_acl_release(acl);
}
return error;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 32191548abed..9ebde0cd632e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1114,6 +1114,7 @@ struct ext4_inode_info {
/*
* Mount flags set via mount options or defaults
*/
+#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -1444,6 +1445,8 @@ struct ext4_sb_info {
unsigned int *s_mb_maxs;
unsigned int s_group_info_size;
unsigned int s_mb_free_pending;
+ struct list_head s_freed_data_list; /* List of blocks to be freed
+ after commit completed */
/* tunables */
unsigned long s_stripe;
@@ -1516,7 +1519,8 @@ struct ext4_sb_info {
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
- struct mb_cache *s_mb_cache;
+ struct mb_cache *s_ea_block_cache;
+ struct mb_cache *s_ea_inode_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
@@ -1797,10 +1801,12 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+ EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
+ EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2098,6 +2104,12 @@ static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
}
+static inline bool ext4_is_quota_file(struct inode *inode)
+{
+ return IS_NOQUOTA(inode) &&
+ !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL);
+}
+
/*
* This structure is stuffed into the struct file's private_data field
* for directories. It is where we put information so that we can do
@@ -2126,6 +2138,16 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
*/
#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
+/* htree levels for ext4 */
+#define EXT4_HTREE_LEVEL_COMPAT 2
+#define EXT4_HTREE_LEVEL 3
+
+static inline int ext4_dir_htree_level(struct super_block *sb)
+{
+ return ext4_has_feature_largedir(sb) ?
+ EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
+}
+
/*
* Timeout and state flag for lazy initialization inode thread.
*/
@@ -2389,16 +2411,17 @@ extern int ext4fs_dirhash(const char *name, int len, struct
/* ialloc.c */
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
- uid_t *owner, int handle_type,
- unsigned int line_no, int nblocks);
+ uid_t *owner, __u32 i_flags,
+ int handle_type, unsigned int line_no,
+ int nblocks);
-#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
- 0, 0, 0)
+ i_flags, 0, 0, 0)
#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
type, nblocks) \
__ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
- (type), __LINE__, (nblocks))
+ 0, (type), __LINE__, (nblocks))
extern void ext4_free_inode(handle_t *, struct inode *);
@@ -2433,6 +2456,7 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
/* inode.c */
int ext4_inode_is_fast_symlink(struct inode *inode);
@@ -2704,19 +2728,20 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
extern int ext4_register_li_request(struct super_block *sb,
ext4_group_t first_not_zeroed);
-static inline int ext4_has_group_desc_csum(struct super_block *sb)
-{
- return ext4_has_feature_gdt_csum(sb) ||
- EXT4_SB(sb)->s_chksum_driver != NULL;
-}
-
static inline int ext4_has_metadata_csum(struct super_block *sb)
{
WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
!EXT4_SB(sb)->s_chksum_driver);
- return (EXT4_SB(sb)->s_chksum_driver != NULL);
+ return ext4_has_feature_metadata_csum(sb) &&
+ (EXT4_SB(sb)->s_chksum_driver != NULL);
}
+
+static inline int ext4_has_group_desc_csum(struct super_block *sb)
+{
+ return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
+}
+
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2756,13 +2781,15 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
}
-static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
+static inline loff_t ext4_isize(struct super_block *sb,
+ struct ext4_inode *raw_inode)
{
- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
+ if (ext4_has_feature_largedir(sb) ||
+ S_ISREG(le16_to_cpu(raw_inode->i_mode)))
return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
le32_to_cpu(raw_inode->i_size_lo);
- else
- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
+
+ return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
}
static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index f97611171023..dabad1bc8617 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -77,7 +77,14 @@
#define EXT4_RESERVE_TRANS_BLOCKS 12U
-#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
+/*
+ * Number of credits needed if we need to insert an entry into a
+ * directory. For each new index block, we need 4 blocks (old index
+ * block, new index block, bitmap block, bg summary). For normal
+ * htree directories there are 2 levels; if the largedir feature
+ * enabled it's 3 levels.
+ */
+#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 12U
#ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was
@@ -104,20 +111,6 @@
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
-static inline int ext4_jbd2_credits_xattr(struct inode *inode)
-{
- int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
-
- /*
- * In case of inline data, we may push out the data to a block,
- * so we need to reserve credits for this eventuality
- */
- if (ext4_has_inline_data(inode))
- credits += ext4_writepage_trans_blocks(inode) + 1;
- return credits;
-}
-
-
/*
* Ext4 handle operation types -- for logging purposes
*/
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3e36508610b7..e0a8425ff74d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2488,7 +2488,8 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
static inline int get_default_free_blocks_flags(struct inode *inode)
{
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
else if (ext4_should_journal_data(inode))
return EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 58e2eeaa0bc4..58294c9a7e1d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -364,13 +364,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
- if (ext4_encrypted_inode(inode)) {
- int err = fscrypt_get_encryption_info(inode);
- if (err)
- return 0;
- if (!fscrypt_has_encryption_key(inode))
- return -ENOKEY;
- }
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index b19436098837..7ec340898598 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -480,6 +480,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start_fsb;
ext4_fsblk_t end_fsb;
+ ext4_fsblk_t bofs;
ext4_fsblk_t eofs;
ext4_group_t start_ag;
ext4_group_t end_ag;
@@ -487,9 +488,12 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
ext4_grpblk_t last_cluster;
int error = 0;
+ bofs = le32_to_cpu(sbi->s_es->s_first_data_block);
eofs = ext4_blocks_count(sbi->s_es);
if (keys[0].fmr_physical >= eofs)
return 0;
+ else if (keys[0].fmr_physical < bofs)
+ keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
start_fsb = keys[0].fmr_physical;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 98ac2f1f23b3..507bfb3344d4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
* as writing the quota to disk may need the lock as well.
*/
dquot_initialize(inode);
- ext4_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);
@@ -743,8 +742,9 @@ out:
*/
struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
umode_t mode, const struct qstr *qstr,
- __u32 goal, uid_t *owner, int handle_type,
- unsigned int line_no, int nblocks)
+ __u32 goal, uid_t *owner, __u32 i_flags,
+ int handle_type, unsigned int line_no,
+ int nblocks)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -766,30 +766,69 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ sb = dir->i_sb;
+ sbi = EXT4_SB(sb);
+
+ if (unlikely(ext4_forced_shutdown(sbi)))
return ERR_PTR(-EIO);
- if ((ext4_encrypted_inode(dir) ||
- DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
- (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+ if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) &&
+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) &&
+ !(i_flags & EXT4_EA_INODE_FL)) {
err = fscrypt_get_encryption_info(dir);
if (err)
return ERR_PTR(err);
if (!fscrypt_has_encryption_key(dir))
return ERR_PTR(-ENOKEY);
- if (!handle)
- nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
}
- sb = dir->i_sb;
+ if (!handle && sbi->s_journal && !(i_flags & EXT4_EA_INODE_FL)) {
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+ struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
+
+ if (p) {
+ int acl_size = p->a_count * sizeof(ext4_acl_entry);
+
+ nblocks += (S_ISDIR(mode) ? 2 : 1) *
+ __ext4_xattr_set_credits(sb, NULL /* inode */,
+ NULL /* block_bh */, acl_size,
+ true /* is_create */);
+ posix_acl_release(p);
+ }
+#endif
+
+#ifdef CONFIG_SECURITY
+ {
+ int num_security_xattrs = 1;
+
+#ifdef CONFIG_INTEGRITY
+ num_security_xattrs++;
+#endif
+ /*
+ * We assume that security xattrs are never
+ * more than 1k. In practice they are under
+ * 128 bytes.
+ */
+ nblocks += num_security_xattrs *
+ __ext4_xattr_set_credits(sb, NULL /* inode */,
+ NULL /* block_bh */, 1024,
+ true /* is_create */);
+ }
+#endif
+ if (encrypt)
+ nblocks += __ext4_xattr_set_credits(sb,
+ NULL /* inode */, NULL /* block_bh */,
+ FSCRYPT_SET_CONTEXT_MAX_SIZE,
+ true /* is_create */);
+ }
+
ngroups = ext4_get_groups_count(sb);
trace_ext4_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
ei = EXT4_I(inode);
- sbi = EXT4_SB(sb);
/*
* Initialize owners and quota early so that we don't have to account
@@ -1053,6 +1092,7 @@ got:
/* Don't inherit extent flag from directory, amongst others. */
ei->i_flags =
ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
+ ei->i_flags |= i_flags;
ei->i_file_acl = 0;
ei->i_dtime = 0;
ei->i_block_group = group;
@@ -1109,13 +1149,15 @@ got:
goto fail_free_drop;
}
- err = ext4_init_acl(handle, inode, dir);
- if (err)
- goto fail_free_drop;
+ if (!(ei->i_flags & EXT4_EA_INODE_FL)) {
+ err = ext4_init_acl(handle, inode, dir);
+ if (err)
+ goto fail_free_drop;
- err = ext4_init_security(handle, inode, dir, qstr);
- if (err)
- goto fail_free_drop;
+ err = ext4_init_security(handle, inode, dir, qstr);
+ if (err)
+ goto fail_free_drop;
+ }
if (ext4_has_feature_extents(sb)) {
/* set extent flag only for directory, file and normal symlink*/
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bc15c2c17633..7ffa290cbb8e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -829,7 +829,8 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
int flags = EXT4_FREE_BLOCKS_VALIDATED;
int err;
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8d141c0c8ff9..28c5c3abddb3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
/* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
- if (!entry->e_value_block && entry->e_value_size) {
+ if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cf82d03968c..3c600f02673f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -144,16 +144,12 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
/*
* Test whether an inode is a fast symlink.
+ * A fast symlink has its symlink data stored in ext4_inode_info->i_data.
*/
int ext4_inode_is_fast_symlink(struct inode *inode)
{
- int ea_blocks = EXT4_I(inode)->i_file_acl ?
- EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
-
- if (ext4_has_inline_data(inode))
- return 0;
-
- return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+ return S_ISLNK(inode->i_mode) && inode->i_size &&
+ (inode->i_size < EXT4_N_BLOCKS * 4);
}
/*
@@ -189,6 +185,8 @@ void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
+ int extra_credits = 3;
+ struct ext4_xattr_inode_array *ea_inode_array = NULL;
trace_ext4_evict_inode(inode);
@@ -213,7 +211,8 @@ void ext4_evict_inode(struct inode *inode)
*/
if (inode->i_ino != EXT4_JOURNAL_INO &&
ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+ inode->i_data.nrpages) {
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
@@ -238,8 +237,12 @@ void ext4_evict_inode(struct inode *inode)
* protection against it
*/
sb_start_intwrite(inode->i_sb);
+
+ if (!IS_NOQUOTA(inode))
+ extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
+
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
- ext4_blocks_for_truncate(inode)+3);
+ ext4_blocks_for_truncate(inode)+extra_credits);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
@@ -254,6 +257,16 @@ void ext4_evict_inode(struct inode *inode)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
+
+ /*
+ * Set inode->i_size to 0 before calling ext4_truncate(). We need
+ * special handling of symlinks here because i_size is used to
+ * determine whether ext4_inode_info->i_data contains symlink data or
+ * block mappings. Setting i_size to 0 will remove its fast symlink
+ * status. Erase i_data so that it becomes a valid empty block map.
+ */
+ if (ext4_inode_is_fast_symlink(inode))
+ memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data));
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -271,25 +284,17 @@ void ext4_evict_inode(struct inode *inode)
}
}
- /*
- * ext4_ext_truncate() doesn't reserve any slop when it
- * restarts journal transactions; therefore there may not be
- * enough credits left in the handle to remove the inode from
- * the orphan list and set the dtime field.
- */
- if (!ext4_handle_has_enough_credits(handle, 3)) {
- err = ext4_journal_extend(handle, 3);
- if (err > 0)
- err = ext4_journal_restart(handle, 3);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
- stop_handle:
- ext4_journal_stop(handle);
- ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
- goto no_delete;
- }
+ /* Remove xattr references. */
+ err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
+ extra_credits);
+ if (err) {
+ ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
+stop_handle:
+ ext4_journal_stop(handle);
+ ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
+ ext4_xattr_inode_array_free(ea_inode_array);
+ goto no_delete;
}
/*
@@ -317,6 +322,7 @@ void ext4_evict_inode(struct inode *inode)
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);
+ ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -710,7 +716,7 @@ out_sem:
if (map->m_flags & EXT4_MAP_NEW &&
!(map->m_flags & EXT4_MAP_UNWRITTEN) &&
!(flags & EXT4_GET_BLOCKS_ZERO) &&
- !IS_NOQUOTA(inode) &&
+ !ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
ret = ext4_jbd2_inode_add_wait(handle, inode);
@@ -4712,7 +4718,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (ext4_has_feature_64bit(sb))
ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
- inode->i_size = ext4_isize(raw_inode);
+ inode->i_size = ext4_isize(sb, raw_inode);
if ((size = i_size_read(inode)) < 0) {
EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
ret = -EFSCORRUPTED;
@@ -4846,6 +4852,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
brelse(iloc.bh);
ext4_set_inode_flags(inode);
+
+ if (ei->i_flags & EXT4_EA_INODE_FL) {
+ ext4_xattr_inode_set_class(inode);
+
+ inode_lock(inode);
+ inode->i_flags |= S_NOQUOTA;
+ inode_unlock(inode);
+ }
+
unlock_new_inode(inode);
return inode;
@@ -5037,7 +5052,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
- if (ei->i_disksize != ext4_isize(raw_inode)) {
+ if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
ext4_isize_set(raw_inode, ei->i_disksize);
need_datasync = 1;
}
@@ -5287,7 +5302,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = PTR_ERR(handle);
goto err_out;
}
+
+ /* dquot_transfer() calls back ext4_get_inode_usage() which
+ * counts xattr inode references.
+ */
+ down_read(&EXT4_I(inode)->xattr_sem);
error = dquot_transfer(inode, attr);
+ up_read(&EXT4_I(inode)->xattr_sem);
+
if (error) {
ext4_journal_stop(handle);
return error;
@@ -5307,6 +5329,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
+ if (ext4_encrypted_inode(inode)) {
+ error = fscrypt_get_encryption_info(inode);
+ if (error)
+ return error;
+ if (!fscrypt_has_encryption_key(inode))
+ return -ENOKEY;
+ }
+
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0c21e22acd74..42b3a73143cf 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -218,7 +218,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
unsigned int jflag;
/* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
+ if (ext4_is_quota_file(inode))
goto flags_out;
oldflags = ei->i_flags;
@@ -342,7 +342,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
err = -EPERM;
inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
+ if (ext4_is_quota_file(inode))
goto out_unlock;
err = ext4_get_inode_loc(inode, &iloc);
@@ -373,7 +373,13 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
if (!IS_ERR(transfer_to[PRJQUOTA])) {
+
+ /* __dquot_transfer() calls back ext4_get_inode_usage() which
+ * counts xattr inode references.
+ */
+ down_read(&EXT4_I(inode)->xattr_sem);
err = __dquot_transfer(inode, transfer_to);
+ up_read(&EXT4_I(inode)->xattr_sem);
dqput(transfer_to[PRJQUOTA]);
if (err)
goto out_dirty;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b7928cddd539..581e357e8406 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -367,8 +367,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce, int rc);
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
@@ -2639,6 +2637,7 @@ int ext4_mb_init(struct super_block *sb)
spin_lock_init(&sbi->s_md_lock);
spin_lock_init(&sbi->s_bal_lock);
sbi->s_mb_free_pending = 0;
+ INIT_LIST_HEAD(&sbi->s_freed_data_list);
sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
@@ -2782,7 +2781,8 @@ int ext4_mb_release(struct super_block *sb)
}
static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+ ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+ struct bio **biop)
{
ext4_fsblk_t discard_block;
@@ -2791,18 +2791,18 @@ static inline int ext4_issue_discard(struct super_block *sb,
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ if (biop) {
+ return __blkdev_issue_discard(sb->s_bdev,
+ (sector_t)discard_block << (sb->s_blocksize_bits - 9),
+ (sector_t)count << (sb->s_blocksize_bits - 9),
+ GFP_NOFS, 0, biop);
+ } else
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}
-/*
- * This function is called by the jbd2 layer once the commit has finished,
- * so we know we can free the blocks that were released with that commit.
- */
-static void ext4_free_data_callback(struct super_block *sb,
- struct ext4_journal_cb_entry *jce,
- int rc)
+static void ext4_free_data_in_buddy(struct super_block *sb,
+ struct ext4_free_data *entry)
{
- struct ext4_free_data *entry = (struct ext4_free_data *)jce;
struct ext4_buddy e4b;
struct ext4_group_info *db;
int err, count = 0, count2 = 0;
@@ -2810,18 +2810,6 @@ static void ext4_free_data_callback(struct super_block *sb,
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->efd_count, entry->efd_group, entry);
- if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count);
- if (err && err != -EOPNOTSUPP)
- ext4_msg(sb, KERN_WARNING, "discard request in"
- " group:%d block:%d count:%d failed"
- " with %d", entry->efd_group,
- entry->efd_start_cluster,
- entry->efd_count, err);
- }
-
err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
/* we expect to find existing buddy because it's pinned */
BUG_ON(err != 0);
@@ -2862,6 +2850,56 @@ static void ext4_free_data_callback(struct super_block *sb,
mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
}
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_free_data *entry, *tmp;
+ struct bio *discard_bio = NULL;
+ struct list_head freed_data_list;
+ struct list_head *cut_pos = NULL;
+ int err;
+
+ INIT_LIST_HEAD(&freed_data_list);
+
+ spin_lock(&sbi->s_md_lock);
+ list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
+ if (entry->efd_tid != commit_tid)
+ break;
+ cut_pos = &entry->efd_list;
+ }
+ if (cut_pos)
+ list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
+ cut_pos);
+ spin_unlock(&sbi->s_md_lock);
+
+ if (test_opt(sb, DISCARD)) {
+ list_for_each_entry(entry, &freed_data_list, efd_list) {
+ err = ext4_issue_discard(sb, entry->efd_group,
+ entry->efd_start_cluster,
+ entry->efd_count,
+ &discard_bio);
+ if (err && err != -EOPNOTSUPP) {
+ ext4_msg(sb, KERN_WARNING, "discard request in"
+ " group:%d block:%d count:%d failed"
+ " with %d", entry->efd_group,
+ entry->efd_start_cluster,
+ entry->efd_count, err);
+ } else if (err == -EOPNOTSUPP)
+ break;
+ }
+
+ if (discard_bio)
+ submit_bio_wait(discard_bio);
+ }
+
+ list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
+ ext4_free_data_in_buddy(sb, entry);
+}
+
int __init ext4_init_mballoc(void)
{
ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
@@ -3529,7 +3567,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_set_bits(bitmap, start, len);
preallocated += len;