summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 12:59:42 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 12:59:42 -0800
commitae9a8c4bdc91202b4236372eed53c54d2297c71b (patch)
tree4596680ee808334d246ad2f93bdd743d76c3741a /fs
parent32190f0afbf4f1c0a9142e5a886a078ee0b794fd (diff)
parent232530680290ba94ca37852ab10d9556ea28badf (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: - Add support for online resizing of file systems with bigalloc - Fix a two data corruption bugs involving DAX, as well as a corruption bug after a crash during a racing fallocate and delayed allocation. - Finally, a number of cleanups and optimizations. * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: improve smp scalability for inode generation ext4: add support for online resizing with bigalloc ext4: mention noload when recovering on read-only device Documentation: fix little inconsistencies ext4: convert timers to use timer_setup() jbd2: convert timers to use timer_setup() ext4: remove duplicate extended attributes defs ext4: add ext4_should_use_dax() ext4: add sanity check for encryption + DAX ext4: prevent data corruption with journaling + DAX ext4: prevent data corruption with inline data + DAX ext4: fix interaction between i_size, fallocate, and delalloc after a crash ext4: retry allocations conservatively ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA ext4: Add iomap support for inline data iomap: Add IOMAP_F_DATA_INLINE flag iomap: Switch from blkno to disk offset
Diffstat (limited to 'fs')
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/dax.c2
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/balloc.c15
-rw-r--r--fs/ext4/ext4.h50
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/ext4/file.c263
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inline.c43
-rw-r--r--fs/ext4/inode.c153
-rw-r--r--fs/ext4/ioctl.c30
-rw-r--r--fs/ext4/mballoc.c28
-rw-r--r--fs/ext4/resize.c104
-rw-r--r--fs/ext4/super.c27
-rw-r--r--fs/iomap.c13
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/xfs/xfs_iomap.c6
19 files changed, 266 insertions, 500 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 32ce01f0f95f..49b7e9bdcd1d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1979,8 +1979,8 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
case IOMAP_MAPPED:
if (offset >= i_size_read(inode))
set_buffer_new(bh);
- bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
- ((offset - iomap->offset) >> inode->i_blkbits);
+ bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
+ inode->i_blkbits;
set_buffer_mapped(bh);
break;
}
diff --git a/fs/dax.c b/fs/dax.c
index f001d8c72a06..f3a44a7c14b3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -938,7 +938,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
- return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
+ return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
}
static loff_t
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 1442a4c734c8..9b2ac55ac34f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -821,11 +821,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret == 0) {
iomap->type = IOMAP_HOLE;
- iomap->blkno = IOMAP_NULL_BLOCK;
+ iomap->addr = IOMAP_NULL_ADDR;
iomap->length = 1 << blkbits;
} else {
iomap->type = IOMAP_MAPPED;
- iomap->blkno = (sector_t)bno << (blkbits - 9);
+ iomap->addr = (u64)bno << blkbits;
iomap->length = (u64)ret << blkbits;
iomap->flags |= IOMAP_F_MERGED;
}
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e38039fd96ff..73b850f5659c 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,6 +37,7 @@ config EXT4_FS
select CRC16
select CRYPTO
select CRYPTO_CRC32C
+ select FS_IOMAP
help
This is the next generation of the ext3 filesystem.
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d5ddfb96c83c..a943e568292e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -601,22 +601,21 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
* ext4_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
* for the current or committing transaction to complete, and then
- * return TRUE.
- *
- * if the total number of retries exceed three times, return FALSE.
+ * return TRUE. We will only retry once.
*/
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
- (*retries)++ > 3 ||
+ (*retries)++ > 1 ||
!EXT4_SB(sb)->s_journal)
return 0;
- jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
-
smp_mb();
- if (EXT4_SB(sb)->s_mb_free_pending)
- jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
+ if (EXT4_SB(sb)->s_mb_free_pending == 0)
+ return 0;
+
+ jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+ jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
return 1;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 27f38bb5046d..4e091eae38b1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -544,8 +544,8 @@ struct ext4_new_group_data {
__u64 inode_table;
__u32 blocks_count;
__u16 reserved_blocks;
- __u16 unused;
- __u32 free_blocks_count;
+ __u16 mdata_blocks;
+ __u32 free_clusters_count;
};
/* Indexes used to index group tables in ext4_new_group_data */
@@ -643,43 +643,6 @@ enum {
#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
-#ifndef FS_IOC_FSGETXATTR
-/* Until the uapi changes get merged for project quota... */
-
-#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
-#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
-
-/*
- * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
- */
-struct fsxattr {
- __u32 fsx_xflags; /* xflags field value (get/set) */
- __u32 fsx_extsize; /* extsize field value (get/set)*/
- __u32 fsx_nextents; /* nextents field value (get) */
- __u32 fsx_projid; /* project identifier (get/set) */
- unsigned char fsx_pad[12];
-};
-
-/*
- * Flags for the fsx_xflags field
- */
-#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
-#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
-#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
-#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
-#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
-#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
-#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
-#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
-#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
-#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
-#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
-#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
-#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
-#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
-#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
-#endif /* !defined(FS_IOC_FSGETXATTR) */
-
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
@@ -1391,8 +1354,6 @@ struct ext4_sb_info {
int s_first_ino;
unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
- spinlock_t s_next_gen_lock;
- u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
@@ -2514,9 +2475,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
-extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
- unsigned int map_len,
- struct extent_status *result);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -3047,6 +3005,10 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
extern int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline, __u64 start, __u64 len);
+
+struct iomap;
+extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
+
extern int ext4_try_to_evict_inline_data(handle_t *handle,
struct inode *inode,
int needed);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 97f0fd06728d..07bca11749d4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4794,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
+ (offset + len > i_size_read(inode) ||
+ offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
@@ -4965,7 +4966,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
+ (offset + len > i_size_read(inode) ||
+ offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b937078bcff3..ad204d2724ac 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,6 +21,7 @@
#include <linux/time.h>
#include <linux/fs.h>
+#include <linux/iomap.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
@@ -424,248 +425,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
}
/*
- * Here we use ext4_map_blocks() to get a block mapping for a extent-based
- * file rather than ext4_ext_walk_space() because we can introduce
- * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
- * function. When extent status tree has been fully implemented, it will
- * track all extent status for a file and we can directly use it to
- * retrieve the offset for SEEK_DATA/SEEK_HOLE.
- */
-
-/*
- * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
- * lookup page cache to check whether or not there has some data between
- * [startoff, endoff] because, if this range contains an unwritten extent,
- * we determine this extent as a data or a hole according to whether the
- * page cache has data or not.
- */
-static int ext4_find_unwritten_pgoff(struct inode *inode,
- int whence,
- ext4_lblk_t end_blk,
- loff_t *offset)
-{
- struct pagevec pvec;
- unsigned int blkbits;
- pgoff_t index;
- pgoff_t end;
- loff_t endoff;
- loff_t startoff;
- loff_t lastoff;
- int found = 0;
-
- blkbits = inode->i_sb->s_blocksize_bits;
- startoff = *offset;
- lastoff = startoff;
- endoff = (loff_t)end_blk << blkbits;
-
- index = startoff >> PAGE_SHIFT;
- end = (endoff - 1) >> PAGE_SHIFT;
-
- pagevec_init(&pvec, 0);
- do {
- int i;
- unsigned long nr_pages;
-
- nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
- &index, end);
- if (nr_pages == 0)
- break;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- struct buffer_head *bh, *head;
-
- /*
- * If current offset is smaller than the page offset,
- * there is a hole at this offset.
- */
- if (whence == SEEK_HOLE && lastoff < endoff &&
- lastoff < page_offset(pvec.pages[i])) {
- found = 1;
- *offset = lastoff;
- goto out;
- }
-
- lock_page(page);
-
- if (unlikely(page->mapping != inode->i_mapping)) {
- unlock_page(page);
- continue;
- }
-
- if (!page_has_buffers(page)) {
- unlock_page(page);
- continue;
- }
-
- if (page_has_buffers(page)) {
- lastoff = page_offset(page);
- bh = head = page_buffers(page);
- do {
- if (lastoff + bh->b_size <= startoff)
- goto next;
- if (buffer_uptodate(bh) ||
- buffer_unwritten(bh)) {
- if (whence == SEEK_DATA)
- found = 1;
- } else {
- if (whence == SEEK_HOLE)
- found = 1;
- }
- if (found) {
- *offset = max_t(loff_t,
- startoff, lastoff);
- unlock_page(page);
- goto out;
- }
-next:
- lastoff += bh->b_size;
- bh = bh->b_this_page;
- } while (bh != head);
- }
-
- lastoff = page_offset(page) + PAGE_SIZE;
- unlock_page(page);
- }
-
- pagevec_release(&pvec);
- } while (index <= end);
-
- /* There are no pages upto endoff - that would be a hole in there. */
- if (whence == SEEK_HOLE && lastoff < endoff) {
- found = 1;
- *offset = lastoff;
- }
-out:
- pagevec_release(&pvec);
- return found;
-}
-
-/*
- * ext4_seek_data() retrieves the offset for SEEK_DATA.
- */
-static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
-{
- struct inode *inode = file->f_mapping->host;
- struct extent_status es;
- ext4_lblk_t start, last, end;
- loff_t dataoff, isize;
- int blkbits;
- int ret;
-
- inode_lock(inode);
-
- isize = i_size_read(inode);
- if (offset < 0 || offset >= isize) {
- inode_unlock(inode);
- return -ENXIO;
- }
-
- blkbits = inode->i_sb->s_blocksize_bits;
- start = offset >> blkbits;
- last = start;
- end = isize >> blkbits;
- dataoff = offset;
-
- do {
- ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
- if (ret <= 0) {
- /* No extent found -> no data */
- if (ret == 0)
- ret = -ENXIO;
- inode_unlock(inode);
- return ret;
- }
-
- last = es.es_lblk;
- if (last != start)
- dataoff = (loff_t)last << blkbits;
- if (!ext4_es_is_unwritten(&es))
- break;
-
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
- es.es_lblk + es.es_len, &dataoff))
- break;
- last += es.es_len;
- dataoff = (loff_t)last << blkbits;
- cond_resched();
- } while (last <= end);
-
- inode_unlock(inode);
-
- if (dataoff > isize)
- return -ENXIO;
-
- return vfs_setpos(file, dataoff, maxsize);
-}
-
-/*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
- */
-static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
-{
- struct inode *inode = file->f_mapping->host;
- struct extent_status es;
- ext4_lblk_t start, last, end;
- loff_t holeoff, isize;
- int blkbits;
- int ret;
-
- inode_lock(inode);
-
- isize = i_size_read(inode);
- if (offset < 0 || offset >= isize) {
- inode_unlock(inode);
- return -ENXIO;
- }
-
- blkbits = inode->i_sb->s_blocksize_bits;
- start = offset >> blkbits;
- last = start;
- end = isize >> blkbits;
- holeoff = offset;
-
- do {
- ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
- if (ret < 0) {
- inode_unlock(inode);
- return ret;
- }
- /* Found a hole? */
- if (ret == 0 || es.es_lblk > last) {
- if (last != start)
- holeoff = (loff_t)last << blkbits;
- break;
- }
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (ext4_es_is_unwritten(&es) &&
- ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
- last + es.es_len, &holeoff))
- break;
-
- last += es.es_len;
- holeoff = (loff_t)last << blkbits;
- cond_resched();
- } while (last <= end);
-
- inode_unlock(inode);
-
- if (holeoff > isize)
- holeoff = isize;
-
- return vfs_setpos(file, holeoff, maxsize);
-}
-
-/*
* ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
* by calling generic_file_llseek_size() with the appropriate maxbytes
* value for each.
@@ -681,18 +440,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
maxbytes = inode->i_sb->s_maxbytes;
switch (whence) {
- case SEEK_SET:
- case SEEK_CUR:
- case SEEK_END:
+ default:
return generic_file_llseek_size(file, offset, whence,
maxbytes, i_size_read(inode));
- case SEEK_DATA:
- return ext4_seek_data(file, offset, maxbytes);
case SEEK_HOLE:
- return ext4_seek_hole(file, offset, maxbytes);
+ inode_lock_shared(inode);
+ offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+ inode_unlock_shared(inode);
+ break;
+ case SEEK_DATA:
+ inode_lock_shared(inode);
+ offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+ inode_unlock_shared(inode);
+ break;
}
- return -EINVAL;
+ if (offset < 0)
+ return offset;
+ return vfs_setpos(file, offset, maxbytes);
}
const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index c5f697a3fad4..b4267d72f249 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1139,9 +1139,7 @@ got:
inode->i_ino);
goto out;
}
- spin_lock(&sbi->s_next_gen_lock);
- inode->i_generation = sbi->s_next_generation++;
- spin_unlock(&sbi->s_next_gen_lock);
+ inode->i_generation = prandom_u32();
/* Precompute checksum seed for inode metadata */
if (ext4_has_metadata_csum(sb)) {
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 28c5c3abddb3..1367553c43bb 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -12,6 +12,7 @@
* GNU General Public License for more details.
*/
+#include <linux/iomap.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
@@ -302,11 +303,6 @@ static int ext4_create_inline_data(handle_t *handle,
EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
- /*
- * Propagate changes to inode->i_flags as well - e.g. S_DAX may
- * get cleared
- */
- ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
@@ -451,11 +447,6 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
}
}
ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
- /*
- * Propagate changes to inode->i_flags as well - e.g. S_DAX may
- * get set.
- */
- ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
@@ -1827,6 +1818,38 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
return ret;
}
+int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap)
+{
+ __u64 addr;
+ int error = -EAGAIN;
+ struct ext4_iloc iloc;
+
+ down_read(&EXT4_I(inode)->xattr_sem);
+ if (!ext4_has_inline_data(inode))
+ goto out;
+
+ error = ext4_get_inode_loc(inode, &iloc);
+ if (error)
+ goto out;
+
+ addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
+ addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
+ addr += offsetof(struct ext4_inode, i_block);
+
+ brelse(iloc.bh);
+
+ iomap->addr = addr;
+ iomap->offset = 0;
+ iomap->length = min_t(loff_t, ext4_get_inline_size(inode),
+ i_size_read(inode));
+ iomap->type = 0;
+ iomap->flags = IOMAP_F_DATA_INLINE;
+
+out:
+ up_read(&EXT4_I(inode)->xattr_sem);
+ return error;
+}
+
int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline, __u64 start, __u64 len)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 168a1b499cdf..2633150e41b9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3394,7 +3394,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
-#ifdef CONFIG_FS_DAX
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap)
{
@@ -3403,17 +3402,54 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned long first_block = offset >> blkbits;
unsigned long last_block = (offset + length - 1) >> blkbits;
struct ext4_map_blocks map;
+ bool delalloc = false;
int ret;
- if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
- return -ERANGE;
+
+ if (flags & IOMAP_REPORT) {
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_inline_data_iomap(inode, iomap);
+ if (ret != -EAGAIN) {
+ if (ret == 0 && offset >= iomap->length)
+ ret = -ENOENT;
+ return ret;
+ }
+ }
+ } else {
+ if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+ return -ERANGE;
+ }
map.m_lblk = first_block;
map.m_len = last_block - first_block + 1;
- if (!(flags & IOMAP_WRITE)) {
+ if (flags & IOMAP_REPORT) {
ret = ext4_map_blocks(NULL, inode, &map, 0);
- } else {
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0) {
+ ext4_lblk_t end = map.m_lblk + map.m_len - 1;
+ struct extent_status es;
+
+ ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
+
+ if (!es.es_len || es.es_lblk > end) {
+ /* entire range is a hole */
+ } else if (es.es_lblk > map.m_lblk) {
+ /* range starts with a hole */
+ map.m_len = es.es_lblk - map.m_lblk;
+ } else {
+ ext4_lblk_t offs = 0;
+
+ if (es.es_lblk < map.m_lblk)
+ offs = map.m_lblk - es.es_lblk;
+ map.m_lblk = es.es_lblk + offs;
+ map.m_len = es.es_len - offs;
+ delalloc = true;
+ }
+ }
+ } else if (flags & IOMAP_WRITE) {
int dio_credits;
handle_t *handle;
int retries = 0;
@@ -3464,17 +3500,21 @@ retry:
}
}
ext4_journal_stop(handle);
+ } else {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret < 0)
+ return ret;
}
iomap->flags = 0;
iomap->bdev = inode->i_sb->s_bdev;
iomap->dax_dev = sbi->s_daxdev;
iomap->offset = first_block << blkbits;
+ iomap->length = (u64)map.m_len << blkbits;
if (ret == 0) {
- iomap->type = IOMAP_HOLE;
- iomap->blkno = IOMAP_NULL_BLOCK;
- iomap->length = (u64)map.m_len << blkbits;
+ iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
} else {
if (map.m_flags & EXT4_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
@@ -3484,12 +3524,12 @@ retry:
WARN_ON_ONCE(1);
return -EIO;
}
- iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
- iomap->length = (u64)map.m_len << blkbits;
+ iomap->addr = (u64)map.m_pblk << blkbits;
}
if (map.m_flags & EXT4_MAP_NEW)
iomap->flags |= IOMAP_F_NEW;
+
return 0;
}
@@ -3550,8 +3590,6 @@ const struct iomap_ops ext4_iomap_ops = {
.iomap_end = ext4_iomap_end,
};
-#endif
-
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
@@ -4573,6 +4611,21 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}
+static bool ext4_should_use_dax(struct inode *inode)
+{
+ if (!test_opt(inode->i_sb, DAX))
+ return false;
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ if (ext4_should_journal_data(inode))
+ return false;
+ if (ext4_has_inline_data(inode))
+ return false;
+ if (ext4_encrypted_inode(inode))
+ return false;
+ return true;
+}
+
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
@@ -4588,9 +4641,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
- !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
- !(flags & EXT4_ENCRYPT_FL))
+ if (ext4_should_use_dax(inode))
new_fl |= S_DAX;
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
@@ -5966,11 +6017,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode);
- /*
- * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
- * E.g. S_DAX may get cleared / set.
- */
- ext4_set_inode_flags(inode);
jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
@@ -6106,70 +6152,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)
return err;
}
-
-/*
- * Find the first extent at or after @lblk in an inode that is not a hole.
- * Search for @map_len blocks at most. The extent is returned in @result.
- *
- * The function returns 1 if we found an extent. The function returns 0 in
- * case there is no extent at or after @lblk and in that case also sets
- * @result->es_len to 0. In case of error, the error code is returned.
- */
-int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
- unsigned int map_len, struct extent_status *result)
-{
- struct ext4_map_blocks map;
- struct extent_status es = {};
- int ret;
-
- map.m_lblk = lblk;
- map.m_len = map_len;
-
- /*
- * For non-extent based files this loop may iterate several times since
- * we do not determine full hole size.
- */
- while (map.m_len > 0) {
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0)
- return ret;
- /* There's extent covering m_lblk? Just return it. */
- if (ret > 0) {
- int status;
-
- ext4_es_store_pblock(result, map.m_pblk);
- result->es_lblk = map.m_lblk;
- result->es_len = map.m_len;
- if (map.m_flags & EXT4_MAP_UNWRITTEN)
- status = EXTENT_STATUS_UNWRITTEN;
- else
- status = EXTENT_STATUS_WRITTEN;
- ext4_es_store_status(result, status);
- return 1;
- }
- ext4_es_find_delayed_extent_range(inode, map.m_lblk,
- map.m_lblk + map.m_len - 1,
- &es);
- /* Is delalloc data before next block in extent tree? */
- if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
- ext4_lblk_t offset = 0;
-
- if (es.es_lblk < lblk)
- offset = lblk - es.es_lblk;
- result->es_lblk = es.es_lblk + offset;
- ext4_es_store_pblock(result,
- ext4_es_pblock(&es) + offset);
- result->es_len = es.es_len - offset;
- ext4_es_store_status(result, ext4_es_status(&es));
-
- return 1;
- }
- /* There's a hole at m_lblk, advance us after it */
- map.m_lblk += map.m_len;
- map_len -= map.m_len;
- map.m_len = map_len;
- cond_resched();
- }
- result->es_len = 0;
- return 0;
-}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 75d83471f65c..b7558f292420 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -15,6 +15,7 @@
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/quotaops.h>
+#include <linux/random.h>
#include <linux/uuid.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
@@ -99,7 +100,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
int err;
struct inode *inode_bl;
struct ext4_inode_info *ei_bl;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
return -EINVAL;
@@ -158,10 +158,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
inode->i_ctime = inode_bl->i_ctime = current_time(inode);
- spin_lock(&sbi->s_next_gen_lock);
- inode->i_generation = sbi->s_next_generation++;
- inode_bl->i_generation = sbi->s_next_generation++;
- spin_unlock(&sbi->s_next_gen_lock);
+ inode->i_generation = prandom_u32();
+ inode_bl->i_generation = prandom_u32();
ext4_discard_preallocations(inode);
@@ -291,10 +289,20 @@ flags_err:
if (err)
goto flags_out;
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ /*
+ * Changes to the journaling mode can cause unsafe changes to
+ * S_DAX if we are using the DAX mount option.
+ */
+ if (test_opt(inode->i_sb, DAX)) {
+ err = -EBUSY;
+ goto flags_out;
+ }
+
err = ext4_change_inode_journal_flag(inode, jflag);
- if (err)
- goto flags_out;
+ if (err)
+ goto flags_out;
+ }
if (migrate) {
if (flags & EXT4_EXTENTS_FL)
err = ext4_ext_migrate(inode);
@@ -862,12 +870,6 @@ group_add_out:
int err = 0, err2 = 0;
ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
- if (ext4_has_feature_bigalloc(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not (yet) supported with bigalloc");
- return -EOPNOTSUPP;
- }
-
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
sizeof(__u64))) {
return -EFAULT;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 701085620cd8..d9f8b90a93ed 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4994,8 +4994,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_buddy e4b;
- int err = 0, ret, blk_free_count;
- ext4_grpblk_t blocks_freed;
+ int err = 0, ret, free_clusters_count;
+ ext4_grpblk_t clusters_freed;
+ ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
+ ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
+ unsigned long cluster_count = last_cluster - first_cluster + 1;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
@@ -5007,8 +5010,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* Check to see if we are freeing blocks across a group
* boundary.
*/