summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Gao Xiang <hsiangkao@aol.com> 2019-08-23 05:36:59 +0800
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-08-24 14:20:10 +0200
commit: 47e4937a4a7ca4184fd282791dfee76c6799966a (patch)
tree: fc68338c13a00ac74ac9f1a838491bd3f7649c28 /drivers
parent: f401441deda68326852560bf70d59e95f585bbb3 (diff)
erofs: move erofs out of staging
EROFS filesystem has been merged into linux-staging for a year.

EROFS is designed to be a better solution of saving extra storage space with guaranteed end-to-end performance for read-only files with the help of reduced metadata, fixed-sized output compression and decompression inplace technologies.

In the past year, EROFS was greatly improved by many people as a staging driver, self-tested, betaed by a large number of our internal users, successfully applied to almost all in-service HUAWEI smartphones as the part of EMUI 9.1 and proven to be stable enough to be moved out of staging.

EROFS is a self-contained filesystem driver. Although there are still some TODOs to be more generic, we have a dedicated team actively keeping on working on EROFS in order to make it better with the evolution of Linux kernel as the other in-kernel filesystems.

As Pavel suggested, it's better to do as one commit since git can do moves and all histories will be saved in this way.

Let's promote it from staging and enhance it more actively as a "real" part of kernel for more wider scenarios!

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Pavel Machek <pavel@denx.de>
Cc: David Sterba <dsterba@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J . Wong <darrick.wong@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Richard Weinberger <richard@nod.at>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Chao Yu <yuchao0@huawei.com>
Cc: Miao Xie <miaoxie@huawei.com>
Cc: Li Guifu <bluce.liguifu@huawei.com>
Cc: Fang Wei <fangwei1@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Link: https://lore.kernel.org/r/20190822213659.5501-1-hsiangkao@aol.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/staging/Kconfig | 2
-rw-r--r-- drivers/staging/Makefile | 1
-rw-r--r-- drivers/staging/erofs/Documentation/filesystems/erofs.txt | 223
-rw-r--r-- drivers/staging/erofs/Kconfig | 98
-rw-r--r-- drivers/staging/erofs/Makefile | 13
-rw-r--r-- drivers/staging/erofs/TODO | 46
-rw-r--r-- drivers/staging/erofs/compress.h | 62
-rw-r--r-- drivers/staging/erofs/data.c | 425
-rw-r--r-- drivers/staging/erofs/decompressor.c | 360
-rw-r--r-- drivers/staging/erofs/dir.c | 141
-rw-r--r-- drivers/staging/erofs/erofs_fs.h | 310
-rw-r--r-- drivers/staging/erofs/include/trace/events/erofs.h | 256
-rw-r--r-- drivers/staging/erofs/inode.c | 334
-rw-r--r-- drivers/staging/erofs/internal.h | 554
-rw-r--r-- drivers/staging/erofs/namei.c | 253
-rw-r--r-- drivers/staging/erofs/super.c | 671
-rw-r--r-- drivers/staging/erofs/tagptr.h | 110
-rw-r--r-- drivers/staging/erofs/utils.c | 335
-rw-r--r-- drivers/staging/erofs/xattr.c | 705
-rw-r--r-- drivers/staging/erofs/xattr.h | 94
-rw-r--r-- drivers/staging/erofs/zdata.c | 1434
-rw-r--r-- drivers/staging/erofs/zdata.h | 195
-rw-r--r-- drivers/staging/erofs/zmap.c | 468
-rw-r--r-- drivers/staging/erofs/zpvec.h | 159
24 files changed, 0 insertions, 7249 deletions
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 7c96a01eef6c..d972ec8e71fb 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -112,8 +112,6 @@ source "drivers/staging/gasket/Kconfig"
source "drivers/staging/axis-fifo/Kconfig"
-source "drivers/staging/erofs/Kconfig"
-
source "drivers/staging/fieldbus/Kconfig"
source "drivers/staging/kpc2000/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index fcaac9693b83..6018b9a4a077 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -46,7 +46,6 @@ obj-$(CONFIG_DMA_RALINK) += ralink-gdma/
obj-$(CONFIG_SOC_MT7621) += mt7621-dts/
obj-$(CONFIG_STAGING_GASKET_FRAMEWORK) += gasket/
obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/
-obj-$(CONFIG_EROFS_FS) += erofs/
obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/
obj-$(CONFIG_KPC2000) += kpc2000/
obj-$(CONFIG_ISDN_CAPI) += isdn/
diff --git a/drivers/staging/erofs/Documentation/filesystems/erofs.txt b/drivers/staging/erofs/Documentation/filesystems/erofs.txt
deleted file mode 100644
index 0eab600ca7ca..000000000000
--- a/drivers/staging/erofs/Documentation/filesystems/erofs.txt
+++ /dev/null
@@ -1,223 +0,0 @@
-Overview
-========
-
-EROFS file-system stands for Enhanced Read-Only File System. Different
-from other read-only file systems, it aims to be designed for flexibility,
-scalability, but be kept simple and high performance.
-
-It is designed as a better filesystem solution for the following scenarios:
- - read-only storage media or
-
- - part of a fully trusted read-only solution, which means it needs to be
- immutable and bit-for-bit identical to the official golden image for
- their releases due to security and other considerations and
-
- - hope to save some extra storage space with guaranteed end-to-end performance
- by using reduced metadata and transparent file compression, especially
- for those embedded devices with limited memory (ex, smartphone);
-
-Here is the main features of EROFS:
- - Little endian on-disk design;
-
- - Currently 4KB block size (nobh) and therefore maximum 16TB address space;
-
- - Metadata & data could be mixed by design;
-
- - 2 inode versions for different requirements:
- v1 v2
- Inode metadata size: 32 bytes 64 bytes
- Max file size: 4 GB 16 EB (also limited by max. vol size)
- Max uids/gids: 65536 4294967296
- File creation time: no yes (64 + 32-bit timestamp)
- Max hardlinks: 65536 4294967296
- Metadata reserved: 4 bytes 14 bytes
-
- - Support extended attributes (xattrs) as an option;
-
- - Support xattr inline and tail-end data inline for all files;
-
- - Support POSIX.1e ACLs by using xattrs;
-
- - Support transparent file compression as an option:
- LZ4 algorithm with 4 KB fixed-output compression for high performance;
-
-The following git tree provides the file system user-space tools under
-development (ex, formatting tool mkfs.erofs):
->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
-
-Bugs and patches are welcome, please kindly help us and send to the following
-linux-erofs mailing list:
->> linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
-
-Note that EROFS is still working in progress as a Linux staging driver,
-Cc the staging mailing list as well is highly recommended:
->> Linux Driver Project Developer List <devel@driverdev.osuosl.org>
-
-Mount options
-=============
-
-fault_injection=%d Enable fault injection in all supported types with
- specified injection rate. Supported injection type:
- Type_Name Type_Value
- FAULT_KMALLOC 0x000000001
- FAULT_READ_IO 0x000000002
-(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled
- by default if CONFIG_EROFS_FS_XATTR is selected.
-(no)acl Setup POSIX Access Control List. Note: acl is enabled
- by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
-cache_strategy=%s Select a strategy for cached decompression from now on:
- disabled: In-place I/O decompression only;
- readahead: Cache the last incomplete compressed physical
- cluster for further reading. It still does
- in-place I/O decompression for the rest
- compressed physical clusters;
- readaround: Cache the both ends of incomplete compressed
- physical clusters for further reading.
- It still does in-place I/O decompression
- for the rest compressed physical clusters.
-
-Module parameters
-=================
-use_vmap=[0|1] Use vmap() instead of vm_map_ram() (default 0).
-
-On-disk details
-===============
-
-Summary
--------
-Different from other read-only file systems, an EROFS volume is designed
-to be as simple as possible:
-
- |-> aligned with the block size
- ____________________________________________________________
- | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data |
- |_|__|_|_____|__________|_____|______|__________|_____|______|
- 0 +1K
-
-All data areas should be aligned with the block size, but metadata areas
-may not. All metadatas can be now observed in two different spaces (views):
- 1. Inode metadata space
- Each valid inode should be aligned with an inode slot, which is a fixed
- value (32 bytes) and designed to be kept in line with v1 inode size.
-
- Each inode can be directly found with the following formula:
- inode offset = meta_blkaddr * block_size + 32 * nid
-
- |-> aligned with 8B
- |-> followed closely
- + meta_blkaddr blocks |-> another slot
- _____________________________________________________________________
- | ... | inode | xattrs | extents | data inline | ... | inode ...
- |________|_______|(optional)|(optional)|__(optional)_|_____|__________
- |-> aligned with the inode slot size
- . .
- . .
- . .
- . .
- . .
- . .
- .____________________________________________________|-> aligned with 4B
- | xattr_ibody_header | shared xattrs | inline xattrs |
- |____________________|_______________|_______________|
- |-> 12 bytes <-|->x * 4 bytes<-| .
- . . .
- . . .
- . . .
- ._______________________________.______________________.
- | id | id | id | id | ... | id | ent | ... | ent| ... |
- |____|____|____|____|______|____|_____|_____|____|_____|
- |-> aligned with 4B
- |-> aligned with 4B
-
- Inode could be 32 or 64 bytes, which can be distinguished from a common
- field which all inode versions have -- i_advise:
-
- __________________ __________________
- | i_advise | | i_advise |
- |__________________| |__________________|
- | ... | | ... |
- | | | |
- |__________________| 32 bytes | |
- | |
- |__________________| 64 bytes
-
- Xattrs, extents, data inline are followed by the corresponding inode with
- proper alignes, and they could be optional for different data mappings,
- _currently_ there are totally 3 valid data mappings supported:
-
- 1) flat file data without data inline (no extent);
- 2) fixed-output size data compression (must have extents);
- 3) flat file data with tail-end data inline (no extent);
-
- The size of the optional xattrs is indicated by i_xattr_count in inode
- header. Large xattrs or xattrs shared by many different files can be
- stored in shared xattrs metadata rather than inlined right after inode.
-
- 2. Shared xattrs metadata space
- Shared xattrs space is similar to the above inode space, started with
- a specific block indicated by xattr_blkaddr, organized one by one with
- proper align.
-
- Each share xattr can also be directly found by the following formula:
- xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
-
- |-> aligned by 4 bytes
- + xattr_blkaddr blocks |-> aligned with 4 bytes
- _________________________________________________________________________
- | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ...
- |________|_____________|_____________|_____|______________|_______________
-
-Directories
------------
-All directories are now organized in a compact on-disk format. Note that
-each directory block is divided into index and name areas in order to support
-random file lookup, and all directory entries are _strictly_ recorded in
-alphabetical order in order to support improved prefix binary search
-algorithm (could refer to the related source code).
-
- ___________________________
- / |
- / ______________|________________
- / / | nameoff1 | nameoffN-1
- ____________.______________._______________v________________v__________
-| dirent | dirent | ... | dirent | filename | filename | ... | filename |
-|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
- \ ^
- \ | * could have
- \ | trailing '\0'
- \________________________| nameoff0
-
- Directory block
-
-Note that apart from the offset of the first filename, nameoff0 also indicates
-the total number of directory entries in this block since it is no need to
-introduce another on-disk field at all.
-
-Compression
------------
-Currently, EROFS supports 4KB fixed-output clustersize transparent file
-compression, as illustrated below:
-
- |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
- clusterofs clusterofs clusterofs
- | | | logical data
-_________v_______________________________v_____________________v_______________
-... | . | | . | | . | ...
-____|____.________|_____________|________.____|_____________|__.__________|____
- |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
- size size size size size
- . . . .
- . . . .
- . . . .
- _______._____________._____________._____________._____________________
- ... | | | | ... physical data
- _______|_____________|_____________|_____________|_____________________
- |-> cluster <-|-> cluster <-|-> cluster <-|
- size size size
-
-Currently each on-disk physical cluster can contain 4KB (un)compressed data
-at most. For each logical cluster, there is a corresponding on-disk index to
-describe its cluster type, physical cluster address, etc.
-
-See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
-
diff --git a/drivers/staging/erofs/Kconfig b/drivers/staging/erofs/Kconfig
deleted file mode 100644
index 16316d1adca3..000000000000
--- a/drivers/staging/erofs/Kconfig
+++ /dev/null
@@ -1,98 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config EROFS_FS
- tristate "EROFS filesystem support"
- depends on BLOCK
- help
- EROFS (Enhanced Read-Only File System) is a lightweight
- read-only file system with modern designs (eg. page-sized
- blocks, inline xattrs/data, etc.) for scenarios which need
- high-performance read-only requirements, e.g. Android OS
- for mobile phones and LIVECDs.
-
- It also provides fixed-sized output compression support,
- which improves storage density, keeps relatively higher
- compression ratios, which is more useful to achieve high
- performance for embedded devices with limited memory.
-
- If unsure, say N.
-
-config EROFS_FS_DEBUG
- bool "EROFS debugging feature"
- depends on EROFS_FS
- help
- Print debugging messages and enable more BUG_ONs which check
- filesystem consistency and find potential issues aggressively,
- which can be used for Android eng build, for example.
-
- For daily use, say N.
-
-config EROFS_FAULT_INJECTION
- bool "EROFS fault injection facility"
- depends on EROFS_FS
- help
- Test EROFS to inject faults such as ENOMEM, EIO, and so on.
- If unsure, say N.
-
-config EROFS_FS_XATTR
- bool "EROFS extended attributes"
- depends on EROFS_FS
- default y
- help
- Extended attributes are name:value pairs associated with inodes by
- the kernel or by users (see the attr(5) manual page, or visit
- <http://acl.bestbits.at/> for details).
-
- If unsure, say N.
-
-config EROFS_FS_POSIX_ACL
- bool "EROFS Access Control Lists"
- depends on EROFS_FS_XATTR
- select FS_POSIX_ACL
- default y
- help
- Posix Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- To learn more about Access Control Lists, visit the POSIX ACLs for
- Linux website <http://acl.bestbits.at/>.
-
- If you don't know what Access Control Lists are, say N.
-
-config EROFS_FS_SECURITY
- bool "EROFS Security Labels"
- depends on EROFS_FS_XATTR
- default y
- help
- Security labels provide an access control facility to support Linux
- Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
- Linux. This option enables an extended attribute handler for file
- security labels in the erofs filesystem, so that it requires enabling
- the extended attribute support in advance.
-
- If you are not using a security module, say N.
-
-config EROFS_FS_ZIP
- bool "EROFS Data Compression Support"
- depends on EROFS_FS
- select LZ4_DECOMPRESS
- default y
- help
- Enable fixed-sized output compression for EROFS.
-
- If you don't want to enable compression feature, say N.
-
-config EROFS_FS_CLUSTER_PAGE_LIMIT
- int "EROFS Cluster Pages Hard Limit"
- depends on EROFS_FS_ZIP
- range 1 256
- default "1"
- help
- Indicates maximum # of pages of a compressed
- physical cluster.
-
- For example, if files in a image were compressed
- into 8k-unit, hard limit should not be configured
- less than 2. Otherwise, the image will be refused
- to mount on this kernel.
-
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
deleted file mode 100644
index 5cdae21cb5af..000000000000
--- a/drivers/staging/erofs/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-EROFS_VERSION = "1.0pre1"
-
-ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\"
-
-obj-$(CONFIG_EROFS_FS) += erofs.o
-# staging requirement: to be self-contained in its own directory
-ccflags-y += -I $(srctree)/$(src)/include
-erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
-erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
-
diff --git a/drivers/staging/erofs/TODO b/drivers/staging/erofs/TODO
deleted file mode 100644
index a8608b2f72bd..000000000000
--- a/drivers/staging/erofs/TODO
+++ /dev/null
@@ -1,46 +0,0 @@
-
-EROFS is still working in progress, thus it is not suitable
-for all productive uses. play at your own risk :)
-
-TODO List:
- - add the missing error handling code
- (mainly existed in xattr and decompression submodules);
-
- - finalize erofs ondisk format design (which means that
- minor on-disk revisions could happen later);
-
- - documentation and detailed technical analysis;
-
- - general code review and clean up
- (including confusing variable names and code snippets);
-
- - support larger compressed clustersizes for selection
- (currently erofs only works as expected with the page-sized
- compressed cluster configuration, usually 4KB);
-
- - support more lossless data compression algorithms
- in addition to LZ4 algorithms in VLE approach;
-
- - data deduplication and other useful features.
-
-The following git tree provides the file system user-space
-tools under development (ex, formatting tool mkfs.erofs):
->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
-
-The open-source development of erofs-utils is at the early stage.
-Contact the original author Li Guifu <bluce.liguifu@huawei.com> and
-the co-maintainer Fang Wei <fangwei1@huawei.com> for the latest news
-and more details.
-
-Code, suggestions, etc, are welcome. Please feel free to
-ask and send patches,
-
-To:
- linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
- Gao Xiang <gaoxiang25@huawei.com>
- Chao Yu <yuchao0@huawei.com>
-
-Cc: (for linux-kernel upstream patches)
- Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- linux-staging mailing list <devel@driverdev.osuosl.org>
-
diff --git a/drivers/staging/erofs/compress.h b/drivers/staging/erofs/compress.h
deleted file mode 100644
index 043013f9ef1b..000000000000
--- a/drivers/staging/erofs/compress.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/compress.h
- *
- * Copyright (C) 2019 HUAWEI, Inc.
- * http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_FS_COMPRESS_H
-#define __EROFS_FS_COMPRESS_H
-
-#include "internal.h"
-
-enum {
- Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
- Z_EROFS_COMPRESSION_RUNTIME_MAX
-};
-
-struct z_erofs_decompress_req {
- struct super_block *sb;
- struct page **in, **out;
-
- unsigned short pageofs_out;
- unsigned int inputsize, outputsize;
-
- /* indicate the algorithm will be used for decompression */
- unsigned int alg;
- bool inplace_io, partial_decoding;
-};
-
-/*
- * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
- * used to mark temporary allocated pages from other
- * file/cached pages and NULL mapping pages.
- */
-#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
-
-/* check if a page is marked as staging */
-static inline bool z_erofs_page_is_staging(struct page *page)
-{
- return page->mapping == Z_EROFS_MAPPING_STAGING;
-}
-
-static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
- struct page *page)
-{
- if (!z_erofs_page_is_staging(page))
- return false;
-
- /* staging pages should not be used by others at the same time */
- if (page_ref_count(page) > 1)
- put_page(page);
- else
- list_add(&page->lru, pagepool);
- return true;
-}
-
-int z_erofs_decompress(struct z_erofs_decompress_req *rq,
- struct list_head *pagepool);
-
-#endif
-
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c
deleted file mode 100644
index 72c4b4c5296b..000000000000
--- a/drivers/staging/erofs/data.c
+++ /dev/null
@@ -1,425 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/data.c
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- * http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "internal.h"
-#include <linux/prefetch.h>
-
-#include <trace/events/erofs.h>
-
-static inline void read_endio(struct bio *bio)
-{
- struct super_block *const sb = bio->bi_private;
- struct bio_vec *bvec;
- blk_status_t err = bio->bi_status;
- struct bvec_iter_all iter_all;
-
- if (time_to_inject(EROFS_SB(sb), FAULT_READ_IO)) {
- erofs_show_injection_info(FAULT_READ_IO);
- err = BLK_STS_IOERR;
- }
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
-
- /* page is already locked */
- DBG_BUGON(PageUptodate(page));
-
- if (unlikely(err))
- SetPageError(page);
- else
- SetPageUptodate(page);
-
- unlock_page(page);
- /* page could be reclaimed now */
- }
- bio_put(bio);
-}
-
-/* prio -- true is used for dir */
-struct page *__erofs_get_meta_page(struct super_block *sb,
- erofs_blk_t blkaddr, bool prio, bool nofail)
-{
- struct inode *const bd_inode = sb->s_bdev->bd_inode;
- struct address_space *const mapping = bd_inode->i_mapping;
- /* prefer retrying in the allocator to blindly looping below */
- const gfp_t gfp = mapping_gfp_constraint(mapping, ~__GFP_FS) |
- (nofail ? __GFP_NOFAIL : 0);
- unsigned int io_retries = nofail ? EROFS_IO_MAX_RETRIES_NOFAIL : 0;
- struct page *page;
- int err;
-
-repeat:
- page = find_or_create_page(mapping, blkaddr, gfp);
- if (unlikely(!page)) {
- DBG_BUGON(nofail);
- return ERR_PTR(-ENOMEM);
- }
- DBG_BUGON(!PageLocked(page));
-
- if (!PageUptodate(page)) {
- struct bio *bio;
-
- bio = erofs_grab_bio(sb, blkaddr, 1, sb, read_endio, nofail);
- if (IS_ERR(bio)) {
- DBG_BUGON(nofail);
- err = PTR_ERR(bio);
- goto err_out;
- }
-
- err = bio_add_page(bio, page, PAGE_SIZE, 0);
- if (unlikely(err != PAGE_SIZE)) {
- err = -EFAULT;
- goto err_out;
- }
-
- __submit_bio(bio, REQ_OP_READ,
- REQ_META | (prio ? REQ_PRIO : 0));
-
- lock_page(page);
-
- /* this page has been truncated by others */
- if (unlikely(page->mapping != mapping)) {
-unlock_repeat:
- unlock_page(page);
- put_page(page);
- goto repeat;
- }
-
- /* more likely a read error */
- if (unlikely(!PageUptodate(page))) {
- if (io_retries) {
- --io_retries;
- goto unlock_repeat;
- }
- err = -EIO;
- goto err_out;
- }
- }
- return page;
-
-err_out:
- unlock_page(page);
- put_page(page);
- return ERR_PTR(err);
-}
-
-static int erofs_map_blocks_flatmode(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags)
-{
- int err = 0;
- erofs_blk_t nblocks, lastblk;
- u64 offset = map->m_la;
- struct erofs_vnode *vi = EROFS_V(inode);
-
- trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
-
- nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
- lastblk = nblocks - is_inode_flat_inline(inode);
-
- if (unlikely(offset >= inode->i_size)) {
- /* leave out-of-bound access unmapped */
- map->m_flags = 0;
- map->m_plen = 0;
- goto out;
- }
-
- /* there is no hole in flatmode */
- map->m_flags = EROFS_MAP_MAPPED;
-
- if (offset < blknr_to_addr(lastblk)) {
- map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
- map->m_plen = blknr_to_addr(lastblk) - offset;
- } else if (is_inode_flat_inline(inode)) {
- /* 2 - inode inline B: inode, [xattrs], inline last blk... */
- struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-
- map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
- vi->xattr_isize + erofs_blkoff(map->m_la);
- map->m_plen = inode->i_size - offset;
-
- /* inline data should be located in one meta block */
- if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
- errln("inline data cross block boundary @ nid %llu",
- vi->nid);
- DBG_BUGON(1);
- err = -EFSCORRUPTED;
- goto err_out;
- }
-
- map->m_flags |= EROFS_MAP_META;
- } else {
- errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
- vi->nid, inode->i_size, map->m_la);
- DBG_BUGON(1);
- err = -EIO;
- goto err_out;
- }
-
-out:
- map->m_llen = map->m_plen;
-
-err_out:
- trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
- return err;
-}
-
-int erofs_map_blocks(struct inode *inode,
- struct erofs_map_blocks *map, int flags)
-{
- if (unlikely(is_inode_layout_compression(inode))) {
- int err = z_erofs_map_blocks_iter(inode, map, flags);
-
- if (map->mpage) {
- put_page(map->mpage);
- map->mpage = NULL;
- }
- return err;
- }
- return erofs_map_blocks_flatmode(inode, map, flags);
-}
-
-static inline struct bio *erofs_read_raw_page(struct bio *bio,
- struct address_space *mapping,
- struct page *page,
- erofs_off_t *last_block,
- unsigned int nblocks,
- bool ra)
-{
- struct inode *const inode = mapping->host;
- struct super_block *const sb = inode->i_sb;
- erofs_off_t current_block = (erofs_off_t)page->index;
- int err;
-
- DBG_BUGON(!nblocks);
-
- if (PageUptodate(page)) {
- err = 0;
- goto has_updated;
- }
-
- /* note that for readpage case, bio also equals to NULL */
- if (bio &&
- /* not continuous */
- *last_block + 1 != current_block) {
-submit_bio_retry:
- __submit_bio(bio, REQ_OP_READ, 0);
- bio = NULL;
- }
-
- if (!bio) {
- struct erofs_map_blocks map = {
- .m_la = blknr_to_addr(current_block),
- };
- erofs_blk_t blknr;
- unsigned int blkoff;
-
- err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
- if (unlikely(err))
- goto err_out;
-
- /* zero out the holed page */
- if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
-
- /* imply err = 0, see erofs_map_blocks */
- goto has_updated;
- }
-
- /* for RAW access mode, m_plen must be equal to m_llen */
- DBG_BUGON(map.m_plen != map.m_llen);
-
- blknr = erofs_blknr(map.m_pa);
- blkoff = erofs_blkoff(map.m_pa);
-
- /* deal with inline page */
- if (map.m_flags & EROFS_MAP_META) {
- void *vsrc, *vto;
- struct page *ipage;
-
- DBG_BUGON(map.m_plen > PAGE_SIZE);
-
- ipage = erofs_get_meta_page(inode->i_sb, blknr, 0);
-
- if (IS_ERR(ipage)) {
- err = PTR_ERR(ipage);
- goto err_out;
- }
-
- vsrc = kmap_atomic(ipage);
- vto = kmap_atomic(page);
- memcpy(vto, vsrc + blkoff, map.m_plen);
- memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
- kunmap_atomic(vto);
- kunmap_atomic(vsrc);
- flush_dcache_page(page);
-
- SetPageUptodate(page);
- /* TODO: could we unlock the page earlier? */
- unlock_page(ipage);
- put_page(ipage);
-
- /* imply err = 0, see erofs_map_blocks */
- goto has_updated;
- }
-
- /* pa must be block-aligned for raw reading */
- DBG_BUGON(erofs_blkoff(map.m_pa));
-
- /* max # of continuous pages */
- if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
- nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
- if (nblocks > BIO_MAX_PAGES)
- nblocks = BIO_MAX_PAGES;
-
- bio = erofs_grab_bio(sb, blknr, nblocks, sb,
- read_endio, false);
- if (IS_ERR(bio)) {
- err = PTR_ERR(bio);
- bio = NULL;
- goto err_out;
- }
- }
-
- err = bio_add_page(bio, page, PAGE_SIZE, 0);
- /* out of the extent or bio is full */
- if (err < PAGE_SIZE)
- goto submit_bio_retry;
-
- *last_block = current_block;
-
- /* shift in advance in case of it followed by too many gaps */
- if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
- /* err should reassign to 0 after submitting */
- err = 0;
- goto submit_bio_out;
- }
-
- return bio;
-
-err_out:
- /* for sync reading, set page error immediately */
- if (!ra) {
- SetPageError(page);
- ClearPageUptodate(page);
- }
-has_updated:
- unlock_page(page);
-
- /* if updated manually, continuous pages has a gap */
- if (bio)
-submit_bio_out:
- __submit_bio(bio, REQ_OP_READ, 0);
-
- return unlikely(err) ? ERR_PTR(err) : NULL;
-}
-
-/*
- * since we dont have write or truncate flows, so no inode
- * locking needs to be held at the moment.
- */
-static int erofs_raw_access_readpage(struct file *file, struct page *page)
-{
- erofs_off_t last_block;