summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-16 15:02:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-16 15:02:57 -0700
commit9c1be0c4712fe760d8969427ef91107e9c062d91 (patch)
tree01210aba49c120116bb99ba031ff86a525ffb63d
parent42fdd144a40f3afaccaa7ea538268bad3596439e (diff)
parent0d7eff873caaeac84de01a1acdca983d2c7ba3fe (diff)
Merge branch 'for_linus' of git://git.infradead.org/~dedekind/ubifs-2.6
* 'for_linus' of git://git.infradead.org/~dedekind/ubifs-2.6: UBIFS: include to compilation UBIFS: add new flash file system UBIFS: add brief documentation MAINTAINERS: add UBIFS section do_mounts: allow UBI root device name VFS: export sync_sb_inodes VFS: move inode_lock into sync_sb_inodes
-rw-r--r--Documentation/filesystems/ubifs.txt164
-rw-r--r--MAINTAINERS10
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/fs-writeback.c22
-rw-r--r--fs/ubifs/Kconfig72
-rw-r--r--fs/ubifs/Makefile9
-rw-r--r--fs/ubifs/budget.c731
-rw-r--r--fs/ubifs/commit.c677
-rw-r--r--fs/ubifs/compress.c253
-rw-r--r--fs/ubifs/debug.c2289
-rw-r--r--fs/ubifs/debug.h403
-rw-r--r--fs/ubifs/dir.c1240
-rw-r--r--fs/ubifs/file.c1275
-rw-r--r--fs/ubifs/find.c975
-rw-r--r--fs/ubifs/gc.c773
-rw-r--r--fs/ubifs/io.c914
-rw-r--r--fs/ubifs/ioctl.c204
-rw-r--r--fs/ubifs/journal.c1387
-rw-r--r--fs/ubifs/key.h533
-rw-r--r--fs/ubifs/log.c805
-rw-r--r--fs/ubifs/lprops.c1357
-rw-r--r--fs/ubifs/lpt.c2243
-rw-r--r--fs/ubifs/lpt_commit.c1648
-rw-r--r--fs/ubifs/master.c387
-rw-r--r--fs/ubifs/misc.h342
-rw-r--r--fs/ubifs/orphan.c958
-rw-r--r--fs/ubifs/recovery.c1519
-rw-r--r--fs/ubifs/replay.c1075
-rw-r--r--fs/ubifs/sb.c629
-rw-r--r--fs/ubifs/scan.c362
-rw-r--r--fs/ubifs/shrinker.c322
-rw-r--r--fs/ubifs/super.c1951
-rw-r--r--fs/ubifs/tnc.c2956
-rw-r--r--fs/ubifs/tnc_commit.c1103
-rw-r--r--fs/ubifs/tnc_misc.c494
-rw-r--r--fs/ubifs/ubifs-media.h745
-rw-r--r--fs/ubifs/ubifs.h1649
-rw-r--r--fs/ubifs/xattr.c581
-rw-r--r--include/linux/fs.h2
-rw-r--r--init/do_mounts.c3
41 files changed, 33055 insertions, 11 deletions
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
new file mode 100644
index 000000000000..540e9e7f59c5
--- /dev/null
+++ b/Documentation/filesystems/ubifs.txt
@@ -0,0 +1,164 @@
+Introduction
+=============
+
+UBIFS file-system stands for UBI File System. UBI stands for "Unsorted
+Block Images". UBIFS is a flash file system, which means it is designed
+to work with flash devices. It is important to understand that UBIFS
+is completely different to any traditional file-system in Linux, like
+Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems
+which work with MTD devices, not block devices. The other Linux
+file-system of this class is JFFS2.
+
+To make it more clear, here is a small comparison of MTD devices and
+block devices.
+
+1 MTD devices represent flash devices and they consist of eraseblocks of
+ rather large size, typically about 128KiB. Block devices consist of
+ small blocks, typically 512 bytes.
+2 MTD devices support 3 main operations - read from some offset within an
+ eraseblock, write to some offset within an eraseblock, and erase a whole
+ eraseblock. Block devices support 2 main operations - read a whole
+ block and write a whole block.
+3 The whole eraseblock has to be erased before it becomes possible to
+ re-write its contents. Blocks may be just re-written.
+4 Eraseblocks become worn out after some number of erase cycles -
+ typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC
+ NAND flashes. Blocks do not have the wear-out property.
+5 Eraseblocks may become bad (only on NAND flashes) and software should
+ deal with this. Blocks on hard drives typically do not become bad,
+ because hardware has mechanisms to substitute bad blocks, at least in
+ modern LBA disks.
+
+It should be quite obvious why UBIFS is very different to traditional
+file-systems.
+
+UBIFS works on top of UBI. UBI is a separate software layer which may be
+found in drivers/mtd/ubi. UBI is basically a volume management and
+wear-leveling layer. It provides so-called UBI volumes, which are a
+higher-level abstraction than an MTD device. The programming model of UBI devices
+is very similar to MTD devices - they still consist of large eraseblocks,
+they have read/write/erase operations, but UBI devices are devoid of
+limitations like wear and bad blocks (items 4 and 5 in the above list).
+
+In a sense, UBIFS is the next generation of the JFFS2 file-system, but it is
+very different from and incompatible with JFFS2. The following are the main
+differences.
+
+* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on
+ top of UBI volumes.
+* JFFS2 does not have on-media index and has to build it while mounting,
+ which requires full media scan. UBIFS maintains the FS indexing
+ information on the flash media and does not require full media scan,
+ so it mounts many times faster than JFFS2.
+* JFFS2 is a write-through file-system, while UBIFS supports write-back,
+ which makes UBIFS much faster on writes.
+
+Similarly to JFFS2, UBIFS supports on-the-fly compression, which makes
+it possible to fit quite a lot of data on the flash.
+
+Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts.
+It does not need tools like fsck.ext2. UBIFS automatically replays its
+journal and recovers from crashes, ensuring that the on-flash data
+structures are consistent.
+
+UBIFS scales logarithmically (most of the data structures it uses are
+trees), so the mount time and memory consumption do not linearly depend
+on the flash size, like in case of JFFS2. This is because UBIFS
+maintains the FS index on the flash media. However, UBIFS depends on
+UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly.
+Nevertheless, UBI/UBIFS scales considerably better than JFFS2.
+
+The authors of UBIFS believe that it is possible to develop UBI2, which
+would scale logarithmically as well. UBI2 would support the same API as UBI,
+but it would be binary incompatible with UBI. So UBIFS would not need to be
+changed to use UBI2.
+
+
+Mount options
+=============
+
+(*) == default.
+
+norm_unmount (*) commit on unmount; the journal is committed
+ when the file-system is unmounted so that the
+ next mount does not have to replay the journal
+ and it becomes very fast;
+fast_unmount do not commit on unmount; this option makes
+ unmount faster, but the next mount slower
+ because of the need to replay the journal.
+
+
+Quick usage instructions
+========================
+
+The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax,
+where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is
+UBI volume name.
+
+Mount volume 0 on UBI device 0 to /mnt/ubifs:
+$ mount -t ubifs ubi0_0 /mnt/ubifs
+
+Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume
+name):
+$ mount -t ubifs ubi0:rootfs /mnt/ubifs
+
+The following is an example of the kernel boot arguments to attach mtd0
+to UBI and mount volume "rootfs":
+ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
+
+
+Module Parameters for Debugging
+===============================
+
+When UBIFS has been compiled with debugging enabled, there are 3 module
+parameters that are available to control aspects of testing and debugging.
+The parameters are unsigned integers where each bit controls an option.
+The parameters are:
+
+debug_msgs Selects which debug messages to display, as follows:
+
+ Message Type Flag value
+
+ General messages 1
+ Journal messages 2
+ Mount messages 4
+ Commit messages 8
+ LEB search messages 16
+ Budgeting messages 32
+ Garbage collection messages 64
+ Tree Node Cache (TNC) messages 128
+ LEB properties (lprops) messages 256
+ Input/output messages 512
+ Log messages 1024
+ Scan messages 2048
+ Recovery messages 4096
+
+debug_chks Selects extra checks that UBIFS can do while running:
+
+ Check Flag value
+
+ General checks 1
+ Check Tree Node Cache (TNC) 2
+ Check indexing tree size 4
+ Check orphan area 8
+ Check old indexing tree 16
+ Check LEB properties (lprops) 32
+ Check leaf nodes and inodes 64
+
+debug_tsts Selects a mode of testing, as follows:
+
+ Test mode Flag value
+
+ Force in-the-gaps method 2
+ Failure mode for recovery testing 4
+
+For example, set debug_msgs to 5 to display General messages and Mount
+messages.
+
+
+References
+==========
+
+UBIFS documentation and FAQ/HOWTO at the MTD web site:
+http://www.linux-mtd.infradead.org/doc/ubifs.html
+http://www.linux-mtd.infradead.org/faq/ubifs.html
diff --git a/MAINTAINERS b/MAINTAINERS
index 633bda666e45..2e535e8de44b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2336,6 +2336,16 @@ L: linux-mtd@lists.infradead.org
W: http://www.linux-mtd.infradead.org/doc/jffs2.html
S: Maintained
+UBI FILE SYSTEM (UBIFS)
+P: Artem Bityutskiy
+M: dedekind@infradead.org
+P: Adrian Hunter
+M: ext-adrian.hunter@nokia.com
+L: linux-mtd@lists.infradead.org
+T: git git://git.infradead.org/~dedekind/ubifs-2.6.git
+W: http://www.linux-mtd.infradead.org/doc/ubifs.html
+S: Maintained
+
JFS FILESYSTEM
P: Dave Kleikamp
M: shaggy@austin.ibm.com
diff --git a/fs/Kconfig b/fs/Kconfig
index 84ab76a206a0..17216ba99c85 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1375,6 +1375,9 @@ config JFFS2_CMODE_FAVOURLZO
endchoice
+# UBIFS File system configuration
+source "fs/ubifs/Kconfig"
+
config CRAMFS
tristate "Compressed ROM file system support (cramfs)"
depends on BLOCK
diff --git a/fs/Makefile b/fs/Makefile
index 277b079dec9e..3b2178b4bb66 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -101,6 +101,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs/
obj-$(CONFIG_UFS_FS) += ufs/
obj-$(CONFIG_EFS_FS) += efs/
obj-$(CONFIG_JFFS2_FS) += jffs2/
+obj-$(CONFIG_UBIFS_FS) += ubifs/
obj-$(CONFIG_AFFS_FS) += affs/
obj-$(CONFIG_ROMFS_FS) += romfs/
obj-$(CONFIG_QNX4FS_FS) += qnx4/
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ae45f77765c0..25adfc3c693a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -424,8 +424,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
* that it can be located for waiting on in __writeback_single_inode().
*
- * Called under inode_lock.
- *
* If `bdi' is non-zero then we're being asked to writeback a specific queue.
* This function assumes that the blockdev superblock's inodes are backed by
* a variety of queues, so all inodes are searched. For other superblocks,
@@ -441,11 +439,12 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* on the writer throttling path, and we get decent balancing between many
* throttled threads: we don't want them all piling up on inode_sync_wait.
*/
-static void
-sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
+void generic_sync_sb_inodes(struct super_block *sb,
+ struct writeback_control *wbc)
{
const unsigned long start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
if (!wbc->for_kupdate || list_empty(&sb->s_io))
queue_io(sb, wbc->older_than_this);
@@ -524,8 +523,16 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
if (!list_empty(&sb->s_more_io))
wbc->more_io = 1;
}
+ spin_unlock(&inode_lock);
return; /* Leave any unwritten inodes on s_io */
}
+EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
+
+static void sync_sb_inodes(struct super_block *sb,
+ struct writeback_control *wbc)
+{
+ generic_sync_sb_inodes(sb, wbc); /* takes inode_lock itself */
+}
/*
* Start writeback of dirty pagecache data against all unlocked inodes.
@@ -565,11 +572,8 @@ restart:
* be unmounted by the time it is released.
*/
if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root) {
- spin_lock(&inode_lock);
+ if (sb->s_root)
sync_sb_inodes(sb, wbc);
- spin_unlock(&inode_lock);
- }
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
@@ -607,9 +611,7 @@ void sync_inodes_sb(struct super_block *sb, int wait)
(inodes_stat.nr_inodes - inodes_stat.nr_unused) +
nr_dirty + nr_unstable;
wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
- spin_lock(&inode_lock);
sync_sb_inodes(sb, &wbc);
- spin_unlock(&inode_lock);
}
/*
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
new file mode 100644
index 000000000000..91ceeda7e5bf
--- /dev/null
+++ b/fs/ubifs/Kconfig
@@ -0,0 +1,72 @@
+config UBIFS_FS
+ tristate "UBIFS file system support"
+ select CRC16
+ select CRC32
+ select CRYPTO if UBIFS_FS_ADVANCED_COMPR
+ select CRYPTO if UBIFS_FS_LZO
+ select CRYPTO if UBIFS_FS_ZLIB
+ select CRYPTO_LZO if UBIFS_FS_LZO
+ select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
+ depends on MTD_UBI
+ help
+ UBIFS is a file system for flash devices which works on top of UBI.
+
+config UBIFS_FS_XATTR
+ bool "Extended attributes support"
+ depends on UBIFS_FS
+ help
+ This option enables support of extended attributes.
+
+config UBIFS_FS_ADVANCED_COMPR
+ bool "Advanced compression options"
+ depends on UBIFS_FS
+ help
+ This option allows you to explicitly choose which compressors, if any,
+ are enabled in UBIFS. Removing compressors means inability to read
+ existing file systems.
+
+ If unsure, say 'N'.
+
+config UBIFS_FS_LZO
+ bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR
+ depends on UBIFS_FS
+ default y
+ help
+ LZO compressor is generally faster than zlib but compresses worse.
+ Say 'Y' if unsure.
+
+config UBIFS_FS_ZLIB
+ bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR
+ depends on UBIFS_FS
+ default y
+ help
+ Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
+
+# Debugging-related stuff
+config UBIFS_FS_DEBUG
+ bool "Enable debugging"
+ depends on UBIFS_FS
+ select DEBUG_FS
+ select KALLSYMS_ALL
+ help
+ This option enables UBIFS debugging.
+
+config UBIFS_FS_DEBUG_MSG_LVL
+ int "Default message level (0 = no extra messages, 3 = lots)"
+ depends on UBIFS_FS_DEBUG
+ default "0"
+ help
+ This controls the amount of debugging messages produced by UBIFS.
+ If reporting bugs, please try to have available a full dump of the
+ messages at level 1 while the misbehaviour was occurring. Level 2
+ may become necessary if level 1 messages were not enough to find the
+ bug. Generally Level 3 should be avoided.
+
+config UBIFS_FS_DEBUG_CHKS
+ bool "Enable extra checks"
+ depends on UBIFS_FS_DEBUG
+ help
+ If extra checks are enabled UBIFS will check the consistency of its
+ internal data structures during operation. However, UBIFS performance
+ is dramatically slower when this option is selected especially if the
+ file system is large.
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
new file mode 100644
index 000000000000..80e93c35e496
--- /dev/null
+++ b/fs/ubifs/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_UBIFS_FS) += ubifs.o
+
+ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
+ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
+ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
+ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o
+
+ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o
+ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
new file mode 100644
index 000000000000..d81fb9ed2b8e
--- /dev/null
+++ b/fs/ubifs/budget.c
@@ -0,0 +1,731 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ * Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements the budgeting sub-system which is responsible for UBIFS
+ * space management.
+ *
+ * Factors such as compression, wasted space at the ends of LEBs, space in other
+ * journal heads, the effect of updates on the index, and so on, make it
+ * impossible to accurately predict the amount of space needed. Consequently
+ * approximations are used.
+ */
+
+#include "ubifs.h"
+#include <linux/writeback.h>
+#include <asm/div64.h>
+
+/*
+ * When pessimistic budget calculations say that there is not enough space,
+ * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
+ * or committing. The constants below define the maximum number of times UBIFS
+ * repeats each of these operations.
+ */
+#define MAX_SHRINK_RETRIES 8
+#define MAX_GC_RETRIES 4
+#define MAX_CMT_RETRIES 2
+#define MAX_NOSPC_RETRIES 1
+
+/*
+ * The constant below defines the base number of dirty pages which should be
+ * written back when trying to shrink the liability.
+ */
+#define NR_TO_WRITE 16
+
+/**
+ * struct retries_info - information about re-tries while making free space.
+ * @prev_liability: liability observed on the previous attempt
+ * @shrink_cnt: how many times write-back was forced to shrink the liability
+ * @shrink_retries: count of liability shrink re-tries (increased when
+ * liability does not shrink)
+ * @try_gc: GC should be tried first
+ * @gc_retries: how many times GC was run
+ * @cmt_retries: how many times commit has been done
+ * @nospc_retries: how many times GC returned %-ENOSPC
+ *
+ * Since we consider budgeting to be the fast-path, and this structure has to
+ * be allocated on stack and zeroed out, we make it smaller using bit-fields.
+ */
+struct retries_info {
+ long long prev_liability;
+ unsigned int shrink_cnt;
+ unsigned int shrink_retries:5;
+ unsigned int try_gc:1;
+ unsigned int gc_retries:4;
+ unsigned int cmt_retries:3;
+ unsigned int nospc_retries:1;
+};
+
+/**
+ * shrink_liability - write-back some dirty pages/inodes.
+ * @c: UBIFS file-system description object
+ * @nr_to_write: how many dirty pages to write-back
+ *
+ * This function shrinks UBIFS liability by means of writing back some amount
+ * of dirty inodes and their pages. Returns the number of pages which were
+ * written back. The returned value does not include dirty inodes which were
+ * synchronized.
+ *
+ * Note, this function synchronizes even VFS inodes which are locked
+ * (@i_mutex) by the caller of the budgeting function, because write-back does
+ * not touch @i_mutex.
+ */
+static int shrink_liability(struct ubifs_info *c, int nr_to_write)
+{
+ int nr_written;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE,
+ .range_end = LLONG_MAX,
+ .nr_to_write = nr_to_write,
+ };
+
+ generic_sync_sb_inodes(c->vfs_sb, &wbc);
+ nr_written = nr_to_write - wbc.nr_to_write;
+
+ if (!nr_written) {
+ /*
+ * Re-try, but this time wait on pages/inodes which are being
+ * written-back concurrently (e.g., by pdflush).
+ */
+ memset(&wbc, 0, sizeof(struct writeback_control));
+ wbc.sync_mode = WB_SYNC_ALL;
+ wbc.range_end = LLONG_MAX;
+ wbc.nr_to_write = nr_to_write;
+ generic_sync_sb_inodes(c->vfs_sb, &wbc);
+ nr_written = nr_to_write - wbc.nr_to_write;
+ }
+
+ dbg_budg("%d pages were written back", nr_written);
+ return nr_written;
+}
+
+
+/**
+ * run_gc - run the garbage collector.
+ * @c: UBIFS file-system description object
+ *
+ * This function runs the garbage collector to make some more free space.
+ * Returns zero if a free LEB has been produced, %-EAGAIN if commit is required,
+ * and a negative error code in case of failure.
+ */
+static int run_gc(struct ubifs_info *c)
+{
+ int err, lnum;
+
+ /* Make some free space by garbage-collecting dirty space */
+ down_read(&c->commit_sem);
+ lnum = ubifs_garbage_collect(c, 1);
+ up_read(&c->commit_sem);
+ if (lnum < 0)
+ return lnum;
+
+ /* GC freed one LEB, return it to lprops */
+ dbg_budg("GC freed LEB %d", lnum);
+ err = ubifs_return_leb(c, lnum);
+ if (err)
+ return err;
+ return 0;
+}
+
+/**
+ * make_free_space - make more free space on the file-system.
+ * @c: UBIFS file-system description object
+ * @ri: information about previous invocations of this function
+ *
+ * This function is called when an operation cannot be budgeted because there
+ * is supposedly no free space. But in most cases there is some free space:
+ * o budgeting is pessimistic, so it always budgets more than is actually
+ * needed, so shrinking the liability is one way to make free space - the
+ * cached data will take less space than was budgeted for;
+ * o GC may turn some dark space into free space (budgeting treats dark space
+ * as not available);
+ * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs.
+ *
+ * So this function tries to do the above. Returns %-EAGAIN if some free space
+ * was presumably made and the caller has to re-try budgeting the operation.
+ * Returns %-ENOSPC if it could not make more free space, and other negative
+ * error codes on failures.
+ */
+static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
+{
+ int err;
+
+ /*
+ * If we have some dirty pages and inodes (liability), try to write
+ * them back unless this was tried too many times without effect
+ * already.
+ */
+ if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
+ long long liability;
+
+ spin_lock(&c->space_lock);
+ liability = c->budg_idx_growth + c->budg_data_growth +
+ c->budg_dd_growth;
+ spin_unlock(&c->space_lock);
+
+ if (ri->prev_liability >= liability) {
+ /* Liability does not shrink, next time try GC then */
+ ri->shrink_retries += 1;
+ if (ri->gc_retries < MAX_GC_RETRIES)
+ ri->try_gc = 1;
+ dbg_budg("liability did not shrink: retries %d of %d",
+ ri->shrink_retries, MAX_SHRINK_RETRIES);
+ }
+
+ dbg_budg("force write-back (count %d)", ri->shrink_cnt);
+ shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt);
+
+ ri->prev_liability = liability;
+ ri->shrink_cnt += 1;
+ return -EAGAIN;
+ }
+
+ /*
+ * Try to run the garbage collector unless it was already tried too
+ * many times.
+ */
+ if (ri->gc_retries < MAX_GC_RETRIES) {
+ ri->gc_retries += 1;
+ dbg_budg("run GC, retries %d of %d",
+ ri->gc_retries, MAX_GC_RETRIES);
+
+ ri->try_gc = 0;
+ err = run_gc(c);
+ if (!err)
+ return -EAGAIN;
+
+ if (err == -EAGAIN) {
+ dbg_budg("GC asked to commit");
+ err = ubifs_run_commit(c);
+ if (err)
+ return err;
+ return -EAGAIN;
+ }
+
+ if (err != -ENOSPC)
+ return err;
+
+ /*
+ * GC could not make any progress. If this is the first time,
+ * then it makes sense to try to commit, because it might make
+ * some dirty space.
+ */
+ dbg_budg("GC returned -ENOSPC, retries %d",
+ ri->nospc_retries);
+ if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
+ return err;
+ ri->nospc_retries += 1;
+ }
+
+ /* Neither GC nor write-back helped, try to commit */
+ if (ri->cmt_retries < MAX_CMT_RETRIES) {
+ ri->cmt_retries += 1;
+ dbg_budg("run commit, retries %d of %d",
+ ri->cmt_retries, MAX_CMT_RETRIES);
+ err = ubifs_run_commit(c);
+ if (err)
+ return err;
+ return -EAGAIN;
+ }
+ return -ENOSPC;
+}
+
+/**
+ * ubifs_calc_min_idx_lebs - calculate number of eraseblocks for the index.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates and returns the number of eraseblocks which should
+ * be kept for index usage.
+ */
+int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
+{
+ int ret;
+ uint64_t idx_size;
+
+ idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
+
+ /* Make sure we have twice the index size of space reserved */
+ idx_size <<= 1;
+
+ /*
+ * We do not maintain 'old_idx_sz' as an 'old_idx_lebs'/'old_idx_bytes'
+ * pair, nor similarly the two variables for the new index size, so we
+ * have to do this costly 64-bit division on fast-path (rounding up).
+ */
+ if (do_div(idx_size, c->leb_size - c->max_idx_node_sz))
+ ret = idx_size + 1;
+ else
+ ret = idx_size;
+ /*
+ * The index head is not available for the in-the-gaps method, so add an
+ * extra LEB to compensate.
+ */
+ ret += 1;
+ /*
+ * At present the index needs at least 2 LEBs: one for the index head
+ * and one for the in-the-gaps method (which currently does not cater
+ * for the index head and so excludes it from consideration).
+ */
+ if (ret < 2)
+ ret = 2;
+ return ret;
+}
+
+/**
+ * ubifs_calc_available - calculate available FS space.
+ * @c: UBIFS file-system description object
+ * @min_idx_lebs: minimum number of LEBs reserved for the index
+ *
+ * Returns the amount of FS space available for use (never negative).
+ */
+long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
+{
+ int subtract_lebs;
+ long long available;
+
+ /*
+ * Force the amount available to the total size reported if the used
+ * space is zero.
+ */
+ if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
+ c->budg_data_growth + c->budg_dd_growth == 0) {
+ /* Do the same calculation as for c->block_cnt */
+ available = c->main_lebs - 2;
+ available *= c->leb_size - c->dark_wm;
+ return available;
+ }
+
+ available = c->main_bytes - c->lst.total_used;
+
+ /*
+ * Now 'available' contains theoretically available flash space
+ * assuming there is no index, so we have to subtract the space which
+ * is reserved for the index.
+ */
+ subtract_lebs = min_idx_lebs;
+
+ /* Take into account that GC reserves one LEB for its own needs */
+ subtract_lebs += 1;
+
+ /*
+ * The GC journal head LEB is not really accessible. And since
+ * different write types go to different heads, we may count only on
+ * one head's space.
+ */
+ subtract_lebs += c->jhead_cnt - 1;
+
+ /* We also reserve one LEB for deletions, which bypass budgeting */
+ subtract_lebs += 1;
+
+ available -= (long long)subtract_lebs * c->leb_size;
+
+ /* Subtract the dead space which is not available for use */
+ available -= c->lst.total_dead;
+
+ /*
+ * Subtract dark space, which might or might not be usable - it depends
+ * on the data which we have on the media and which will be written. If
+ * this is a lot of uncompressed or not-compressible data, the dark
+ * space cannot be used.
+ */
+ available -= c->lst.total_dark;
+
+ /*
+ * However, there is more dark space. The index may be bigger than
+ * @min_idx_lebs. Those extra LEBs are assumed to be available, but
+ * their dark space is not included in total_dark, so it is subtracted
+ * here.
+ */
+ if (c->lst.idx_lebs > min_idx_lebs) {
+ subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
+ available -= subtract_lebs * c->dark_wm;
+ }
+
+ /* The calculations are rough and may end up with a negative number */
+ return available > 0 ? available : 0;
+}
+
+/**
+ * can_use_rp - check whether the user is allowed to use reserved pool.
+ * @c: UBIFS file-system description object
+ *
+ * UBIFS has a so-called "reserved pool", which is flash space reserved
+ * for the superuser and for users whose UID/GID is recorded in UBIFS superblock.
+ * This function checks whether current user is allowed to use reserved pool.
+ * Returns %1 if current user may use the reserved pool and %0 otherwise.
+ */
+static int can_use_rp(struct ubifs_info *c)
+{
+ if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
+ (c->rp_gid != 0 && in_group_p(c->rp_gid)))
+ return 1;
+ return 0;
+}
+