summaryrefslogtreecommitdiffstats
path: root/fs/reiserfs/reiserfs.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/reiserfs/reiserfs.h')
-rw-r--r--fs/reiserfs/reiserfs.h2923
1 files changed, 2923 insertions, 0 deletions
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
new file mode 100644
index 000000000000..a59d27126338
--- /dev/null
+++ b/fs/reiserfs/reiserfs.h
@@ -0,0 +1,2923 @@
+/*
+ * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details
+ */
+
+#include <linux/reiserfs_fs.h>
+
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/bug.h>
+#include <linux/workqueue.h>
+#include <asm/unaligned.h>
+#include <linux/bitops.h>
+#include <linux/proc_fs.h>
+#include <linux/buffer_head.h>
+
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
+
+struct reiserfs_journal_list;
+
+/** bitmasks for i_flags field in reiserfs-specific part of inode */
+typedef enum {
+ /** this says what format of key do all items (but stat data) of
+ an object have. If this is set, that format is 3.6 otherwise
+ - 3.5 */
+ i_item_key_version_mask = 0x0001,
+ /** If this is unset, object has 3.5 stat data, otherwise, it has
+ 3.6 stat data with 64bit size, 32bit nlink etc. */
+ i_stat_data_version_mask = 0x0002,
+ /** file might need tail packing on close */
+ i_pack_on_close_mask = 0x0004,
+ /** don't pack tail of file */
+ i_nopack_mask = 0x0008,
+ /** If those is set, "safe link" was created for this file during
+ truncate or unlink. Safe link is used to avoid leakage of disk
+ space on crash with some files open, but unlinked. */
+ i_link_saved_unlink_mask = 0x0010,
+ i_link_saved_truncate_mask = 0x0020,
+ i_has_xattr_dir = 0x0040,
+ i_data_log = 0x0080,
+} reiserfs_inode_flags;
+
+struct reiserfs_inode_info {
+ __u32 i_key[4]; /* key is still 4 32 bit integers */
+ /** transient inode flags that are never stored on disk. Bitmasks
+ for this field are defined above. */
+ __u32 i_flags;
+
+ __u32 i_first_direct_byte; // offset of first byte stored in direct item.
+
+ /* copy of persistent inode flags read from sd_attrs. */
+ __u32 i_attrs;
+
+ int i_prealloc_block; /* first unused block of a sequence of unused blocks */
+ int i_prealloc_count; /* length of that sequence */
+ struct list_head i_prealloc_list; /* per-transaction list of inodes which
+ * have preallocated blocks */
+
+ unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks
+ * for the contents of this directory should be
+ * displaced */
+
+ /* we use these for fsync or O_SYNC to decide which transaction
+ ** needs to be committed in order for this inode to be properly
+ ** flushed */
+ unsigned int i_trans_id;
+ struct reiserfs_journal_list *i_jl;
+ atomic_t openers;
+ struct mutex tailpack;
+#ifdef CONFIG_REISERFS_FS_XATTR
+ struct rw_semaphore i_xattr_sem;
+#endif
+ struct inode vfs_inode;
+};
+
+typedef enum {
+ reiserfs_attrs_cleared = 0x00000001,
+} reiserfs_super_block_flags;
+
+/* struct reiserfs_super_block accessors/mutators
+ * since this is a disk structure, it will always be in
+ * little endian format. */
+#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count))
+#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
+#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks))
+#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v))
+#define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block))
+#define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v))
+
+#define sb_jp_journal_1st_block(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block))
+#define set_sb_jp_journal_1st_block(sbp,v) \
+ ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v))
+#define sb_jp_journal_dev(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev))
+#define set_sb_jp_journal_dev(sbp,v) \
+ ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v))
+#define sb_jp_journal_size(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size))
+#define set_sb_jp_journal_size(sbp,v) \
+ ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v))
+#define sb_jp_journal_trans_max(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max))
+#define set_sb_jp_journal_trans_max(sbp,v) \
+ ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v))
+#define sb_jp_journal_magic(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic))
+#define set_sb_jp_journal_magic(sbp,v) \
+ ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v))
+#define sb_jp_journal_max_batch(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch))
+#define set_sb_jp_journal_max_batch(sbp,v) \
+ ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v))
+#define sb_jp_jourmal_max_commit_age(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age))
+#define set_sb_jp_journal_max_commit_age(sbp,v) \
+ ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v))
+
+#define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize))
+#define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v))
+#define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize))
+#define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v))
+#define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize))
+#define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v))
+#define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state))
+#define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v))
+#define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state))
+#define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v))
+#define sb_hash_function_code(sbp) \
+ (le32_to_cpu((sbp)->s_v1.s_hash_function_code))
+#define set_sb_hash_function_code(sbp,v) \
+ ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v))
+#define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height))
+#define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v))
+#define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr))
+#define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v))
+#define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version))
+#define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v))
+
+#define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count))
+#define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v))
+
+#define sb_reserved_for_journal(sbp) \
+ (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal))
+#define set_sb_reserved_for_journal(sbp,v) \
+ ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v))
+
+/* LOGGING -- */
+
+/* These all interelate for performance.
+**
+** If the journal block count is smaller than n transactions, you lose speed.
+** I don't know what n is yet, I'm guessing 8-16.
+**
+** typical transaction size depends on the application, how often fsync is
+** called, and how many metadata blocks you dirty in a 30 second period.
+** The more small files (<16k) you use, the larger your transactions will
+** be.
+**
+** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal
+** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough
+** to prevent wrapping before dirty meta blocks get to disk.
+**
+** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal
+** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping.
+**
+** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash.
+**
+*/
+
+/* don't mess with these for a while */
+ /* we have a node size define somewhere in reiserfs_fs.h. -Hans */
+#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */
+#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */
+#define JOURNAL_HASH_SIZE 8192
+#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */
+
+/* One of these for every block in every transaction
+** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a
+** hash of all the in memory transactions.
+** next and prev are used by the current transaction (journal_hash).
+** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash
+** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging
+** to a given transaction.
+*/
+struct reiserfs_journal_cnode {
+ struct buffer_head *bh; /* real buffer head */
+ struct super_block *sb; /* dev of real buffer head */
+ __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */
+ unsigned long state;
+ struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */
+ struct reiserfs_journal_cnode *next; /* next in transaction list */
+ struct reiserfs_journal_cnode *prev; /* prev in transaction list */
+ struct reiserfs_journal_cnode *hprev; /* prev in hash list */
+ struct reiserfs_journal_cnode *hnext; /* next in hash list */
+};
+
+struct reiserfs_bitmap_node {
+ int id;
+ char *data;
+ struct list_head list;
+};
+
+struct reiserfs_list_bitmap {
+ struct reiserfs_journal_list *journal_list;
+ struct reiserfs_bitmap_node **bitmaps;
+};
+
+/*
+** one of these for each transaction. The most important part here is the j_realblock.
+** this list of cnodes is used to hash all the blocks in all the commits, to mark all the
+** real buffer heads dirty once all the commits hit the disk,
+** and to make sure every real block in a transaction is on disk before allowing the log area
+** to be overwritten */
+struct reiserfs_journal_list {
+ unsigned long j_start;
+ unsigned long j_state;
+ unsigned long j_len;
+ atomic_t j_nonzerolen;
+ atomic_t j_commit_left;
+ atomic_t j_older_commits_done; /* all commits older than this on disk */
+ struct mutex j_commit_mutex;
+ unsigned int j_trans_id;
+ time_t j_timestamp;
+ struct reiserfs_list_bitmap *j_list_bitmap;
+ struct buffer_head *j_commit_bh; /* commit buffer head */
+ struct reiserfs_journal_cnode *j_realblock;
+ struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. free each of these on flush */
+ /* time ordered list of all active transactions */
+ struct list_head j_list;
+
+ /* time ordered list of all transactions we haven't tried to flush yet */
+ struct list_head j_working_list;
+
+ /* list of tail conversion targets in need of flush before commit */
+ struct list_head j_tail_bh_list;
+ /* list of data=ordered buffers in need of flush before commit */
+ struct list_head j_bh_list;
+ int j_refcount;
+};
+
+struct reiserfs_journal {
+ struct buffer_head **j_ap_blocks; /* journal blocks on disk */
+ struct reiserfs_journal_cnode *j_last; /* newest journal block */
+ struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */
+
+ struct block_device *j_dev_bd;
+ fmode_t j_dev_mode;
+ int j_1st_reserved_block; /* first block on s_dev of reserved area journal */
+
+ unsigned long j_state;
+ unsigned int j_trans_id;
+ unsigned long j_mount_id;
+ unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */
+ unsigned long j_len; /* length of current waiting commit */
+ unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */
+ atomic_t j_wcount; /* count of writers for current commit */
+ unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */
+ unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */
+ unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */
+ struct buffer_head *j_header_bh;
+
+ time_t j_trans_start_time; /* time this transaction started */
+ struct mutex j_mutex;
+ struct mutex j_flush_mutex;
+ wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */
+ atomic_t j_jlock; /* lock for j_join_wait */
+ int j_list_bitmap_index; /* number of next list bitmap to use */
+ int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */
+ int j_next_full_flush; /* next journal_end will flush all journal list */
+ int j_next_async_flush; /* next journal_end will flush all async commits */
+
+ int j_cnode_used; /* number of cnodes on the used list */
+ int j_cnode_free; /* number of cnodes on the free list */
+
+ unsigned int j_trans_max; /* max number of blocks in a transaction. */
+ unsigned int j_max_batch; /* max number of blocks to batch into a trans */
+ unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */
+ unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */
+ unsigned int j_default_max_commit_age; /* the default for the max commit age */
+
+ struct reiserfs_journal_cnode *j_cnode_free_list;
+ struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */
+
+ struct reiserfs_journal_list *j_current_jl;
+ int j_free_bitmap_nodes;
+ int j_used_bitmap_nodes;
+
+ int j_num_lists; /* total number of active transactions */
+ int j_num_work_lists; /* number that need attention from kreiserfsd */
+
+ /* debugging to make sure things are flushed in order */
+ unsigned int j_last_flush_id;
+
+ /* debugging to make sure things are committed in order */
+ unsigned int j_last_commit_id;
+
+ struct list_head j_bitmap_nodes;
+ struct list_head j_dirty_buffers;
+ spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */
+
+ /* list of all active transactions */
+ struct list_head j_journal_list;
+ /* lists that haven't been touched by writeback attempts */
+ struct list_head j_working_list;
+
+ struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */
+ struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */
+ struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all
+ the transactions */
+ struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */
+ int j_persistent_trans;
+ unsigned long j_max_trans_size;
+ unsigned long j_max_batch_size;
+
+ int j_errno;
+
+ /* when flushing ordered buffers, throttle new ordered writers */
+ struct delayed_work j_work;
+ struct super_block *j_work_sb;
+ atomic_t j_async_throttle;
+};
+
+enum journal_state_bits {
+ J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */
+ J_WRITERS_QUEUED, /* set when log is full due to too many writers */
+ J_ABORTED, /* set when log is aborted */
+};
+
+#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */
+
+typedef __u32(*hashf_t) (const signed char *, int);
+
+struct reiserfs_bitmap_info {
+ __u32 free_count;
+};
+
+struct proc_dir_entry;
+
+#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
+typedef unsigned long int stat_cnt_t;
+typedef struct reiserfs_proc_info_data {
+ spinlock_t lock;
+ int exiting;
+ int max_hash_collisions;
+
+ stat_cnt_t breads;
+ stat_cnt_t bread_miss;
+ stat_cnt_t search_by_key;
+ stat_cnt_t search_by_key_fs_changed;
+ stat_cnt_t search_by_key_restarted;
+
+ stat_cnt_t insert_item_restarted;
+ stat_cnt_t paste_into_item_restarted;
+ stat_cnt_t cut_from_item_restarted;
+ stat_cnt_t delete_solid_item_restarted;
+ stat_cnt_t delete_item_restarted;
+
+ stat_cnt_t leaked_oid;
+ stat_cnt_t leaves_removable;
+
+ /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */
+ stat_cnt_t balance_at[5]; /* XXX */
+ /* sbk == search_by_key */
+ stat_cnt_t sbk_read_at[5]; /* XXX */
+ stat_cnt_t sbk_fs_changed[5];
+ stat_cnt_t sbk_restarted[5];
+ stat_cnt_t items_at[5]; /* XXX */
+ stat_cnt_t free_at[5]; /* XXX */
+ stat_cnt_t can_node_be_removed[5]; /* XXX */
+ long int lnum[5]; /* XXX */
+ long int rnum[5]; /* XXX */
+ long int lbytes[5]; /* XXX */
+ long int rbytes[5]; /* XXX */
+ stat_cnt_t get_neighbors[5];
+ stat_cnt_t get_neighbors_restart[5];
+ stat_cnt_t need_l_neighbor[5];
+ stat_cnt_t need_r_neighbor[5];
+
+ stat_cnt_t free_block;
+ struct __scan_bitmap_stats {
+ stat_cnt_t call;
+ stat_cnt_t wait;
+ stat_cnt_t bmap;
+ stat_cnt_t retry;
+ stat_cnt_t in_journal_hint;
+ stat_cnt_t in_journal_nohint;
+ stat_cnt_t stolen;
+ } scan_bitmap;
+ struct __journal_stats {
+ stat_cnt_t in_journal;
+ stat_cnt_t in_journal_bitmap;
+ stat_cnt_t in_journal_reusable;
+ stat_cnt_t lock_journal;
+ stat_cnt_t lock_journal_wait;
+ stat_cnt_t journal_being;
+ stat_cnt_t journal_relock_writers;
+ stat_cnt_t journal_relock_wcount;
+ stat_cnt_t mark_dirty;
+ stat_cnt_t mark_dirty_already;
+ stat_cnt_t mark_dirty_notjournal;
+ stat_cnt_t restore_prepared;
+ stat_cnt_t prepare;
+ stat_cnt_t prepare_retry;
+ } journal;
+} reiserfs_proc_info_data_t;
+#else
+typedef struct reiserfs_proc_info_data {
+} reiserfs_proc_info_data_t;
+#endif
+
+/* reiserfs union of in-core super block data */
+struct reiserfs_sb_info {
+ struct buffer_head *s_sbh; /* Buffer containing the super block */
+ /* both the comment and the choice of
+ name are unclear for s_rs -Hans */
+ struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */
+ struct reiserfs_bitmap_info *s_ap_bitmap;
+ struct reiserfs_journal *s_journal; /* pointer to journal information */
+ unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
+
+ /* Serialize writers access, replace the old bkl */
+ struct mutex lock;
+ /* Owner of the lock (can be recursive) */
+ struct task_struct *lock_owner;
+ /* Depth of the lock, start from -1 like the bkl */
+ int lock_depth;
+
+ /* Comment? -Hans */
+ void (*end_io_handler) (struct buffer_head *, int);
+ hashf_t s_hash_function; /* pointer to function which is used
+ to sort names in directory. Set on
+ mount */
+ unsigned long s_mount_opt; /* reiserfs's mount options are set
+ here (currently - NOTAIL, NOLOG,
+ REPLAYONLY) */
+
+ struct { /* This is a structure that describes block allocator options */
+ unsigned long bits; /* Bitfield for enable/disable kind of options */
+ unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */
+ int border; /* percentage of disk, border takes */
+ int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */
+ int preallocsize; /* Number of blocks we try to prealloc when file
+ reaches preallocmin size (in blocks) or
+ prealloc_list is empty. */
+ } s_alloc_options;
+
+ /* Comment? -Hans */
+ wait_queue_head_t s_wait;
+ /* To be obsoleted soon by per buffer seals.. -Hans */
+ atomic_t s_generation_counter; // increased by one every time the
+ // tree gets re-balanced
+ unsigned long s_properties; /* File system properties. Currently holds
+ on-disk FS format */
+
+ /* session statistics */
+ int s_disk_reads;
+ int s_disk_writes;
+ int s_fix_nodes;
+ int s_do_balance;
+ int s_unneeded_left_neighbor;
+ int s_good_search_by_key_reada;
+ int s_bmaps;
+ int s_bmaps_without_search;
+ int s_direct2indirect;
+ int s_indirect2direct;
+ /* set up when it's ok for reiserfs_read_inode2() to read from
+ disk inode with nlink==0. Currently this is only used during
+ finish_unfinished() processing at mount time */
+ int s_is_unlinked_ok;
+ reiserfs_proc_info_data_t s_proc_info_data;
+ struct proc_dir_entry *procdir;
+ int reserved_blocks; /* amount of blocks reserved for further allocations */
+ spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */
+ struct dentry *priv_root; /* root of /.reiserfs_priv */
+ struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */
+ int j_errno;
+#ifdef CONFIG_QUOTA
+ char *s_qf_names[MAXQUOTAS];
+ int s_jquota_fmt;
+#endif
+ char *s_jdev; /* Stored jdev for mount option showing */
+#ifdef CONFIG_REISERFS_CHECK
+
+ struct tree_balance *cur_tb; /*
+ * Detects whether more than one
+ * copy of tb exists per superblock
+ * as a means of checking whether
+ * do_balance is executing concurrently
+ * against another tree reader/writer
+ * on a same mount point.
+ */
+#endif
+};
+
+/* Definitions of reiserfs on-disk properties: */
+#define REISERFS_3_5 0
+#define REISERFS_3_6 1
+#define REISERFS_OLD_FORMAT 2
+
+enum reiserfs_mount_options {
+/* Mount options */
+ REISERFS_LARGETAIL, /* large tails will be created in a session */
+ REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */
+ REPLAYONLY, /* replay journal and return 0. Use by fsck */
+ REISERFS_CONVERT, /* -o conv: causes conversion of old
+ format super block to the new
+ format. If not specified - old
+ partition will be dealt with in a
+ manner of 3.5.x */
+
+/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting
+** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option
+** is not required. If the normal autodection code can't determine which
+** hash to use (because both hashes had the same value for a file)
+** use this option to force a specific hash. It won't allow you to override
+** the existing hash on the FS, so if you have a tea hash disk, and mount
+** with -o hash=rupasov, the mount will fail.
+*/
+ FORCE_TEA_HASH, /* try to force tea hash on mount */
+ FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */
+ FORCE_R5_HASH, /* try to force rupasov hash on mount */
+ FORCE_HASH_DETECT, /* try to detect hash function on mount */
+
+ REISERFS_DATA_LOG,
+ REISERFS_DATA_ORDERED,
+ REISERFS_DATA_WRITEBACK,
+
+/* used for testing experimental features, makes benchmarking new
+ features with and without more convenient, should never be used by
+ users in any code shipped to users (ideally) */
+
+ REISERFS_NO_BORDER,
+ REISERFS_NO_UNHASHED_RELOCATION,
+ REISERFS_HASHED_RELOCATION,
+ REISERFS_ATTRS,
+ REISERFS_XATTRS_USER,
+ REISERFS_POSIXACL,
+ REISERFS_EXPOSE_PRIVROOT,
+ REISERFS_BARRIER_NONE,
+ REISERFS_BARRIER_FLUSH,
+
+ /* Actions on error */
+ REISERFS_ERROR_PANIC,
+ REISERFS_ERROR_RO,
+ REISERFS_ERROR_CONTINUE,
+
+ REISERFS_USRQUOTA, /* User quota option specified */
+ REISERFS_GRPQUOTA, /* Group quota option specified */
+
+ REISERFS_TEST1,
+ REISERFS_TEST2,
+ REISERFS_TEST3,
+ REISERFS_TEST4,
+ REISERFS_UNSUPPORTED_OPT,
+};
+
+#define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH))
+#define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH))
+#define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH))
+#define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT))
+#define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER))
+#define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION))
+#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
+#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))
+
+#define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
+#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
+#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
+#define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS))
+#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5))
+#define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT))
+#define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG))
+#define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED))
+#define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
+#define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
+#define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
+#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
+#define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
+#define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
+#define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
+
+#define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC))
+#define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO))
+
+void reiserfs_file_buffer(struct buffer_head *bh, int list);
+extern struct file_system_type reiserfs_fs_type;
+int reiserfs_resize(struct super_block *, unsigned long);
+
+#define CARRY_ON 0
+#define SCHEDULE_OCCURRED 1
+
+#define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh)
+#define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal)
+#define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block)
+#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free)
+#define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap)
+
+#define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->)
+
+/* A safe version of the "bdevname", which returns the "s_id" field of
+ * a superblock or else "Null superblock" if the super block is NULL.
+ */
+static inline char *reiserfs_bdevname(struct super_block *s)
+{
+ return (s == NULL) ? "Null superblock" : s->s_id;
+}
+
+#define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal)))
+static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
+ *journal)
+{
+ return test_bit(J_ABORTED, &journal->j_state);
+}
+
+/*
+ * Locking primitives. The write lock is a per superblock
+ * special mutex that has properties close to the Big Kernel Lock
+ * which was used in the previous locking scheme.
+ */
+void reiserfs_write_lock(struct super_block *s);
+void reiserfs_write_unlock(struct super_block *s);
+int reiserfs_write_lock_once(struct super_block *s);
+void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *s);
+#else
+static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
+#endif
+
+/*
+ * Several mutexes depend on the write lock.
+ * However sometimes we want to relax the write lock while we hold
+ * these mutexes, according to the release/reacquire on schedule()
+ * properties of the Bkl that were used.
+ * Reiserfs performances and locking were based on this scheme.
+ * Now that the write lock is a mutex and not the bkl anymore, doing so
+ * may result in a deadlock:
+ *
+ * A acquire write_lock
+ * A acquire j_commit_mutex
+ * A release write_lock and wait for something
+ * B acquire write_lock
+ * B can't acquire j_commit_mutex and sleep
+ * A can't acquire write lock anymore
+ * deadlock
+ *
+ * What we do here is avoiding such deadlock by playing the same game
+ * than the Bkl: if we can't acquire a mutex that depends on the write lock,
+ * we release the write lock, wait a bit and then retry.
+ *
+ * The mutexes concerned by this hack are:
+ * - The commit mutex of a journal list
+ * - The flush mutex
+ * - The journal lock
+ * - The inode mutex
+ */
+static inline void reiserfs_mutex_lock_safe(struct mutex *m,
+ struct super_block *s)
+{
+ reiserfs_lock_check_recursive(s);
+ reiserfs_write_unlock(s);
+ mutex_lock(m);
+ reiserfs_write_lock(s);
+}
+
+static inline void
+reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
+ struct super_block *s)
+{
+ reiserfs_lock_check_recursive(s);
+ reiserfs_write_unlock(s);
+ mutex_lock_nested(m, subclass);
+ reiserfs_write_lock(s);
+}
+
+static inline void
+reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
+{
+ reiserfs_lock_check_recursive(s);
+ reiserfs_write_unlock(s);
+ down_read(sem);
+ reiserfs_write_lock(s);
+}
+
+/*
+ * When we schedule, we usually want to also release the write lock,
+ * according to the previous bkl based locking scheme of reiserfs.
+ */
+static inline void reiserfs_cond_resched(struct super_block *s)
+{
+ if (need_resched()) {
+ reiserfs_write_unlock(s);
+ schedule();
+ reiserfs_write_lock(s);
+ }
+}
+
+struct fid;
+
+/* in reading the #defines, it may help to understand that they employ
+ the following abbreviations:
+
+ B = Buffer
+ I = Item header
+ H = Height within the tree (should be changed to LEV)
+ N = Number of the item in the node
+ STAT = stat data
+ DEH = Directory Entry Header
+ EC = Entry Count
+ E = Entry number
+ UL = Unsigned Long
+ BLKH = BLocK Header
+ UNFM = UNForMatted node
+ DC = Disk Child
+ P = Path
+
+ These #defines are named by concatenating these abbreviations,
+ where first comes the arguments, and last comes the return value,
+ of the macro.
+
+*/
+
+#define USE_INODE_GENERATION_COUNTER
+
+#define REISERFS_PREALLOCATE
+#define DISPLACE_NEW_PACKING_LOCALITIES
+#define PREALLOCATION_SIZE 9
+
+/* n must be power of 2 */
+#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
+
+// to be ok for alpha and others we have to align structures to 8 byte
+// boundary.
+// FIXME: do not change 4 by anything else: there is code which relies on that
+#define ROUND_UP(x) _ROUND_UP(x,8LL)
+
+/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug
+** messages.
+*/
+#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */
+
+void __reiserfs_warning(struct super_block *s, const char *id,
+ const char *func, const char *fmt, ...);
+#define reiserfs_warning(s, id, fmt, args...) \
+ __reiserfs_warning(s, id, __func__, fmt, ##args)
+/* assertions handling */
+
+/** always check a condition and panic if it's false. */
+#define __RASSERT(cond, scond, format, args...) \
+do { \
+ if (!(cond)) \
+ reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
+ __FILE__ ":%i:%s: " format "\n", \
+ in_interrupt() ? -1 : task_pid_nr(current), \
+ __LINE__, __func__ , ##args); \
+} while (0)
+
+#define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
+
+#if defined( CONFIG_REISERFS_CHECK )
+#define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args)
+#else
+#define RFALSE( cond, format, args... ) do {;} while( 0 )
+#endif
+
+#define CONSTF __attribute_const__
+/*
+ * Disk Data Structures
+ */
+
+/***************************************************************************/
+/* SUPER BLOCK */
+/***************************************************************************/
+
+/*
+ * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs
+ * the version in RAM is part of a larger structure containing fields never written to disk.
+ */
+#define UNSET_HASH 0 // read_super will guess about, what hash names
+ // in directories were sorted with
+#define TEA_HASH 1
+#define YURA_HASH 2
+#define R5_HASH 3
+#define DEFAULT_HASH R5_HASH
+
+struct journal_params {
+ __le32 jp_journal_1st_block; /* where does journal start from on its
+ * device */
+ __le32 jp_journal_dev; /* journal device st_rdev */
+ __le32 jp_journal_size; /* size of the journal */
+ __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */
+ __le32 jp_journal_magic; /* random value made on fs creation (this
+ * was sb_journal_block_count) */
+ __le32 jp_journal_max_batch; /* max number of blocks to batch into a
+ * trans */
+ __le32 jp_journal_max_commit_age; /* in seconds, how old can an async
+ * commit be */
+ __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction
+ * be */
+};
+
+/* this is the super from 3.5.X, where X >= 10 */
+struct reiserfs_super_block_v1 {
+ __le32 s_block_count; /* blocks count */
+ __le32 s_free_blocks; /* free blocks count */
+ __le32 s_root_block; /* root block number */
+ struct journal_params s_journal;
+ __le16 s_blocksize; /* block size */
+ __le16 s_oid_maxsize; /* max size of object id array, see
+ * get_objectid() commentary */
+ __le16 s_oid_cursize; /* current size of object id array */
+ __le16 s_umount_state; /* this is set to 1 when filesystem was
+ * umounted, to 2 - when not */
+ char s_magic[10]; /* reiserfs magic string indicates that
+ * file system is reiserfs:
+ * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
+ __le16 s_fs_state; /* it is set to used by fsck to mark which
+ * phase of rebuilding is done */
+ __le32 s_hash_function_code; /* indicate, what hash function is being use
+ * to sort names in a directory*/
+ __le16 s_tree_height; /* height of disk tree */
+ __le16 s_bmap_nr; /* amount of bitmap blocks needed to address
+ * each block of file system */
+ __le16 s_version; /* this field is only reliable on filesystem
+ * with non-standard journal */
+ __le16 s_reserved_for_journal; /* size in blocks of journal area on main
+ * device, we need to keep after
+ * making fs with non-standard journal */
+} __attribute__ ((__packed__));
+
+#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
+
+/* this is the on disk super block */
+struct reiserfs_super_block {
+ struct reiserfs_super_block_v1 s_v1;
+ __le32 s_inode_generation;
+ __le32 s_flags; /* Right now used only by inode-attributes, if enabled */
+ unsigned char s_uuid[16]; /* filesystem unique identifier */
+ unsigned char s_label[16]; /* filesystem volume label */
+ __le16 s_mnt_count; /* Count of mounts since last fsck */
+ __le16 s_max_mnt_count; /* Maximum mounts before check */
+ __le32 s_lastcheck; /* Timestamp of last fsck */
+ __le32 s_check_interval; /* Interval between checks */
+ char s_unused[76]; /* zero filled by mkreiserfs and
+ * reiserfs_convert_objectid_map_v1()
+ * so any additions must be updated
+ * there as well. */
+} __attribute__ ((__packed__));
+
+#define SB_SIZE (sizeof(struct reiserfs_super_block))
+
+#define REISERFS_VERSION_1 0
+#define REISERFS_VERSION_2 2
+
+// on-disk super block fields converted to cpu form
+#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
+#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
+#define SB_BLOCKSIZE(s) \
+ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize))
+#define SB_BLOCK_COUNT(s) \
+ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count))
+#define SB_FREE_BLOCKS(s) \
+ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks))
+#define SB_REISERFS_MAGIC(s) \
+ (SB_V1_DISK_SUPER_BLOCK(s)->s_magic)
+#define SB_ROOT_BLOCK(s) \
+ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block))
+#define SB_TREE_HEIGHT(s) \
+ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height))
+#define SB_REISERFS_STATE(s) \
+ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state))
+#define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version))
+#define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr))
+
+#define PUT_SB_BLOCK_COUNT(s, val) \
+ do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0)
+#define PUT_SB_FREE_BLOCKS(s, val) \
+ do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0)
+#define PUT_SB_ROOT_BLOCK(s, val) \
+ do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0)
+#define PUT_SB_TREE_HEIGHT(s, val) \
+ do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0)
+#define PUT_SB_REISERFS_STATE(s, val) \
+ do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0)
+#define PUT_SB_VERSION(s, val) \
+ do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0)
+#define PUT_SB_BMAP_NR(s, val) \
+ do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0)
+
+#define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal)
+#define SB_ONDISK_JOURNAL_SIZE(s) \
+ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size))
+#define SB_ONDISK_JOURNAL_1st_BLOCK(s) \
+ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block))
+#define SB_ONDISK_JOURNAL_DEVICE(s) \
+ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev))
+#define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \
+ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
+
+#define is_block_in_log_or_reserved_area(s, block) \
+ block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \
+ && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \
+ ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \
+ SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s)))
+
+int is_reiserfs_3_5(struct reiserfs_super_block *rs);
+int is_reiserfs_3_6(struct reiserfs_super_block *rs);
+int is_reiserfs_jr(struct reiserfs_super_block *rs);
+
+/* ReiserFS leaves the first 64k unused, so that partition labels have
+ enough space. If someone wants to write a fancy bootloader that
+ needs more than 64k, let us know, and this will be increased in size.
+ This number must be larger than than the largest block size on any
+ platform, or code will break. -Hans */
+#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
+#define REISERFS_FIRST_BLOCK unused_define
+#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
+
+/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */
+#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
+
+/* reiserfs internal error code (used by search_by_key and fix_nodes)) */
<