13 files changed, 806 insertions, 462 deletions
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 7bc906677b0d..6c04f5eda135 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -175,31 +175,22 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 {
 	s64 lblock64 = lblock;
 	int rc = 0;
-	int take_locks;
 	xad_t xad;
 	s64 xaddr;
 	int xflag;
 	s32 xlen;
 
 	/*
-	 * If this is a special inode (imap, dmap)
-	 * the lock should already be taken
-	 */
-	take_locks = (JFS_IP(ip)->fileset != AGGREGATE_I);
-
-	/*
 	 * Take appropriate lock on inode
 	 */
-	if (take_locks) {
-		if (create)
-			IWRITE_LOCK(ip);
-		else
-			IREAD_LOCK(ip);
-	}
+	if (create)
+		IWRITE_LOCK(ip);
+	else
+		IREAD_LOCK(ip);
 
 	if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
-	    (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)
-	     == 0) && xlen) {
+	    (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
+	    xlen) {
 		if (xflag & XAD_NOTRECORDED) {
 			if (!create)
 				/*
@@ -258,12 +249,10 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 	/*
 	 * Release lock on inode
 	 */
-	if (take_locks) {
-		if (create)
-			IWRITE_UNLOCK(ip);
-		else
-			IREAD_UNLOCK(ip);
-	}
+	if (create)
+		IWRITE_UNLOCK(ip);
+	else
+		IREAD_UNLOCK(ip);
 	return rc;
 }
 
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index d86e467c6e42..69007fd546ef 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -471,6 +471,7 @@ dbUpdatePMap(struct inode *ipbmap,
 	struct metapage *mp;
 	struct jfs_log *log;
 	int lsn, difft, diffp;
+	unsigned long flags;
 
 	/* the blocks better be within the mapsize. */
 	if (blkno + nblocks > bmp->db_mapsize) {
@@ -504,6 +505,7 @@ dbUpdatePMap(struct inode *ipbmap,
 					   0);
 			if (mp == NULL)
 				return -EIO;
+			metapage_wait_for_io(mp);
 		}
 		dp = (struct dmap *) mp->data;
 
@@ -578,34 +580,32 @@ dbUpdatePMap(struct inode *ipbmap,
 		if (mp->lsn != 0) {
 			/* inherit older/smaller lsn */
 			logdiff(diffp, mp->lsn, log);
+			LOGSYNC_LOCK(log, flags);
 			if (difft < diffp) {
 				mp->lsn = lsn;
 
 				/* move bp after tblock in logsync list */
-				LOGSYNC_LOCK(log);
 				list_move(&mp->synclist, &tblk->synclist);
-				LOGSYNC_UNLOCK(log);
 			}
 
 			/* inherit younger/larger clsn */
-			LOGSYNC_LOCK(log);
 			logdiff(difft, tblk->clsn, log);
 			logdiff(diffp, mp->clsn, log);
 			if (difft > diffp)
 				mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log);
+			LOGSYNC_UNLOCK(log, flags);
 		} else {
 			mp->log = log;
 			mp->lsn = lsn;
 
 			/* insert bp after tblock in logsync list */
-			LOGSYNC_LOCK(log);
+			LOGSYNC_LOCK(log, flags);
 
 			log->count++;
 			list_add(&mp->synclist, &tblk->synclist);
 
 			mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log);
+			LOGSYNC_UNLOCK(log, flags);
 		}
 	}
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 6a0aa7e2cbef..7acff2ce3c80 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -502,7 +502,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 
 	}
 
-	ip->i_mapping->a_ops = &jfs_aops;
+	ip->i_mapping->a_ops = &jfs_metapage_aops;
 	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
 
 	/* Allocations to metadata inodes should not affect quotas */
@@ -2791,6 +2791,7 @@ diUpdatePMap(struct inode *ipimap,
 	u32 mask;
 	struct jfs_log *log;
 	int lsn, difft, diffp;
+	unsigned long flags;
 
 	imap = JFS_IP(ipimap)->i_imap;
 	/* get the iag number containing the inode */
@@ -2807,6 +2808,7 @@ diUpdatePMap(struct inode *ipimap,
 	IREAD_UNLOCK(ipimap);
 	if (rc)
 		return (rc);
+	metapage_wait_for_io(mp);
 	iagp = (struct iag *) mp->data;
 	/* get the inode number and extent number of the inode within
 	 * the iag and the inode number within the extent.
@@ -2870,30 +2872,28 @@ diUpdatePMap(struct inode *ipimap,
 		/* inherit older/smaller lsn */
 		logdiff(difft, lsn, log);
 		logdiff(diffp, mp->lsn, log);
+		LOGSYNC_LOCK(log, flags);
 		if (difft < diffp) {
 			mp->lsn = lsn;
 			/* move mp after tblock in logsync list */
-			LOGSYNC_LOCK(log);
 			list_move(&mp->synclist, &tblk->synclist);
-			LOGSYNC_UNLOCK(log);
 		}
 		/* inherit younger/larger clsn */
-		LOGSYNC_LOCK(log);
 		assert(mp->clsn);
 		logdiff(difft, tblk->clsn, log);
 		logdiff(diffp, mp->clsn, log);
 		if (difft > diffp)
 			mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 	} else {
 		mp->log = log;
 		mp->lsn = lsn;
 		/* insert mp after tblock in logsync list */
-		LOGSYNC_LOCK(log);
+		LOGSYNC_LOCK(log, flags);
 		log->count++;
 		list_add(&mp->synclist, &tblk->synclist);
 		mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 	}
 	write_metapage(mp);
 	return (0);
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index ebd77c1bed66..c0fd7b3eadc6 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -165,6 +165,7 @@ struct jfs_sb_info {
         /* Formerly in ipbmap */
 	struct bmap	*bmap;		/* incore bmap descriptor	*/
 	struct nls_table *nls_tab;	/* current codepage		*/
+	struct inode *direct_inode;	/* metadata inode */
 	uint		state;		/* mount/recovery state	*/
 	unsigned long	flag;		/* mount time flags */
 	uint		p_state;	/* state prior to going no integrity */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index e0f867ddfd10..cfcdad3459dd 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -234,6 +234,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	int lsn;
 	int diffp, difft;
 	struct metapage *mp = NULL;
+	unsigned long flags;
 
 	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 		 log, tblk, lrd, tlck);
@@ -254,7 +255,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	 */
 	lsn = log->lsn;
 
-	LOGSYNC_LOCK(log);
+	LOGSYNC_LOCK(log, flags);
 
 	/*
 	 * initialize page lsn if first log write of the page
@@ -310,7 +311,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		}
 	}
 
-	LOGSYNC_UNLOCK(log);
+	LOGSYNC_UNLOCK(log, flags);
 
 	/*
 	 *      write the log record
@@ -334,7 +335,6 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return lsn;
 }
 
-
 /*
  * NAME:	lmWriteRecord()
  *
@@ -945,6 +945,15 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	struct lrd lrd;
 	int lsn;
 	struct logsyncblk *lp;
+	struct jfs_sb_info *sbi;
+	unsigned long flags;
+
+	/* push dirty metapages out to disk */
+	list_for_each_entry(sbi, &log->sb_list, log_list) {
+		filemap_flush(sbi->ipbmap->i_mapping);
+		filemap_flush(sbi->ipimap->i_mapping);
+		filemap_flush(sbi->direct_inode->i_mapping);
+	}
 
 	/*
 	 *      forward syncpt
@@ -954,10 +963,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	 */
 
 	if (log->sync == log->syncpt) {
-		LOGSYNC_LOCK(log);
-		/* ToDo: push dirty metapages out to disk */
-//              bmLogSync(log);
-
+		LOGSYNC_LOCK(log, flags);
 		if (list_empty(&log->synclist))
 			log->sync = log->lsn;
 		else {
@@ -965,7 +971,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 					struct logsyncblk, synclist);
 			log->sync = lp->lsn;
 		}
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 
 	}
 
@@ -974,27 +980,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	 * reset syncpt = sync
 	 */
 	if (log->sync != log->syncpt) {
-		struct jfs_sb_info *sbi;
-
-		/*
-		 * We need to make sure all of the "written" metapages
-		 * actually make it to disk
-		 */
-		list_for_each_entry(sbi, &log->sb_list, log_list) {
-			if (sbi->flag & JFS_NOINTEGRITY)
-				continue;
-			filemap_fdatawrite(sbi->ipbmap->i_mapping);
-			filemap_fdatawrite(sbi->ipimap->i_mapping);
-			filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping);
-		}
-		list_for_each_entry(sbi, &log->sb_list, log_list) {
-			if (sbi->flag & JFS_NOINTEGRITY)
-				continue;
-			filemap_fdatawait(sbi->ipbmap->i_mapping);
-			filemap_fdatawait(sbi->ipimap->i_mapping);
-			filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping);
-		}
-
 		lrd.logtid = 0;
 		lrd.backchain = 0;
 		lrd.type = cpu_to_le16(LOG_SYNCPT);
@@ -1547,6 +1532,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 {
 	int i;
 	struct tblock *target = NULL;
+	struct jfs_sb_info *sbi;
 
 	/* jfs_write_inode may call us during read-only mount */
 	if (!log)
@@ -1608,12 +1594,18 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 	if (wait < 2)
 		return;
 
+	list_for_each_entry(sbi, &log->sb_list, log_list) {
+		filemap_fdatawrite(sbi->ipbmap->i_mapping);
+		filemap_fdatawrite(sbi->ipimap->i_mapping);
+		filemap_fdatawrite(sbi->direct_inode->i_mapping);
+	}
+
 	/*
 	 * If there was recent activity, we may need to wait
 	 * for the lazycommit thread to catch up
 	 */
 	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
-		for (i = 0; i < 800; i++) {	/* Too much? */
+		for (i = 0; i < 200; i++) {	/* Too much? */
 			msleep(250);
 			if (list_empty(&log->cqueue) &&
 			    list_empty(&log->synclist))
@@ -1621,7 +1613,24 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 		}
 	}
 	assert(list_empty(&log->cqueue));
-	assert(list_empty(&log->synclist));
+	if (!list_empty(&log->synclist)) {
+		struct logsyncblk *lp;
+
+		list_for_each_entry(lp, &log->synclist, synclist) {
+			if (lp->xflag & COMMIT_PAGE) {
+				struct metapage *mp = (struct metapage *)lp;
+				dump_mem("orphan metapage", lp,
+					 sizeof(struct metapage));
+				dump_mem("page", mp->page, sizeof(struct page));
+			}
+			else
+				dump_mem("orphan tblock", lp,
+					 sizeof(struct tblock));
+		}
+//		current->state = TASK_INTERRUPTIBLE;
+//		schedule();
+	}
+	//assert(list_empty(&log->synclist));
 	clear_bit(log_FLUSH, &log->flag);
 }
 
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index f67146684b7f..f4c121098d4f 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -490,8 +490,9 @@ struct logsyncblk {
  */
 
 #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
-#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock)
-#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock)
+#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
+#define LOGSYNC_UNLOCK(log, flags) \
+	spin_unlock_irqrestore(&(log)->synclock, flags)
 
 /* compute the difference in bytes of lsn from sync point */
 #define logdiff(diff, lsn, log)\
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 4c0a3ac75c08..41bf078dce05 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -1,5 +1,5 @@
 /*
- *   Copyright (C) International Business Machines Corp., 2000-2003
+ *   Copyright (C) International Business Machines Corp., 2000-2005
  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
@@ -18,10 +18,11 @@
  */
 
 #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
 #include <linux/init.h>
 #include <linux/buffer_head.h>
 #include <linux/mempool.h>
-#include <linux/delay.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_filsys.h"
@@ -29,8 +30,6 @@
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"
 
-static DEFINE_SPINLOCK(meta_lock);
-
 #ifdef CONFIG_JFS_STATISTICS
 static struct {
 	uint	pagealloc;	/* # of page allocations */
@@ -39,22 +38,8 @@ static struct {
 } mpStat;
 #endif
 
-
-#define HASH_BITS 10		/* This makes hash_table 1 4K page */
-#define HASH_SIZE (1 << HASH_BITS)
-static struct metapage **hash_table = NULL;
-static unsigned long hash_order;
-
-
-static inline int metapage_locked(struct metapage *mp)
-{
-	return test_bit(META_locked, &mp->flag);
-}
-
-static inline int trylock_metapage(struct metapage *mp)
-{
-	return test_and_set_bit(META_locked, &mp->flag);
-}
+#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
+#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag)
 
 static inline void unlock_metapage(struct metapage *mp)
 {
@@ -62,26 +47,26 @@ static inline void unlock_metapage(struct metapage *mp)
 	wake_up(&mp->wait);
 }
 
-static void __lock_metapage(struct metapage *mp)
+static inline void __lock_metapage(struct metapage *mp)
 {
 	DECLARE_WAITQUEUE(wait, current);
-
 	INCREMENT(mpStat.lockwait);
-
 	add_wait_queue_exclusive(&mp->wait, &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (metapage_locked(mp)) {
-			spin_unlock(&meta_lock);
+			unlock_page(mp->page);
 			schedule();
-			spin_lock(&meta_lock);
+			lock_page(mp->page);
 		}
 	} while (trylock_metapage(mp));
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&mp->wait, &wait);
 }
 
-/* needs meta_lock */
+/*
+ * Must have mp->page locked
+ */
 static inline void lock_metapage(struct metapage *mp)
 {
 	if (trylock_metapage(mp))
@@ -92,6 +77,110 @@ static inline void lock_metapage(struct metapage *mp)
 static kmem_cache_t *metapage_cache;
 static mempool_t *metapage_mempool;
 
+#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
+
+#if MPS_PER_PAGE > 1
+
+struct meta_anchor {
+	int mp_count;
+	atomic_t io_count;
+	struct metapage *mp[MPS_PER_PAGE];
+};
+#define mp_anchor(page) ((struct meta_anchor *)page->private)
+
+static inline struct metapage *page_to_mp(struct page *page, uint offset)
+{
+	if (!PagePrivate(page))
+		return NULL;
+	return mp_anchor(page)->mp[offset >> L2PSIZE];
+}
+
+static inline int insert_metapage(struct page *page, struct metapage *mp)
+{
+	struct meta_anchor *a;
+	int index;
+	int l2mp_blocks;	/* log2 blocks per metapage */
+
+	if (PagePrivate(page))
+		a = mp_anchor(page);
+	else {
+		a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS);
+		if (!a)
+			return -ENOMEM;
+		memset(a, 0, sizeof(struct meta_anchor));
+		page->private = (unsigned long)a;
+		SetPagePrivate(page);
+		kmap(page);
+	}
+
+	if (mp) {
+		l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+		index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
+		a->mp_count++;
+		a->mp[index] = mp;
+	}
+
+	return 0;
+}
+
+static inline void remove_metapage(struct page *page, struct metapage *mp)
+{
+	struct meta_anchor *a = mp_anchor(page);
+	int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+	int index;
+
+	index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
+
+	BUG_ON(a->mp[index] != mp);
+
+	a->mp[index] = NULL;
+	if (--a->mp_count == 0) {
+		kfree(a);
+		page->private = 0;
+		ClearPagePrivate(page);
+		kunmap(page);
+	}
+}
+
+static inline void inc_io(struct page *page)
+{
+	atomic_inc(&mp_anchor(page)->io_count);
+}
+
+static inline void dec_io(struct page *page, void (*handler) (struct page *))
+{
+	if (atomic_dec_and_test(&mp_anchor(page)->io_count))
+		handler(page);
+}
+
+#else
+static inline struct metapage *page_to_mp(struct page *page, uint offset)
+{
+	return PagePrivate(page) ? (struct metapage *)page->private : NULL;
+}
+
+static inline int insert_metapage(struct page *page, struct metapage *mp)
+{
+	if (mp) {
+		page->private = (unsigned long)mp;
+		SetPagePrivate(page);
+		kmap(page);
+	}
+	return 0;
+}
+
+static inline void remove_metapage(struct page *page, struct metapage *mp)
+{
+	page->private = 0;
+	ClearPagePrivate(page);
+	kunmap(page);
+}
+
+#define inc_io(page) do {} while(0)
+#define dec_io(page, handler) handler(page)
+
+#endif
+
 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
 	struct metapage *mp = (struct metapage *)foo;
@@ -139,16 +228,6 @@ int __init metapage_init(void)
 		kmem_cache_destroy(metapage_cache);
 		return -ENOMEM;
 	}
-	/*
-	 * Now the hash list
-	 */
-	for (hash_order = 0;
-	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
-	     hash_order++);
-	hash_table =
-	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
-	assert(hash_table);
-	memset(hash_table, 0, PAGE_SIZE << hash_order);
 
 	return 0;
 }
@@ -159,73 +238,388 @@ void metapage_exit(void)
 	kmem_cache_destroy(metapage_cache);
 }
 
+static inline void drop_metapage(struct page *page, struct metapage *mp)
+{
+	if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
+	    test_bit(META_io, &mp->flag))
+		return;
+	remove_metapage(page, mp);
+	INCREMENT(mpStat.pagefree);
+	free_metapage(mp);
+}
+
 /*
- * Basically same hash as in pagemap.h, but using our hash table
+ * Metapage address space operations
  */
-static struct metapage **meta_hash(struct address_space *mapping,
-				   unsigned long index)
+
+static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
+				    unsigned int *len)
 {
-#define i (((unsigned long)mapping)/ \
-	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
-#define s(x) ((x) + ((x) >> HASH_BITS))
-	return hash_table + (s(i + index) & (HASH_SIZE - 1));
-#undef i
-#undef s
+	int rc = 0;
+	int xflag;
+	s64 xaddr;
+	sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >>
+			       inode->i_blkbits;
+
+	if (lblock >= file_blocks)
+		return 0;
+	if (lblock + *len > file_blocks)
+		*len = file_blocks - lblock;
+
+	if (inode->i_ino) {
+		rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
+		if ((rc == 0) && *len)
+			lblock = (sector_t)xaddr;
+		else
+			lblock = 0;
+	} /* else no mapping */
+
+	return lblock;
 }
 
-static struct metapage *search_hash(struct metapage ** hash_ptr,
-				    struct address_space *mapping,
-			       unsigned long index)
+static void last_read_complete(struct page *page)
 {
-	struct metapage *ptr;
+	if (!PageError(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+}
+
+static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
+				int err)
+{
+	struct page *page = bio->bi_private;
+
+	if (bio->bi_size)
+		return 1;
 
-	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
-		if ((ptr->mapping == mapping) && (ptr->index == index))
-			return ptr;
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
+		SetPageError(page);
 	}
 
-	return NULL;
+	dec_io(page, last_read_complete);
+	bio_put(bio);
+
+	return 0;
 }
 
-static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
+static void remove_from_logsync(struct metapage *mp)
 {
-	if (*hash_ptr)
-		(*hash_ptr)->hash_prev = mp;
+	struct jfs_log *log = mp->log;
+	unsigned long flags;
+/*
+ * This can race.  Recheck that log hasn't been set to null, and after
+ * acquiring logsync lock, recheck lsn
+ */
+	if (!log)
+		return;
+
+	LOGSYNC_LOCK(log, flags);
+	if (mp->lsn) {
+		mp->log = NULL;
+		mp->lsn = 0;
+		mp->clsn = 0;
+		log->count--;
+		list_del(&mp->synclist);
+	}
+	LOGSYNC_UNLOCK(log, flags);
+}
 
-	mp->hash_prev = NULL;
-	mp->hash_next = *hash_ptr;
-	*hash_ptr = mp;
+static void last_write_complete(struct page *page)
+{
+	struct metapage *mp;
+	unsigned int offset;
+
+	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+		mp = page_to_mp(page, offset);
+		if (mp && test_bit(META_io, &mp->flag)) {
+			if (mp->lsn)
+				remove_from_logsync(mp);
+			clear_bit(META_io, &mp->flag);
+		}
+		/*
+		 * I'd like to call drop_metapage here, but I don't think it's
+		 * safe unless I have the page locked
+		 */
+	}
+	end_page_writeback(page);
 }
 
-static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
+static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
+				 int err)
 {
-	if (mp->hash_prev)
-		mp->hash_prev->hash_next = mp->hash_next;
-	else {
-		assert(*hash_ptr == mp);
-		*hash_ptr = mp->hash_next;
+	struct page *page = bio->bi_private;
+
+	BUG_ON(!PagePrivate(page));
+
+	if (bio->bi_size)
+		return 1;
+
+	if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
+		SetPageError(page);
+	}
+	dec_io(page, last_write_complete);
+	bio_put(bio);
+	return 0;
+}
+
+static int metapage_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct bio *bio = NULL;
+	unsigned int block_offset;	/* block offset of mp within page */
+	struct inode *inode = page->mapping->host;
+	unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
+	unsigned int len;
+	unsigned int xlen;
+	struct metapage *mp;
+	int redirty = 0;
+	sector_t lblock;
+	sector_t pblock;
+	sector_t next_block = 0;
+	sector_t page_start;
+	unsigned long bio_bytes = 0;
+	unsigned long bio_offset = 0;
+	unsigned int offset;
+
+	page_start = (sector_t)page->index <<
+		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	BUG_ON(!PageLocked(page));
+	BUG_ON(PageWriteback(page));
+
+	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+		mp = page_to_mp(page, offset);
+
+		if (!mp || !test_bit(META_dirty, &mp->flag))
+			continue;
+
+		if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
+			redirty = 1;
+			continue;
+		}
+
+		clear_bit(META_dirty, &mp->flag);
+		block_offset = offset >> inode->i_blkbits;
+		lblock = page_start + block_offset;
+		if (bio) {
+			if (xlen && lblock == next_block) {
+				/* Contiguous, in memory & on disk */
+				len = min(xlen, blocks_per_mp);
+				xlen -= len;
+				bio_bytes += len << inode->i_blkbits;
+				set_bit(META_io, &mp->flag);
+				continue;
+			}
+			/* Not contiguous */
+			if (bio_add_page(bio, page, bio_bytes, bio_offset) <
+			    bio_bytes)
+				goto add_failed;
+			/*
+			 * Increment counter before submitting i/o to keep
+			 * count from hitting zero before we're through
+			 */
+			inc_io(page);
+			if (!bio->bi_size)
+				goto dump_bio;
+			submit_bio(WRITE, bio);
+			bio = NULL;
+		} else {
+			set_page_writeback(page);
+			inc_io(page);
+		}
+		xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
+		pblock = metapage_get_blocks(inode, lblock, &xlen);
+		if (!pblock) {
+			/* Need better error handling */
+			printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
+			dec_io(page, last_write_complete);
+			continue;
+		}
+		set_bit(META_io, &mp->flag);
+		len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage);
+
+		bio = bio_alloc(GFP_NOFS, 1);
+		bio->bi_bdev = inode->i_sb->s_bdev;
+		bio->bi_sector = pblock << (inode->i_blkbits - 9);
+		bio->bi_end_io = metapage_write_end_io;
+		bio->bi_private = page;
+
+		/* Don't call bio_add_page yet, we may add to this vec */
+		bio_offset = offset;
+		bio_bytes = len << inode->i_blkbits;
+
+		xlen -= len;
+		next_block = lblock + len;
+	}
+	if (bio) {
+		if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
+				goto add_failed;
+		if (!bio->bi_size)
+			goto dump_bio;
+		
+		submit_bio(WRITE, bio);
+	}
+	if (redirty)
+		redirty_page_for_writepage(wbc, page);
+
+	unlock_page(page);
+
+	return 0;
+add_failed:
+	/* We should never reach here, since we're only adding one vec */
+	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
+	goto skip;
+dump_bio:
+	dump_mem("bio", bio, sizeof(*bio));
+skip:
+	bio_put(bio);
+	unlock_page(page);
+	dec_io(page, last_write_complete);
+
+	return -EIO;
+}
+
+static int metapage_readpage(struct file *fp, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct bio *bio = NULL;
+	unsigned int block_offset;
+	unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
+	sector_t page_start;	/* address of page in fs blocks */
+	sector_t pblock;
+	unsigned int xlen;
+	unsigned int len;
+	unsigned int offset;
+
+	BUG_ON(!PageLocked(page));
+	page_start = (sector_t)page->index <<
+		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	block_offset = 0;
+	while (block_offset < blocks_per_page) {
+		xlen = blocks_per_page - block_offset;
+		pblock = metapage_get_blocks(inode, page_start + block_offset,
+					     &xlen);
+		if (pblock) {
+			if (!PagePrivate(page))
+				insert_metapage(page, NULL);
+			inc_io(page);
+			if (bio)
+				submit_bio(READ, bio);
+
+			bio = bio_alloc(GFP_NOFS, 1);
+			bio->bi_bdev = inode->i_sb->s_bdev;
+			bio->bi_sector = pblock << (inode->i_blkbits - 9);
+			bio->bi_end_io = metapage_read_end_io;
+			bio->bi_private = page;
+			len = xlen << inode->i_blkbits;
+			offset = block_offset << inode->i_blkbits;
+			if (bio_add_page(bio, page, len, offset) < len)
+				goto add_failed;
+			block_offset += xlen;
+		} else
+			block_offset++;
 	}
+	if (bio)
+		submit_bio(READ, bio);
+	else
+		unlock_page(page);
+
+	return 0;
 
-	if (mp->hash_next)
-		mp->hash_next->hash_prev = mp->hash_prev;
+add_failed:
+	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
+	bio_put(bio);
+	dec_io(page, last_read_complete);
+	return -EIO;
 }
 
+static int metapage_releasepage(struct page *page, int gfp_mask)
+{
+	struct metapage *mp;
+	int busy = 0;
+	unsigned int offset;
+
+	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+		mp = page_to_mp(page, offset);
+
+		if (!mp)
+			continue;
+
+		jfs_info("metapage_releasepage: mp = 0x%p", mp);
+		if (mp->count || mp->nohomeok) {
+			jfs_info("count = %ld, nohomeok = %d", mp->count,
+				 mp->nohomeok);
+			busy = 1;
+			continue;
+		}
+		wait_on_page_writeback(page);
+		//WARN_ON(test_bit(META_dirty, &mp->flag));
+		if (test_bit(META_dirty, &mp->flag)) {
+			dump_mem("dirty mp in metapage_releasepage", mp,
+				 sizeof(struct metapage));
+			dump_mem("page", page, sizeof(struct page));
+			dump_stack();
+		}
+		WARN_ON(mp->lsn);
+		if (mp->lsn)
+			remove_from_logsync(mp);
+		remove_metapage(page, mp);
+		INCREMENT(mpStat.pagefree);
+		free_metapage(mp);
+	}
+	if (busy)
+		return -1;
+
+	return 0;
+}
+
+static int metapage_invalidatepage(struct page *page, unsigned long offset)
+{
+	BUG_ON(offset);
+
+	if (PageWriteback(page))
+		return 0;
+
+	return metapage_releasepage(page, 0);
+}
+
+struct address_space_operations jfs_metapage_aops = {
+	.readpage	= metapage_readpage,
+	.writepage	= metapage_writepage,
+	.sync_page	= block_sync_page,
+	.releasepage	= metapage_releasepage,
+	.invalidatepage	= metapage_invalidatepage,
+	.set_page_dirty	= __set_page_dirty_nobuffers,
+};
+
 struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 				unsigned int size, int absolute,
 				unsigned long new)
 {
-	struct metapage **hash_ptr;
 	int l2BlocksPerPage;
 	int l2bsize;
 	struct address_space *mapping;
-	struct metapage *mp;
+	struct metapage *mp = NULL;
+	struct page *page;
 	unsigned long page_index;
 	unsigned long page_offset;
 
-	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);
-
+	jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
+		 inode->i_ino, lblock, absolute);
+
+	l2bsize = inode->i_blkbits;
+	l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
+	page_index = lblock >> l2BlocksPerPage;
+	page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
+	if ((page_offset + size) > PAGE_CACHE_SIZE) {
+		jfs_err("MetaData crosses page boundary!!");
+		jfs_err("lblock = %lx, size  = %d", lblock, size);
+		dump_stack();
+		return NULL;
+	}
 	if (absolute)
-		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
+		mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
 	else {
 		/*
 		 * If an nfs client tries to read an inode that is larger
@@ -237,312 +631,212 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 		mapping = inode->i_mapping;
 	}
 
-	hash_ptr = meta_hash(mapping, lblock);
-again:
-	spin_lock(&meta_lock);
-	mp = search_hash(hash_ptr, mapping, lblock);
+	if (new && (PSIZE == PAGE_CACHE_SIZE)) {
+		page = grab_cache_page(mapping, page_index);
+		if (!page) {
+			jfs_err("grab_cache_page failed!");
+			return NULL;
+		}
+		SetPageUptodate(page);
+	} else {
+		page = read_cache_page(mapping, page_index,
+			    (filler_t *)mapping->a_ops->readpage, NULL);
+		if (IS_ERR(page)) {
+			jfs_err("read_cache_page failed!");
+			return NULL;
+		}
+		lock_page(page);
+	}
+
+	mp = page_to_mp(page, page_offset);
 	if (mp) {
-	      page_found:
-		if (test_bit(META_stale, &mp->flag)) {
-			spin_unlock(&meta_lock);
-			msleep(1);
-			goto again;
+		if (mp->logical_size != size) {
+			jfs_error(inode->i_sb,
+				  "__get_metapage: mp->logical_size != size");
+			jfs_err("logical_size = %d, size = %d",
+				mp->logical_size, size);
+			dump_stack();
+			goto unlock; 
 		}
 		mp->count++;
 		lock_metapage(mp);
-		spin_unlock(&meta_lock);
 		if (test_bit(META_discard, &mp->flag)) {
 			if (!new) {
 				jfs_error(inode->i_sb,
 					  "__get_metapage: using a "
 					  "discarded metapage");
-				release_metapage(mp);
-				return NULL;
+				discard_metapage(mp);
+				goto unlock; 
 			}
 			clear_bit(META_discard, &mp->flag);
 		}
-		jfs_info("__get_metapage: found 0x%p, in hash", mp);
-		if (mp->logical_size != size) {
-			jfs_error(inode->i_sb,
-				  "__get_metapage: mp->logical_size != size");
-			release_metapage(mp);
-			return NULL;
-		}
 	} else {
-		l2bsize = inode->i_blkbits;
-		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
-		page_index = lblock >> l2BlocksPerPage;
-		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
-		    l2bsize;
-		if ((page_offset + size) > PAGE_CACHE_SIZE) {
-			spin_unlock(&meta_lock);
-			jfs_err("MetaData crosses page boundary!!");
-			return NULL;
-		}
-		
-		/*
-		 * Locks held on aggregate inode pages are usually
-		 * not held long, and they are taken in critical code
-		 * paths (committing dirty inodes, txCommit thread) 
-		 * 
-		 * Attempt to get metapage without blocking, tapping into
-		 * reserves if necessary.
-		 */
-		mp = NULL;
-		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
-			mp = alloc_metapage(GFP_ATOMIC);
-			if (!mp) {
-				/*
-				 * mempool is supposed to protect us from
-				 * failing here.  We will try a blocking
-				 * call, but a deadlock is possible here
-				 */
-				printk(KERN_WARNING
-				       "__get_metapage: atomic call to mempool_alloc failed.\n");
-				printk(KERN_WAR