summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ocfs2/alloc.c57
-rw-r--r--fs/ocfs2/alloc.h3
-rw-r--r--fs/ocfs2/dir.c1908
-rw-r--r--fs/ocfs2/dir.h21
-rw-r--r--fs/ocfs2/inode.c12
-rw-r--r--fs/ocfs2/journal.c30
-rw-r--r--fs/ocfs2/journal.h51
-rw-r--r--fs/ocfs2/namei.c45
-rw-r--r--fs/ocfs2/ocfs2.h16
-rw-r--r--fs/ocfs2/ocfs2_fs.h102
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/xattr.c8
-rw-r--r--fs/ocfs2/xattr.h2
13 files changed, 2157 insertions, 104 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 19e3a96aa02c..678a067d9251 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
};
+static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ return le64_to_cpu(dx_root->dr_last_eb_blk);
+}
+
+static void ocfs2_dx_root_update_clusters(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ le32_add_cpu(&dx_root->dr_clusters, clusters);
+}
+
+static int ocfs2_dx_root_sanity_check(struct inode *inode,
+ struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
+
+ return 0;
+}
+
+static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ et->et_root_el = &dx_root->dr_list;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
+ .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk,
+ .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk,
+ .eo_update_clusters = ocfs2_dx_root_update_clusters,
+ .eo_sanity_check = ocfs2_dx_root_sanity_check,
+ .eo_fill_root_el = ocfs2_dx_root_fill_root_el,
+};
+
static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
&ocfs2_xattr_value_et_ops);
}
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+ struct inode *inode,
+ struct buffer_head *bh)
+{
+ __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr,
+ NULL, &ocfs2_dx_root_et_ops);
+}
+
static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 new_last_eb_blk)
{
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index cceff5c37f47..353254ba29e1 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf;
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct ocfs2_xattr_value_buf *vb);
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+ struct inode *inode,
+ struct buffer_head *bh);
/*
* Read an extent block into *bh. If *bh is NULL, a bh will be
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 76ffb5c10b3e..0b8c88b47a4e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/quotaops.h>
+#include <linux/sort.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@@ -58,6 +59,7 @@
#include "namei.h"
#include "suballoc.h"
#include "super.h"
+#include "sysfile.h"
#include "uptodate.h"
#include "buffer_head_io.h"
@@ -71,11 +73,6 @@ static unsigned char ocfs2_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
-static int ocfs2_extend_dir(struct ocfs2_super *osb,
- struct inode *dir,
- struct buffer_head *parent_fe_bh,
- unsigned int blocks_wanted,
- struct buffer_head **new_de_bh);
static int ocfs2_do_extend_dir(struct super_block *sb,
handle_t *handle,
struct inode *dir,
@@ -155,6 +152,105 @@ static void ocfs2_init_dir_trailer(struct inode *inode,
void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
{
brelse(res->dl_leaf_bh);
+ brelse(res->dl_dx_leaf_bh);
+}
+
+static int ocfs2_dir_indexed(struct inode *inode)
+{
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL)
+ return 1;
+ return 0;
+}
+
+/*
+ * Hashing code adapted from ext3
+ */
+#define DELTA 0x9E3779B9
+
+static void TEA_transform(__u32 buf[4], __u32 const in[])
+{
+ __u32 sum = 0;
+ __u32 b0 = buf[0], b1 = buf[1];
+ __u32 a = in[0], b = in[1], c = in[2], d = in[3];
+ int n = 16;
+
+ do {
+ sum += DELTA;
+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
+ } while (--n);
+
+ buf[0] += b0;
+ buf[1] += b1;
+}
+
+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+{
+ __u32 pad, val;
+ int i;
+
+ pad = (__u32)len | ((__u32)len << 8);
+ pad |= pad << 16;
+
+ val = pad;
+ if (len > num*4)
+ len = num * 4;
+ for (i = 0; i < len; i++) {
+ if ((i % 4) == 0)
+ val = pad;
+ val = msg[i] + (val << 8);
+ if ((i % 4) == 3) {
+ *buf++ = val;
+ val = pad;
+ num--;
+ }
+ }
+ if (--num >= 0)
+ *buf++ = val;
+ while (--num >= 0)
+ *buf++ = pad;
+}
+
+static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
+ struct ocfs2_dx_hinfo *hinfo)
+{
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+ const char *p;
+ __u32 in[8], buf[4];
+
+ /*
+ * XXX: Is this really necessary, if the index is never looked
+ * at by readdir? Is a hash value of '0' a bad idea?
+ */
+ if ((len == 1 && !strncmp(".", name, 1)) ||
+ (len == 2 && !strncmp("..", name, 2))) {
+ buf[0] = buf[1] = 0;
+ goto out;
+ }
+
+#ifdef OCFS2_DEBUG_DX_DIRS
+ /*
+ * This makes it very easy to debug indexing problems. We
+ * should never allow this to be selected without hand editing
+ * this file though.
+ */
+ buf[0] = buf[1] = len;
+ goto out;
+#endif
+
+ memcpy(buf, osb->osb_dx_seed, sizeof(buf));
+
+ p = name;
+ while (len > 0) {
+ str2hashbuf(p, len, in, 4);
+ TEA_transform(buf, in);
+ len -= 16;
+ p += 16;
+ }
+
+out:
+ hinfo->major_hash = buf[0];
+ hinfo->minor_hash = buf[1];
}
/*
@@ -317,6 +413,52 @@ static int ocfs2_validate_dir_block(struct super_block *sb,
}
/*
+ * Validate a directory trailer.
+ *
+ * We check the trailer here rather than in ocfs2_validate_dir_block()
+ * because that function doesn't have the inode to test.
+ */
+static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh)
+{
+ int rc = 0;
+ struct ocfs2_dir_block_trailer *trailer;
+
+ trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
+ if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
+ rc = -EINVAL;
+ ocfs2_error(dir->i_sb,
+ "Invalid dirblock #%llu: "
+ "signature = %.*s\n",
+ (unsigned long long)bh->b_blocknr, 7,
+ trailer->db_signature);
+ goto out;
+ }
+ if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
+ rc = -EINVAL;
+ ocfs2_error(dir->i_sb,
+ "Directory block #%llu has an invalid "
+ "db_blkno of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(trailer->db_blkno));
+ goto out;
+ }
+ if (le64_to_cpu(trailer->db_parent_dinode) !=
+ OCFS2_I(dir)->ip_blkno) {
+ rc = -EINVAL;
+ ocfs2_error(dir->i_sb,
+ "Directory block #%llu on dinode "
+ "#%llu has an invalid parent_dinode "
+ "of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)OCFS2_I(dir)->ip_blkno,
+ (unsigned long long)le64_to_cpu(trailer->db_blkno));
+ goto out;
+ }
+out:
+ return rc;
+}
+
+/*
* This function forces all errors to -EIO for consistency with its
* predecessor, ocfs2_bread(). We haven't audited what returning the
* real error codes would do to callers. We log the real codes with
@@ -327,7 +469,6 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
{
int rc = 0;
struct buffer_head *tmp = *bh;
- struct ocfs2_dir_block_trailer *trailer;
rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
ocfs2_validate_dir_block);
@@ -336,42 +477,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
goto out;
}
- /*
- * We check the trailer here rather than in
- * ocfs2_validate_dir_block() because that function doesn't have
- * the inode to test.
- */
if (!(flags & OCFS2_BH_READAHEAD) &&
ocfs2_dir_has_trailer(inode)) {
- trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb);
- if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
- rc = -EINVAL;
- ocfs2_error(inode->i_sb,
- "Invalid dirblock #%llu: "
- "signature = %.*s\n",
- (unsigned long long)tmp->b_blocknr, 7,
- trailer->db_signature);
- goto out;
- }
- if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) {
- rc = -EINVAL;
- ocfs2_error(inode->i_sb,
- "Directory block #%llu has an invalid "
- "db_blkno of %llu",
- (unsigned long long)tmp->b_blocknr,
- (unsigned long long)le64_to_cpu(trailer->db_blkno));
- goto out;
- }
- if (le64_to_cpu(trailer->db_parent_dinode) !=
- OCFS2_I(inode)->ip_blkno) {
- rc = -EINVAL;
- ocfs2_error(inode->i_sb,
- "Directory block #%llu on dinode "
- "#%llu has an invalid parent_dinode "
- "of %llu",
- (unsigned long long)tmp->b_blocknr,
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- (unsigned long long)le64_to_cpu(trailer->db_blkno));
+ rc = ocfs2_check_dir_trailer(inode, tmp);
+ if (rc) {
+ if (!*bh)
+ brelse(tmp);
+ mlog_errno(rc);
goto out;
}
}
@@ -384,6 +496,141 @@ out:
return rc ? -EIO : 0;
}
+/*
+ * Read the block at 'phys' which belongs to this directory
+ * inode. This function does no virtual->physical block translation -
+ * what's passed in is assumed to be a valid directory block.
+ */
+static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
+ struct buffer_head **bh)
+{
+ int ret;
+ struct buffer_head *tmp = *bh;
+
+ ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (ocfs2_supports_dir_trailer(dir)) {
+ ret = ocfs2_check_dir_trailer(dir, tmp);
+ if (ret) {
+ if (!*bh)
+ brelse(tmp);
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ if (!ret && !*bh)
+ *bh = tmp;
+out:
+ return ret;
+}
+
+static int ocfs2_validate_dx_root(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int ret;
+ struct ocfs2_dx_root_block *dx_root;
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ dx_root = (struct ocfs2_dx_root_block *) bh->b_data;
+
+ ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check);
+ if (ret) {
+ mlog(ML_ERROR,
+ "Checksum failed for dir index root block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ return ret;
+ }
+
+ if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
+ ocfs2_error(sb,
+ "Dir Index Root # %llu has bad signature %.*s",
+ (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
+ 7, dx_root->dr_signature);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
+ struct buffer_head **dx_root_bh)
+{
+ int ret;
+ u64 blkno = le64_to_cpu(di->i_dx_root);
+ struct buffer_head *tmp = *dx_root_bh;
+
+ ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!ret && !*dx_root_bh)
+ *dx_root_bh = tmp;
+
+ return ret;
+}
+
+static int ocfs2_validate_dx_leaf(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int ret;
+ struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data;
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check);
+ if (ret) {
+ mlog(ML_ERROR,
+ "Checksum failed for dir index leaf block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ return ret;
+ }
+
+ if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
+ ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s",
+ 7, dx_leaf->dl_signature);
+ return -EROFS;
+ }
+
+ return 0;
+}
+
+static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
+ struct buffer_head **dx_leaf_bh)
+{
+ int ret;
+ struct buffer_head *tmp = *dx_leaf_bh;
+
+ ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!ret && !*dx_leaf_bh)
+ *dx_leaf_bh = tmp;
+
+ return ret;
+}
+
+/*
+ * Read a series of dx_leaf blocks. This expects all buffer_head
+ * pointers to be NULL on function entry.
+ */
+static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
+ struct buffer_head **dx_leaf_bhs)
+{
+ int ret;
+
+ ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0,
+ ocfs2_validate_dx_leaf);
+ if (ret)
+ mlog_errno(ret);
+
+ return ret;
+}
+
static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
@@ -485,6 +732,270 @@ cleanup_and_exit:
return ret;
}
+static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
+ struct ocfs2_extent_list *el,
+ u32 major_hash,
+ u32 *ret_cpos,
+ u64 *ret_phys_blkno,
+ unsigned int *ret_clen)
+{
+ int ret = 0, i, found;
+ struct buffer_head *eb_bh = NULL;
+ struct ocfs2_extent_block *eb;
+ struct ocfs2_extent_rec *rec = NULL;
+
+ if (el->l_tree_depth) {
+ ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+ el = &eb->h_list;
+
+ if (el->l_tree_depth) {
+ ocfs2_error(inode->i_sb,
+ "Inode %lu has non zero tree depth in "
+ "btree tree block %llu\n", inode->i_ino,
+ (unsigned long long)eb_bh->b_blocknr);
+ ret = -EROFS;
+ goto out;
+ }
+ }
+
+ found = 0;
+ for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
+ rec = &el->l_recs[i];
+
+ if (le32_to_cpu(rec->e_cpos) <= major_hash) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
+ "record (%u, %u, 0) in btree", inode->i_ino,
+ le32_to_cpu(rec->e_cpos),
+ ocfs2_rec_clusters(el, rec));
+ ret = -EROFS;
+ goto out;
+ }
+
+ if (ret_phys_blkno)
+ *ret_phys_blkno = le64_to_cpu(rec->e_blkno);
+ if (ret_cpos)
+ *ret_cpos = le32_to_cpu(rec->e_cpos);
+ if (ret_clen)
+ *ret_clen = le16_to_cpu(rec->e_leaf_clusters);
+
+out:
+ brelse(eb_bh);
+ return ret;
+}
+
+/*
+ * Returns the block index, from the start of the cluster which this
+ * hash belongs too.
+ */
+static unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
+ struct ocfs2_dx_hinfo *hinfo)
+{
+ u32 minor_hash = hinfo->minor_hash;
+ return minor_hash & osb->osb_dx_mask;
+}
+
+static int ocfs2_dx_dir_lookup(struct inode *inode,
+ struct ocfs2_extent_list *el,
+ struct ocfs2_dx_hinfo *hinfo,
+ u32 *ret_cpos,
+ u64 *ret_phys_blkno)
+{
+ int ret = 0;
+ unsigned int cend, uninitialized_var(clen);
+ u32 uninitialized_var(cpos);
+ u64 uninitialized_var(blkno);
+ u32 name_hash = hinfo->major_hash;
+
+ ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno,
+ &clen);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ cend = cpos + clen;
+ if (name_hash >= cend) {
+ /* We want the last cluster */
+ blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1);
+ cpos += clen - 1;
+ } else {
+ blkno += ocfs2_clusters_to_blocks(inode->i_sb,
+ name_hash - cpos);
+ cpos = name_hash;
+ }
+
+ /*
+ * We now have the cluster which should hold our entry. To
+ * find the exact block from the start of the cluster to
+ * search, we take the lower bits of the hash.
+ */
+ blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo);
+
+ if (ret_phys_blkno)
+ *ret_phys_blkno = blkno;
+ if (ret_cpos)
+ *ret_cpos = cpos;
+
+out:
+
+ return ret;
+}
+
+static int ocfs2_dx_dir_search(const char *name, int namelen,
+ struct inode *dir,
+ struct ocfs2_extent_list *dr_el,
+ struct ocfs2_dir_lookup_result *res)
+{
+ int ret, i, found;
+ u64 uninitialized_var(phys);
+ struct buffer_head *dx_leaf_bh = NULL;
+ struct ocfs2_dx_leaf *dx_leaf;
+ struct ocfs2_dx_entry *dx_entry = NULL;
+ struct buffer_head *dir_ent_bh = NULL;
+ struct ocfs2_dir_entry *dir_ent = NULL;
+ struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
+
+ ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);
+
+ ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x "
+ "returns: %llu\n",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno,
+ namelen, name, hinfo->major_hash, hinfo->minor_hash,
+ (unsigned long long)phys);
+
+ ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
+
+ mlog(0, "leaf info: num_used: %d, count: %d\n",
+ le16_to_cpu(dx_leaf->dl_list.de_num_used),
+ le16_to_cpu(dx_leaf->dl_list.de_count));
+
+ /*
+ * Empty leaf is legal, so no need to check for that.
+ */
+ found = 0;
+ for (i = 0; i < le16_to_cpu(dx_leaf->dl_list.de_num_used); i++) {
+ dx_entry = &dx_leaf->dl_list.de_entries[i];
+
+ if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
+ || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
+ continue;
+
+ /*
+ * Search unindexed leaf block now. We're not
+ * guaranteed to find anything.
+ */
+ ret = ocfs2_read_dir_block_direct(dir,
+ le64_to_cpu(dx_entry->dx_dirent_blk),
+ &dir_ent_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * XXX: We should check the unindexed block here,
+ * before using it.
+ */
+
+ found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen,
+ 0, dir_ent_bh->b_data,
+ dir->i_sb->s_blocksize, &dir_ent);
+ if (found == 1)
+ break;
+
+ if (found == -1) {
+ /* This means we found a bad directory entry. */
+ ret = -EIO;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ brelse(dir_ent_bh);
+ dir_ent_bh = NULL;
+ }
+
+ if (found <= 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ res->dl_leaf_bh = dir_ent_bh;
+ res->dl_entry = dir_ent;
+ res->dl_dx_leaf_bh = dx_leaf_bh;
+ res->dl_dx_entry = dx_entry;
+
+ ret = 0;
+out:
+ if (ret) {
+ brelse(dx_leaf_bh);
+ brelse(dir_ent_bh);
+ }
+ return ret;
+}
+
+static int ocfs2_find_entry_dx(const char *name, int namelen,
+ struct inode *dir,
+ struct ocfs2_dir_lookup_result *lookup)
+{
+ int ret;
+ struct buffer_head *di_bh = NULL;
+ struct ocfs2_dinode *di;
+ struct buffer_head *dx_root_bh = NULL;
+ struct ocfs2_dx_root_block *dx_root;
+
+ ret = ocfs2_read_inode_block(dir, &di_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ di = (struct ocfs2_dinode *)di_bh->b_data;
+
+ ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
+
+ ret = ocfs2_dx_dir_search(name, namelen, dir, &dx_root->dr_list,
+ lookup);
+ if (ret) {
+ if (ret != -ENOENT)
+ mlog_errno(ret);
+ goto out;
+ }
+
+out:
+ brelse(di_bh);
+ brelse(dx_root_bh);
+ return ret;
+}
+
/*
* Try to find an entry of the provided name within 'dir'.
*
@@ -493,10 +1004,11 @@ cleanup_and_exit:
* other directory manipulation functions.
*
* Caller can NOT assume anything about the contents of the
- * buffer_heads - they are passed back only so that it can be passed into
- * any one of the manipulation functions (add entry, delete entry,
- * etc). As an example, bh in the extent directory case is a data
- * block, in the inline-data case it actually points to an inode.
+ * buffer_heads - they are passed back only so that it can be passed
+ * into any one of the manipulation functions (add entry, delete
+ * entry, etc). As an example, bh in the extent directory case is a
+ * data block, in the inline-data case it actually points to an inode,
+ * in the indexed directory case, multiple buffers are involved.
*/
int ocfs2_find_entry(const char *name, int namelen,
struct inode *dir, struct ocfs2_dir_lookup_result *lookup)
@@ -504,6 +1016,14 @@ int ocfs2_find_entry(const char *name, int namelen,
struct buffer_head *bh;
struct ocfs2_dir_entry *res_dir = NULL;
+ if (ocfs2_dir_indexed(dir))
+ return ocfs2_find_entry_dx(name, namelen, dir, lookup);
+
+ /*
+ * The unindexed dir code only uses part of the lookup
+ * structure, so there's no reason to push it down further
+ * than this.
+ */
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
else
@@ -553,6 +1073,10 @@ out:
return ret;
}
+/*
+ * __ocfs2_delete_entry deletes a directory entry by merging it with the
+ * previous entry
+ */
static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
struct ocfs2_dir_entry *de_del,
struct buffer_head *bh, char *first_de,
@@ -602,6 +1126,79 @@ bail:
return status;
}
+static void ocfs2_dx_leaf_remove_entry(struct ocfs2_dx_leaf *dx_leaf, int index)
+{
+ struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
+ int num_used = le16_to_cpu(dl_list->de_num_used);
+
+ if (num_used == 1 || index == (num_used - 1))
+ goto clear;
+
+ memmove(&dl_list->de_entries[index], &dl_list->de_entries[index + 1],
+ (num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
+clear:
+ num_used--;
+ memset(&dl_list->de_entries[num_used], 0,
+ sizeof(struct ocfs2_dx_entry));
+ dl_list->de_num_used = cpu_to_le16(num_used);
+}
+
+static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
+ struct ocfs2_dir_lookup_result *lookup)
+{
+ int ret, index;
+ struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
+ struct ocfs2_dx_leaf *dx_leaf;
+ struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
+
+ dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
+ /* Neither of these are a disk corruption - that should have
+ * been caught by lookup, before we got here. */
+ BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_count) <= 0);
+ BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_num_used) <= 0);
+
+ index = (char *)dx_entry - (char *)dx_leaf->dl_list.de_entries;
+ index /= sizeof(*dx_entry);
+
+ if (index >= le16_to_cpu(dx_leaf->dl_list.de_num_used)) {
+ mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno, index, dx_leaf,
+ dx_entry);
+ return -EIO;
+ }
+
+ mlog(0, "Dir %llu: delete entry at index: %d\n",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno, index);
+
+ /*
+ * Add the index leaf into the journal before removing the
+ * unindexed entry. If we get an error return from
+ * __ocfs2_delete_entry(), then it hasn't removed the entry
+ * yet. Likewise, successful return means we *must* remove the
+ * indexed entry.
+ */
+ ret = ocfs2_journal_access_dl(handle, dir, lookup->dl_dx_leaf_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
+ leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ocfs2_dx_leaf_remove_entry(dx_leaf, index);
+
+ ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);
+
+out:
+ return ret;
+}
+
static inline int ocfs2_delete_entry_id(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_entry *de_del,
@@ -639,13 +1236,16 @@ static inline int ocfs2_delete_entry_el(handle_t *handle,
}
/*
- * ocfs2_delete_entry deletes a directory entry by merging it with the
- * previous entry
+ * Delete a directory entry. Hide the details of directory
+ * implementation from the caller.
*/
int ocfs2_delete_entry(handle_t *handle,
struct inode *dir,
struct ocfs2_dir_lookup_result *res)
{
+ if (ocfs2_dir_indexed(dir))
+ return ocfs2_delete_entry_dx(handle, dir, res);
+
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return ocfs2_delete_entry_id(handle, dir, res->dl_entry,
res->dl_leaf_bh);
@@ -679,6 +1279,58 @@ static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
return 0;
}
+static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
+ struct ocfs2_dx_entry *dx_new_entry)
+{
+ int i;
+
+ i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
+ dx_leaf->dl_list.de_entries[i] = *dx_new_entry;
+
+ le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
+}
+
+static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
+ struct ocfs2_dx_hinfo *hinfo,
+ u64 dirent_blk,
+ struct buffer_head *dx_leaf_bh)
+{
+ int ret, i;
+ struct ocfs2_dx_entry *dx_entry;
+ struct ocfs2_dx_leaf *dx_leaf;
+
+ ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
+ i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
+ dx_entry = &dx_leaf->dl_list.de_entries[i];
+
+ memset(dx_entry, 0, sizeof(*dx_entry));
+ dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
+ dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
+ dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
+
+ le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
+
+ ocfs2_journal_dirty(handle, dx_leaf_bh);
+
+out:
+ return ret;
+}
+
+static int ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
+ struct ocfs2_dir_lookup_result *lookup)
+{
+ return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
+ lookup->dl_leaf_bh->b_blocknr,
+ lookup->dl_dx_leaf_bh);
+}
+
/* we don't always have a dentry for what we want to add, so people
* like orphan dir can call this instead.
*
@@ -754,10 +1406,21 @@ int __ocfs2_add_entry(handle_t *handle,
status = ocfs2_journal_access_di(handle, dir,
insert_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
- else
+ else {
status = ocfs2_journal_access_db(handle, dir,
insert_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ocfs2_dir_indexed(dir)) {
+ status = ocfs2_dx_dir_leaf_insert(dir,
+ handle,
+ lookup);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+ }
+
/* By now the buffer is marked for journaling */
offset += le16_to_cpu(de->rec_len);
if (le64_to_cpu(de->inode)) {
@@ -887,6 +1550,10 @@ out:
return 0;
}
+/*
+ * NOTE: This function can be called against unindexed directories,
+ * and indexed ones.
+ */
static int ocfs2_dir_foreach_blk_el(struct inode *inode,
u64 *f_version,
loff_t *f_pos, void *priv,
@@ -1184,6 +1851,8 @@ static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
* routine to check that the specified directory is empty (for rmdir)
*
* Returns 1 if dir is empty, zero otherwise.
+ *
+ * XXX: This is a performance problem
*/
int ocfs2_empty_dir(struct inode *inode)
{
@@ -1285,7 +1954,8 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
struct inode *parent,
struct inode *inode,
struct buffer_head *fe_bh,
- struct ocfs2_alloc_context *data_ac)
+ struct ocfs2_alloc_context *data_ac,
+ struct buffer_head **ret_new_bh)
{
int status;
unsigned int size = osb->sb->s_blocksize;
@@ -1334,6 +2004,10 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
}
status = 0;
+ if (ret_new_bh) {
+ *ret_new_bh = new_bh;
+ new_bh = NULL;
+ }
bail:
brelse(new_bh);
@@ -1341,20 +2015,382 @@ bail:
return status;
}
+static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
+ handle_t *handle, struct inode *dir,
+ struct buffer_head *di_bh,
+ struct ocfs2_alloc_context *meta_ac,
+ struct buffer_head **ret_dx_root_bh)
+{
+ int ret;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+ u16 dr_suballoc_bit;
+ u64 dr_blkno;
+ unsigned int num_bits;
+ struct buffer_head *dx_root_bh = NULL;
+ struct ocfs2_dx_root_block *dx_root;
+
+ ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit,
+ &num_bits, &dr_blkno);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ mlog(0, "Dir %llu, attach new index block: %llu\n",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno,
+ (unsigned long long)dr_blkno);
+
+ dx_root_bh = sb_getblk(osb->sb, dr_blkno);
+ if (dx_root_bh == NULL) {
+ ret = -EIO;
+ goto out;
+ }
+ ocfs2_set_new_buffer_uptodate(dir, dx_root_bh);
+
+ ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+ OCFS2_JOURNAL_ACCESS_CREATE);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+ memset(dx_root, 0, osb->sb->s_blocksize);
+ strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
+ dx_root->dr_suballoc_slot = cpu_to_le16(osb->slot_num);
+ dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
+ dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
+ dx_root->dr_blkno = cpu_to_le64(dr_blkno);
+ dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
+ dx_root->dr_list.l_count =
+ cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
+
+ ret = ocfs2_journal_dirty(handle, dx_root_bh);
+ if (ret)
+ mlog_errno(ret);
+
+ ret = ocfs2_journal_access_di(handle, dir, di_bh,
+ OCFS2_JOURNAL_ACCESS_CREATE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ di->i_dx_root = cpu_to_le64(dr_blkno);
+
+ OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
+ di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+
+ ret = ocfs2_journal_dirty(handle, di_bh);
+ if (ret)
+ mlog_errno(ret);
+
+ *ret_dx_root_bh = dx_root_bh;
+ dx_root_bh = NULL;
+
+out:
+ brelse(dx_root_bh);
+ return ret;
+}
+
+static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
+ handle_t *handle, struct inode *dir,
+ struct buffer_head **dx_leaves,
+ int num_dx_leaves, u64 start_blk)
+{
+ int ret, i;
+ struct ocfs2_dx_leaf *dx_leaf;
+ struct buffer_head *bh;
+
+ for (i = 0; i < num_dx_leaves; i++) {
+ bh = sb_getblk(osb->sb, start_blk + i);
+ if (bh == NULL) {
+ ret = -EIO;
+ goto out;
+ }
+ dx_leaves[i] = bh;
+
+ ocfs2_set_new_buffer_uptodate(dir, bh);
+
+ ret = ocfs2_journal_access_dl(handle, dir, bh,
+ OCFS2_JOURNAL_ACCESS_CREATE);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;