summaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorKurt Hackel <kurt.hackel@oracle.com>2005-12-15 14:31:23 -0800
committerJoel Becker <joel.becker@oracle.com>2006-01-03 11:45:47 -0800
commit6714d8e86bf443f6f7af50f9d432025649f091f5 (patch)
tree2c484bd1894a90cad7020869c7054f192d3bf34d /fs/ocfs2
parent98211489d4147e41b11703e4245846d60b3acce4 (diff)
[PATCH] OCFS2: The Second Oracle Cluster Filesystem
A distributed lock manager built with the cluster file system use case in mind. The OCFS2 dlm exposes a VMS style API, though things have been simplified internally. The only lock levels implemented currently are NLMODE, PRMODE and EXMODE. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/dlm/Makefile6
-rw-r--r--fs/ocfs2/dlm/dlmapi.h214
-rw-r--r--fs/ocfs2/dlm/dlmast.c466
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h884
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c530
-rw-r--r--fs/ocfs2/dlm/dlmconvert.h35
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c246
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h30
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c1469
-rw-r--r--fs/ocfs2/dlm/dlmdomain.h36
-rw-r--r--fs/ocfs2/dlm/dlmlock.c676
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2666
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2132
-rw-r--r--fs/ocfs2/dlm/dlmthread.c695
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c672
-rw-r--r--fs/ocfs2/dlm/dlmver.c42
-rw-r--r--fs/ocfs2/dlm/dlmver.h31
17 files changed, 10830 insertions, 0 deletions
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
new file mode 100644
index 000000000000..2a5274bcc8bb
--- /dev/null
+++ b/fs/ocfs2/dlm/Makefile
@@ -0,0 +1,6 @@
+EXTRA_CFLAGS += -Ifs/ocfs2
+
+obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o
+
+ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
+ dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
new file mode 100644
index 000000000000..53652f51c0e1
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -0,0 +1,214 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmapi.h
+ *
+ * externally exported dlm interfaces
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef DLMAPI_H
+#define DLMAPI_H
+
+struct dlm_lock;
+struct dlm_ctxt;
+
+/* Status codes returned by dlmlock()/dlmunlock() and stored in
+ * struct dlm_lockstatus.status.
+ *
+ * NOTE: changes made to this enum should be reflected in dlmdebug.c */
+enum dlm_status {
+ DLM_NORMAL = 0, /* 0: request in progress */
+ DLM_GRANTED, /* 1: request granted */
+ DLM_DENIED, /* 2: request denied */
+ DLM_DENIED_NOLOCKS, /* 3: request denied, out of system resources */
+ DLM_WORKING, /* 4: async request in progress */
+ DLM_BLOCKED, /* 5: lock request blocked */
+ DLM_BLOCKED_ORPHAN, /* 6: lock request blocked by an orphan lock */
+ DLM_DENIED_GRACE_PERIOD, /* 7: topological change in progress */
+ DLM_SYSERR, /* 8: system error */
+ DLM_NOSUPPORT, /* 9: unsupported */
+ DLM_CANCELGRANT, /* 10: can't cancel convert: already granted */
+ DLM_IVLOCKID, /* 11: bad lockid */
+ DLM_SYNC, /* 12: synchronous request granted */
+ DLM_BADTYPE, /* 13: bad resource type */
+ DLM_BADRESOURCE, /* 14: bad resource handle */
+ DLM_MAXHANDLES, /* 15: no more resource handles */
+ DLM_NOCLINFO, /* 16: can't contact cluster manager */
+ DLM_NOLOCKMGR, /* 17: can't contact lock manager */
+ DLM_NOPURGED, /* 18: can't contact purge daemon */
+ DLM_BADARGS, /* 19: bad api args */
+ DLM_VOID, /* 20: no status */
+ DLM_NOTQUEUED, /* 21: NOQUEUE was specified and request failed */
+ DLM_IVBUFLEN, /* 22: invalid resource name length */
+ DLM_CVTUNGRANT, /* 23: attempted to convert ungranted lock */
+ DLM_BADPARAM, /* 24: invalid lock mode specified */
+ DLM_VALNOTVALID, /* 25: value block has been invalidated */
+ DLM_REJECTED, /* 26: request rejected, unrecognized client */
+ DLM_ABORT, /* 27: blocked lock request cancelled */
+ DLM_CANCEL, /* 28: conversion request cancelled */
+ DLM_IVRESHANDLE, /* 29: invalid resource handle */
+ DLM_DEADLOCK, /* 30: deadlock recovery refused this request */
+ DLM_DENIED_NOASTS, /* 31: failed to allocate AST */
+ DLM_FORWARD, /* 32: request must wait for primary's response */
+ DLM_TIMEOUT, /* 33: timeout value for lock has expired */
+ DLM_IVGROUPID, /* 34: invalid group specification */
+ DLM_VERS_CONFLICT, /* 35: version conflicts prevent request handling */
+ DLM_BAD_DEVICE_PATH, /* 36: Locks device does not exist or path wrong */
+ DLM_NO_DEVICE_PERMISSION, /* 37: Client has insufficient permissions for device */
+ DLM_NO_CONTROL_DEVICE, /* 38: Cannot set options on opened device */
+
+ DLM_RECOVERING, /* 39: extension, allows caller to fail a lock
+ request if it is being recovered */
+ DLM_MIGRATING, /* 40: extension, allows caller to fail a lock
+ request if it is being migrated */
+ DLM_MAXSTATS, /* 41: upper limit for return code validation */
+};
+
+/* for pretty-printing dlm_status error messages */
+const char *dlm_errmsg(enum dlm_status err);
+/* for pretty-printing dlm_status error names */
+const char *dlm_errname(enum dlm_status err);
+
+/* Eventually the DLM will use standard errno values, but in the
+ * meantime this lets us track dlm errors as they bubble up. When we
+ * bring its error reporting into line with the rest of the stack,
+ * these can just be replaced with calls to mlog_errno.
+ *
+ * Logs the status name at ML_ERROR unless the status is DLM_RECOVERING,
+ * DLM_MIGRATING or DLM_FORWARD, which are skipped silently.
+ * The argument is expanded several times, so pass an expression
+ * without side effects. */
+#define dlm_error(st) do { \
+ if ((st) != DLM_RECOVERING && \
+ (st) != DLM_MIGRATING && \
+ (st) != DLM_FORWARD) \
+ mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \
+} while (0)
+
+#define DLM_LKSB_UNUSED1 0x01
+#define DLM_LKSB_PUT_LVB 0x02
+#define DLM_LKSB_GET_LVB 0x04
+#define DLM_LKSB_UNUSED2 0x08
+#define DLM_LKSB_UNUSED3 0x10
+#define DLM_LKSB_UNUSED4 0x20
+#define DLM_LKSB_UNUSED5 0x40
+#define DLM_LKSB_UNUSED6 0x80
+
+#define DLM_LVB_LEN 64
+
+/* Lock status block passed to dlmlock() and dlmunlock().
+ *
+ * Callers are only allowed access to the lvb and status members of
+ * this struct. */
+struct dlm_lockstatus {
+ enum dlm_status status; /* status of the request, an enum dlm_status value */
+ u32 flags; /* DLM_LKSB_* bits */
+ struct dlm_lock *lockid;
+ char lvb[DLM_LVB_LEN]; /* lock value block, DLM_LVB_LEN bytes */
+};
+
+/* Valid lock modes. */
+#define LKM_IVMODE (-1) /* invalid mode */
+#define LKM_NLMODE 0 /* null lock */
+#define LKM_CRMODE 1 /* concurrent read unsupported */
+#define LKM_CWMODE 2 /* concurrent write unsupported */
+#define LKM_PRMODE 3 /* protected read */
+#define LKM_PWMODE 4 /* protected write unsupported */
+#define LKM_EXMODE 5 /* exclusive */
+#define LKM_MAXMODE 5
+#define LKM_MODEMASK 0xff
+
+/* Flags passed to dlmlock and dlmunlock:
+ * reserved: flags used by the "real" dlm
+ * only a few are supported by this dlm
+ * (U) = unsupported by ocfs2 dlm */
+#define LKM_ORPHAN 0x00000010 /* this lock is orphanable (U) */
+#define LKM_PARENTABLE 0x00000020 /* this lock was orphaned (U) */
+#define LKM_BLOCK 0x00000040 /* blocking lock request (U) */
+#define LKM_LOCAL 0x00000080 /* local lock request */
+#define LKM_VALBLK 0x00000100 /* lock value block request */
+#define LKM_NOQUEUE 0x00000200 /* non blocking request */
+#define LKM_CONVERT 0x00000400 /* conversion request */
+#define LKM_NODLCKWT 0x00000800 /* this lock wont deadlock (U) */
+#define LKM_UNLOCK 0x00001000 /* deallocate this lock */
+#define LKM_CANCEL 0x00002000 /* cancel conversion request */
+#define LKM_DEQALL 0x00004000 /* remove all locks held by proc (U) */
+#define LKM_INVVALBLK 0x00008000 /* invalidate lock value block */
+#define LKM_SYNCSTS 0x00010000 /* return synchronous status if poss (U) */
+#define LKM_TIMEOUT 0x00020000 /* lock request contains timeout (U) */
+#define LKM_SNGLDLCK 0x00040000 /* request can self-deadlock (U) */
+#define LKM_FINDLOCAL 0x00080000 /* find local lock request (U) */
+#define LKM_PROC_OWNED 0x00100000 /* owned by process, not group (U) */
+#define LKM_XID 0x00200000 /* use transaction id for deadlock (U) */
+#define LKM_XID_CONFLICT 0x00400000 /* do not allow lock inheritance (U) */
+#define LKM_FORCE 0x00800000 /* force unlock flag */
+#define LKM_REVVALBLK 0x01000000 /* temporary solution: re-validate
+ lock value block (U) */
+/* unused */
+#define LKM_UNUSED1 0x00000001 /* unused */
+#define LKM_UNUSED2 0x00000002 /* unused */
+#define LKM_UNUSED3 0x00000004 /* unused */
+#define LKM_UNUSED4 0x00000008 /* unused */
+#define LKM_UNUSED5 0x02000000 /* unused */
+#define LKM_UNUSED6 0x04000000 /* unused */
+#define LKM_UNUSED7 0x08000000 /* unused */
+
+/* ocfs2 extensions: internal only
+ * should never be used by caller */
+#define LKM_MIGRATION 0x10000000 /* extension: lockres is to be migrated
+ to another node */
+#define LKM_PUT_LVB 0x20000000 /* extension: lvb is being passed
+ should be applied to lockres */
+#define LKM_GET_LVB 0x40000000 /* extension: lvb should be copied
+ from lockres when lock is granted */
+#define LKM_RECOVERY 0x80000000 /* extension: flag for recovery lock
+ used to avoid recovery rwsem */
+
+
+typedef void (dlm_astlockfunc_t)(void *);
+typedef void (dlm_bastlockfunc_t)(void *, int);
+typedef void (dlm_astunlockfunc_t)(void *, enum dlm_status);
+
+enum dlm_status dlmlock(struct dlm_ctxt *dlm,
+ int mode,
+ struct dlm_lockstatus *lksb,
+ int flags,
+ const char *name,
+ dlm_astlockfunc_t *ast,
+ void *data,
+ dlm_bastlockfunc_t *bast);
+
+enum dlm_status dlmunlock(struct dlm_ctxt *dlm,
+ struct dlm_lockstatus *lksb,
+ int flags,
+ dlm_astunlockfunc_t *unlockast,
+ void *data);
+
+struct dlm_ctxt * dlm_register_domain(const char *domain, u32 key);
+
+void dlm_unregister_domain(struct dlm_ctxt *dlm);
+
+void dlm_print_one_lock(struct dlm_lock *lockid);
+/* Callback record taken by dlm_setup_eviction_cb(),
+ * dlm_register_eviction_cb() and dlm_unregister_eviction_cb().
+ * NOTE(review): the meaning of the int argument of dlm_eviction_func
+ * is not visible in this header -- presumably a node number; confirm
+ * against the callers. */
+typedef void (dlm_eviction_func)(int, void *);
+struct dlm_eviction_cb {
+ struct list_head ec_item;
+ dlm_eviction_func *ec_func;
+ void *ec_data;
+};
+void dlm_setup_eviction_cb(struct dlm_eviction_cb *cb,
+ dlm_eviction_func *f,
+ void *data);
+void dlm_register_eviction_cb(struct dlm_ctxt *dlm,
+ struct dlm_eviction_cb *cb);
+void dlm_unregister_eviction_cb(struct dlm_eviction_cb *cb);
+
+#endif /* DLMAPI_H */
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
new file mode 100644
index 000000000000..8d17d28ef91c
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -0,0 +1,466 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmast.c
+ *
+ * AST and BAST functionality for local and remote nodes
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/utsname.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/random.h>
+#include <linux/blkdev.h>
+#include <linux/socket.h>
+#include <linux/inet.h>
+#include <linux/spinlock.h>
+
+
+#include "cluster/heartbeat.h"
+#include "cluster/nodemanager.h"
+#include "cluster/tcp.h"
+#include "cluster/endian.h"
+
+#include "dlmapi.h"
+#include "dlmcommon.h"
+
+#define MLOG_MASK_PREFIX ML_DLM
+#include "cluster/masklog.h"
+
+static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+ struct dlm_lock *lock);
+static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
+
+/* Decide whether a bast already queued for @lock has been made obsolete
+ * by the ast that is about to be queued for it.
+ *
+ * Example: dlm_thread queued a bast for an EX lock that was blocking
+ * another EX, but before the bast went out the lock owner
+ * downconverted to NL.  The bast no longer applies and only the ast
+ * should be sent.
+ *
+ * The check exists because the lock and convert paths can queue asts
+ * out-of-band (without waiting for dlm_thread) so that LKM_NOQUEUE
+ * requests get an immediate response.
+ *
+ * Caller holds dlm->ast_lock and lock->spinlock.
+ * Returns 1 if the pending bast should be cancelled, 0 otherwise. */
+static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
+{
+	assert_spin_locked(&dlm->ast_lock);
+	assert_spin_locked(&lock->spinlock);
+
+	/* no blocked level recorded, so there is nothing to cancel */
+	if (lock->ml.highest_blocked == LKM_IVMODE)
+		return 0;
+	BUG_ON(lock->ml.highest_blocked == LKM_NLMODE);
+
+	/* old bast already sent, ok */
+	if (lock->bast_pending && list_empty(&lock->bast_list))
+		return 0;
+
+	switch (lock->ml.type) {
+	case LKM_EXMODE:
+		/* EX blocks anything left, any bast still valid */
+		return 0;
+	case LKM_NLMODE:
+		/* NL blocks nothing, no reason to send any bast, cancel it */
+		return 1;
+	default:
+		/* PR only blocks EX */
+		return lock->ml.highest_blocked != LKM_EXMODE;
+	}
+}
+
+/* Put @lock on dlm->pending_asts and mark it ast_pending.
+ *
+ * A reference is taken on @lock for the list.  If the new lock level
+ * makes an already-queued bast obsolete (see dlm_should_cancel_bast),
+ * that bast is dequeued here, its list reference dropped and its
+ * reserved ast released.
+ *
+ * Caller must hold dlm->ast_lock; lock->spinlock is taken and released
+ * inside.  It is a BUG for @lock to already be on an ast list. */
+static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
+{
+	mlog_entry_void();
+
+	BUG_ON(!dlm);
+	BUG_ON(!lock);
+
+	assert_spin_locked(&dlm->ast_lock);
+	/* log the details before dying; BUG() below makes any further
+	 * check of the same condition unreachable */
+	if (!list_empty(&lock->ast_list)) {
+		mlog(ML_ERROR, "ast list not empty!! pending=%d, newlevel=%d\n",
+		     lock->ast_pending, lock->ml.type);
+		BUG();
+	}
+	if (lock->ast_pending)
+		mlog(0, "lock has an ast getting flushed right now\n");
+
+	/* putting lock on list, add a ref */
+	dlm_lock_get(lock);
+	spin_lock(&lock->spinlock);
+
+	/* check to see if this ast obsoletes the bast */
+	if (dlm_should_cancel_bast(dlm, lock)) {
+		struct dlm_lock_resource *res = lock->lockres;
+		mlog(0, "%s: cancelling bast for %.*s\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
+		lock->bast_pending = 0;
+		list_del_init(&lock->bast_list);
+		lock->ml.highest_blocked = LKM_IVMODE;
+		/* removing lock from list, remove a ref.  guaranteed
+		 * this won't be the last ref because of the get above,
+		 * so res->spinlock will not be taken here */
+		dlm_lock_put(lock);
+		/* free up the reserved bast that we are cancelling.
+		 * guaranteed that this will not be the last reserved
+		 * ast because *both* an ast and a bast were reserved
+		 * to get to this point.  the res->spinlock will not be
+		 * taken here */
+		dlm_lockres_release_ast(dlm, res);
+	}
+	list_add_tail(&lock->ast_list, &dlm->pending_asts);
+	lock->ast_pending = 1;
+	spin_unlock(&lock->spinlock);
+}
+/* Locked wrapper: takes dlm->ast_lock around __dlm_queue_ast(). */
+void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
+{
+ mlog_entry_void();
+
+ BUG_ON(!dlm);
+ BUG_ON(!lock);
+
+ spin_lock(&dlm->ast_lock);
+ __dlm_queue_ast(dlm, lock);
+ spin_unlock(&dlm->ast_lock);
+}
+
+/* Put @lock on dlm->pending_basts and mark it bast_pending, taking a
+ * reference on @lock for the list.  Caller must hold dlm->ast_lock;
+ * the list insertion is done under lock->spinlock.  It is a BUG for
+ * @lock to already be on a bast list. */
+static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
+{
+ mlog_entry_void();
+
+ BUG_ON(!dlm);
+ BUG_ON(!lock);
+ assert_spin_locked(&dlm->ast_lock);
+
+ BUG_ON(!list_empty(&lock->bast_list));
+ if (lock->bast_pending)
+ mlog(0, "lock has a bast getting flushed right now\n");
+
+ /* putting lock on list, add a ref */
+ dlm_lock_get(lock);
+ spin_lock(&lock->spinlock);
+ list_add_tail(&lock->bast_list, &dlm->pending_basts);
+ lock->bast_pending = 1;
+ spin_unlock(&lock->spinlock);
+}
+/* Locked wrapper: takes dlm->ast_lock around __dlm_queue_bast(). */
+void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
+{
+ mlog_entry_void();
+
+ BUG_ON(!dlm);
+ BUG_ON(!lock);
+
+ spin_lock(&dlm->ast_lock);
+ __dlm_queue_bast(dlm, lock);
+ spin_unlock(&dlm->ast_lock);
+}
+/* Move lock value block data between @lock's lksb and @res, then clear
+ * the LVB direction flags on the lksb.
+ * A copy only happens when this node owns @res (res->owner ==
+ * dlm->node_num) and is done under res->spinlock: DLM_LKSB_GET_LVB
+ * copies res->lvb into lksb->lvb, otherwise DLM_LKSB_PUT_LVB copies
+ * lksb->lvb into res->lvb.  The flags are cleared whether or not
+ * anything was copied.  It is a BUG for @lock to have no lksb.
+ * NOTE(review): the PUT debug message below says "from lockres"
+ * although that branch writes to the lockres. */
+static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+ struct dlm_lock *lock)
+{
+ struct dlm_lockstatus *lksb = lock->lksb;
+ BUG_ON(!lksb);
+
+ /* only updates if this node masters the lockres */
+ if (res->owner == dlm->node_num) {
+
+ spin_lock(&res->spinlock);
+ /* check the lksb flags for the direction */
+ if (lksb->flags & DLM_LKSB_GET_LVB) {
+ mlog(0, "getting lvb from lockres for %s node\n",
+ lock->ml.node == dlm->node_num ? "master" :
+ "remote");
+ memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
+ } else if (lksb->flags & DLM_LKSB_PUT_LVB) {
+ mlog(0, "setting lvb from lockres for %s node\n",
+ lock->ml.node == dlm->node_num ? "master" :
+ "remote");
+ memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
+ }
+ spin_unlock(&res->spinlock);
+ }
+
+ /* reset any lvb flags on the lksb */
+ lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
+}
+
+/* Deliver an AST for a lock owned by this node.
+ * Runs dlm_update_lvb() for the lock and then calls the lock's ast
+ * callback with lock->astdata.  It is a BUG to call this for a lock
+ * whose ml.node is not this node. */
+void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+		      struct dlm_lock *lock)
+{
+	dlm_astlockfunc_t *fn;
+
+	mlog_entry_void();
+
+	/* fetch the callback before the lvb update, as before */
+	fn = lock->ast;
+	BUG_ON(lock->ml.node != dlm->node_num);
+
+	dlm_update_lvb(dlm, res, lock);
+	(*fn)(lock->astdata);
+}
+
+
+/* Deliver an AST for a lock held by another node.
+ * The lksb flags are sampled before dlm_update_lvb() runs because that
+ * call clears the LVB direction bits; the sampled value is what gets
+ * handed to dlm_send_proxy_ast().  It is a BUG to call this for a lock
+ * whose ml.node is this node.
+ * Returns the result of dlm_send_proxy_ast(). */
+int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+		      struct dlm_lock *lock)
+{
+	struct dlm_lockstatus *status_block;
+	int saved_flags;
+
+	mlog_entry_void();
+
+	status_block = lock->lksb;
+	BUG_ON(lock->ml.node == dlm->node_num);
+
+	/* snapshot first: dlm_update_lvb() resets the flags */
+	saved_flags = status_block->flags;
+	dlm_update_lvb(dlm, res, lock);
+
+	/* lock request came from another node
+	 * go do the ast over there */
+	return dlm_send_proxy_ast(dlm, res, lock, saved_flags);
+}
+
+/* Invoke the blocking-AST callback of a lock owned by this node,
+ * passing it lock->astdata and @blocked_type.  @res is not used here.
+ * It is a BUG to call this for a lock whose ml.node is not this node. */
+void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+		       struct dlm_lock *lock, int blocked_type)
+{
+	dlm_bastlockfunc_t *callback;
+
+	callback = lock->bast;
+	mlog_entry_void();
+	BUG_ON(lock->ml.node != dlm->node_num);
+
+	callback(lock->astdata, blocked_type);
+}
+
+
+/* Handler for an incoming struct dlm_proxy_ast message; @data is the
+ * struct dlm_ctxt the message is for.
+ *
+ * The message is validated (name length, PUT/GET lvb flags not both
+ * set, type is DLM_AST or DLM_BAST), the lockres is looked up by name
+ * and the lock is searched by cookie: first on res->converting, then on
+ * res->blocked for an AST or res->granted for a BAST.  For an AST the
+ * lock is moved to res->granted, a pending convert_type becomes the
+ * lock type, the lksb status is set to DLM_NORMAL and, if LKM_GET_LVB
+ * was sent, the lvb from the message is copied into the lksb.  The
+ * local ast/bast callback is then run after res->spinlock is dropped.
+ *
+ * It is a BUG for this node to own the lockres named in the message.
+ *
+ * Returns an enum dlm_status value: DLM_REJECTED when dlm_grab() fails,
+ * DLM_IVBUFLEN, DLM_BADARGS or DLM_IVLOCKID for a bad message or
+ * unknown lockres, DLM_RECOVERING / DLM_MIGRATING when the lockres is
+ * in that state, otherwise DLM_NORMAL (including when no lock matches
+ * the cookie). */
+int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
+{
+ int ret;
+ unsigned int locklen;
+ struct dlm_ctxt *dlm = data;
+ struct dlm_lock_resource *res = NULL;
+ struct dlm_lock *lock = NULL;
+ struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf;
+ char *name;
+ struct list_head *iter, *head=NULL;
+ u64 cookie;
+ u32 flags;
+
+ if (!dlm_grab(dlm)) {
+ dlm_error(DLM_REJECTED);
+ return DLM_REJECTED;
+ }
+
+ mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
+ "Domain %s not fully joined!\n", dlm->name);
+
+ name = past->name;
+ locklen = past->namelen;
+ cookie = be64_to_cpu(past->cookie);
+ flags = be32_to_cpu(past->flags);
+
+ if (locklen > DLM_LOCKID_NAME_MAX) {
+ ret = DLM_IVBUFLEN;
+ mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n");
+ goto leave;
+ }
+
+ if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
+ (LKM_PUT_LVB|LKM_GET_LVB)) {
+ mlog(ML_ERROR, "both PUT and GET lvb specified\n");
+ ret = DLM_BADARGS;
+ goto leave;
+ }
+
+ mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
+ (flags & LKM_GET_LVB ? "get lvb" : "none"));
+
+ mlog(0, "type=%d, blocked_type=%d\n", past->type, past->blocked_type);
+
+ if (past->type != DLM_AST &&
+ past->type != DLM_BAST) {
+ mlog(ML_ERROR, "Unknown ast type! %d, cookie=%"MLFu64", "
+ "name=%.*s\n", past->type, cookie, locklen, name);
+ ret = DLM_IVLOCKID;
+ goto leave;
+ }
+
+ res = dlm_lookup_lockres(dlm, name, locklen);
+ if (!res) {
+ mlog(ML_ERROR, "got %sast for unknown lockres! "
+ "cookie=%"MLFu64", name=%.*s, namelen=%u\n",
+ past->type == DLM_AST ? "" : "b",
+ cookie, locklen, name, locklen);
+ ret = DLM_IVLOCKID;
+ goto leave;
+ }
+
+ /* cannot get a proxy ast message if this node owns it */
+ BUG_ON(res->owner == dlm->node_num);
+
+ mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name);
+
+ spin_lock(&res->spinlock);
+ if (res->state & DLM_LOCK_RES_RECOVERING) {
+ mlog(0, "responding with DLM_RECOVERING!\n");
+ ret = DLM_RECOVERING;
+ goto unlock_out;
+ }
+ if (res->state & DLM_LOCK_RES_MIGRATING) {
+ mlog(0, "responding with DLM_MIGRATING!\n");
+ ret = DLM_MIGRATING;
+ goto unlock_out;
+ }
+ /* try convert queue for both ast/bast */
+ head = &res->converting;
+ lock = NULL;
+ list_for_each(iter, head) {
+ lock = list_entry (iter, struct dlm_lock, list);
+ if (be64_to_cpu(lock->ml.cookie) == cookie)
+ goto do_ast;
+ }
+
+ /* if not on convert, try blocked for ast, granted for bast */
+ if (past->type == DLM_AST)
+ head = &res->blocked;
+ else
+ head = &res->granted;
+
+ list_for_each(iter, head) {
+ lock = list_entry (iter, struct dlm_lock, list);
+ if (be64_to_cpu(lock->ml.cookie) == cookie)
+ goto do_ast;
+ }
+
+ mlog(ML_ERROR, "got %sast for unknown lock! cookie=%"MLFu64", "
+ "name=%.*s, namelen=%u\n",
+ past->type == DLM_AST ? "" : "b", cookie, locklen, name, locklen);
+
+ ret = DLM_NORMAL; /* an unknown cookie is logged but still answered with DLM_NORMAL */
+unlock_out:
+ spin_unlock(&res->spinlock);
+ goto leave;
+
+do_ast:
+ ret = DLM_NORMAL;
+ if (past->type == DLM_AST) {
+ /* do not alter lock refcount. switching lists. */
+ list_del_init(&lock->list);
+ list_add_tail(&lock->list, &res->granted);
+ mlog(0, "ast: adding to granted list... type=%d, "
+ "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
+ if (lock->ml.convert_type != LKM_IVMODE) {
+ lock->ml.type = lock->ml.convert_type;
+ lock->ml.convert_type = LKM_IVMODE;
+ } else {
+ // should already be there....
+ }
+
+ lock->lksb->status = DLM_NORMAL;
+
+ /* if we requested the lvb, fetch it into our lksb now */
+ if (flags & LKM_GET_LVB) {
+ BUG_ON(!(lock->lksb->flags & DLM_LKSB_GET_LVB));
+ memcpy(lock->lksb->lvb, past->lvb, DLM_LVB_LEN);
+ }
+ }
+ spin_unlock(&res->spinlock); /* the callbacks below run without res->spinlock */
+
+ if (past->type == DLM_AST)
+ dlm_do_local_ast(dlm, res, lock);
+ else
+ dlm_do_local_bast(dlm, res, lock, past->blocked_type);
+
+leave:
+
+ if (res)
+ dlm_lockres_put(res);
+
+ dlm_put(dlm);
+ return ret;
+}
+
+
+/* Build a struct dlm_proxy_ast for @lock on @res and send it to the
+ * node recorded in lock->ml.node as a DLM_PROXY_AST_MSG.
+ *
+ * @msg_type and @blocked_type are copied into the message.  When
+ * @flags contains DLM_LKSB_GET_LVB, LKM_GET_LVB is set in the message
+ * flags and the lksb's lvb (DLM_LVB_LEN bytes) is appended as a second
+ * kvec.
+ *
+ * Returns the negative value from o2net_send_message_vec() when the
+ * send fails, otherwise 0.  A remote status of DLM_RECOVERING or
+ * DLM_MIGRATING is treated as a BUG; any other status that is not
+ * DLM_NORMAL is logged and ignored. */
+int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
+ struct dlm_lock *lock, int msg_type,
+ int blocked_type, int flags)
+{
+ int ret = 0;
+ struct dlm_proxy_ast past;
+ struct kvec vec[2];
+ size_t veclen = 1;
+ int status;
+
+ mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n",
+ res->lockname.len, res->lockname.name, lock->ml.node,
+ msg_type, blocked_type);
+
+ memset(&past, 0, sizeof(struct dlm_proxy_ast));
+ past.node_idx = dlm->node_num;
+ past.type = msg_type;
+ past.blocked_type = blocked_type;
+ past.namelen = res->lockname.len;
+ memcpy(past.name, res->lockname.name, past.namelen);
+ past.cookie = lock->ml.cookie; /* copied as stored; the receiver applies be64_to_cpu() */
+
+ vec[0].iov_len = sizeof(struct dlm_proxy_ast);
+ vec[0].iov_base = &past;
+ if (flags & DLM_LKSB_GET_LVB) {
+ mlog(0, "returning requested LVB data\n");
+ be32_add_cpu(&past.flags, LKM_GET_LVB); /* past was zeroed above, so flags becomes LKM_GET_LVB in big-endian */
+ vec[1].iov_len = DLM_LVB_LEN;
+ vec[1].iov_base = lock->lksb->lvb;
+ veclen++;
+ }
+
+ ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
+ lock->ml.node, &status);
+ if (ret < 0)
+ mlog_errno(ret);
+ else {
+ if (status == DLM_RECOVERING) {
+ mlog(ML_ERROR, "sent AST to node %u, it thinks this "
+ "node is dead!\n", lock->ml.node);
+ BUG();
+ } else if (status == DLM_MIGRATING) {
+ mlog(ML_ERROR, "sent AST to node %u, it returned "
+ "DLM_MIGRATING!\n", lock->ml.node);
+ BUG();
+ } else if (status != DLM_NORMAL) {
+ mlog(ML_ERROR, "AST to node %u returned %d!\n",
+ lock->ml.node, status);
+ /* ignore it */
+ }
+ ret = 0;
+ }
+ return ret;
+}
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
new file mode 100644
index 000000000000..3fecba0a6023
--- /dev/null
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -0,0 +1,884 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dlmcommon.h
+ *
+ * Copyright (C) 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef DLMCOMMON_H
+#define DLMCOMMON_H
+
+#include <linux/kref.h>
+
+#define DLM_HB_NODE_DOWN_PRI (0xf000000)
+#define DLM_HB_NODE_UP_PRI (0x8000000)
+
+#define DLM_LOCKID_NAME_MAX 32
+
+#define DLM_DOMAIN_NAME_MAX_LEN 255
+#define DLM_LOCK_RES_OWNER_UNKNOWN O2NM_MAX_NODES
+#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
+#define DLM_THREAD_MS 200 // flush at least every 200 ms
+
+#define DLM_HASH_BITS 7
+#define DLM_HASH_SIZE (1 << DLM_HASH_BITS)
+#define DLM_HASH_MASK (DLM_HASH_SIZE - 1)
+/* AST message kinds.  dlm_proxy_ast_handler() compares the type field
+ * of a struct dlm_proxy_ast against these and accepts only DLM_AST and
+ * DLM_BAST. */
+enum dlm_ast_type {
+ DLM_AST = 0,
+ DLM_BAST,
+ DLM_ASTUNLOCK
+};
+
+
+#define LKM_VALID_FLAGS (LKM_VALBLK | LKM_CONVERT | LKM_UNLOCK | \
+ LKM_CANCEL | LKM_INVVALBLK | LKM_FORCE | \
+ LKM_RECOVERY | LKM_LOCAL | LKM_NOQUEUE)
+
+#define DLM_RECOVERY_LOCK_NAME "$RECOVERY"
+#define DLM_RECOVERY_LOCK_NAME_LEN 9
+
+/* Returns 1 when the @name_len bytes at @lock_name are exactly
+ * DLM_RECOVERY_LOCK_NAME, 0 otherwise. */
+static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
+{
+	/* a different length can never match; skip the compare */
+	if (name_len != DLM_RECOVERY_LOCK_NAME_LEN)
+		return 0;
+
+	return memcmp(lock_name, DLM_RECOVERY_LOCK_NAME, name_len) == 0;
+}
+
+#define DLM_RECO_STATE_ACTIVE 0x0001
+/* Recovery bookkeeping embedded in struct dlm_ctxt as its reco member. */
+struct dlm_recovery_ctxt
+{
+ struct list_head resources;
+ struct list_head received;
+ struct list_head node_data;
+ u8 new_master;
+ u8 dead_node;
+ u16 state; /* presumably DLM_RECO_STATE_* bits -- TODO confirm */
+ unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ wait_queue_head_t event;
+};
+/* Values held in the dlm_state member of struct dlm_ctxt. */
+enum dlm_ctxt_state {
+ DLM_CTXT_NEW = 0,
+ DLM_CTXT_JOINED,
+ DLM_CTXT_IN_SHUTDOWN,
+ DLM_CTXT_LEAVING,
+};
+/* Per-domain DLM context; the handle passed to dlmlock()/dlmunlock(). */
+struct dlm_ctxt
+{
+ struct list_head list;
+ struct list_head *resources;
+ struct list_head dirty_list;
+ struct list_head purge_list;
+ struct list_head pending_asts; /* locks queued by __dlm_queue_ast() */
+ struct list_head pending_basts; /* locks queued by __dlm_queue_bast() */
+ unsigned int purge_count;
+ spinlock_t spinlock;
+ spinlock_t ast_lock; /* held while queueing onto pending_asts/pending_basts */
+ char *name; /* domain name, printed in log messages */
+ u8 node_num; /* this node; compared with res->owner and lock->ml.node */
+ u32 key; /* passed to o2net_send_message_vec() */
+ u8 joining_node; /* written by __dlm_set_joining_node() under spinlock */
+ wait_queue_head_t dlm_join_events; /* woken when joining_node is written */
+ unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ struct dlm_recovery_ctxt reco;
+ spinlock_t master_lock;
+ struct list_head master_list;
+ struct list_head mle_hb_events;
+
+ /* these give a really vague idea of the system load */
+ atomic_t local_resources;
+ atomic_t remote_resources;
+ atomic_t unknown_resources;
+
+ /* NOTE: Next three are protected by dlm_domain_lock */
+ struct kref dlm_refs;
+ enum dlm_ctxt_state dlm_state;
+ unsigned int num_joins;
+
+ struct o2hb_callback_func dlm_hb_up;
+ struct o2hb_callback_func dlm_hb_down;
+ struct task_struct *dlm_thread_task;
+ struct task_struct *dlm_reco_thread_task;
+ wait_queue_head_t dlm_thread_wq;
+ wait_queue_head_t dlm_reco_thread_wq;
+ wait_queue_head_t ast_wq;
+ wait_queue_head_t migration_wq;
+
+ struct work_struct dispatched_work;
+ struct list_head work_list;
+ spinlock_t work_lock;
+ struct list_head dlm_domain_handlers;
+ struct list_head dlm_eviction_callbacks;
+};
+
+/* these keventd work queue items are for less-frequently
+ * called functions that cannot be directly called from the
+ * net message handlers for some reason, usually because
+ * they need to send net messages of their own. */
+void dlm_dispatch_work(void *data);
+
+struct dlm_lock_resource;
+struct dlm_work_item;
+
+typedef void (dlm_workfunc_t)(struct dlm_work_item *, void *);
+/* Payload stored in struct dlm_work_item as u.ral. */
+struct dlm_request_all_locks_priv
+{
+ u8 reco_master;
+ u8 dead_node;
+};
+/* Payload stored in struct dlm_work_item as u.ml. */
+struct dlm_mig_lockres_priv
+{
+ struct dlm_lock_resource *lockres;
+ u8 real_master;
+};
+/* Payload stored in struct dlm_work_item as u.am. */
+struct dlm_assert_master_priv
+{
+ struct dlm_lock_resource *lockres;
+ u8 request_from;
+ u32 flags;
+ unsigned ignore_higher:1;
+};
+
+/* One deferred unit of work: the function to run, the dlm it belongs
+ * to, an opaque data pointer and a per-work-type payload in u.
+ * Initialised by dlm_init_work_item(). */
+struct dlm_work_item
+{
+ struct list_head list;
+ dlm_workfunc_t *func;
+ struct dlm_ctxt *dlm;
+ void *data;
+ union {
+ struct dlm_request_all_locks_priv ral;
+ struct dlm_mig_lockres_priv ml;
+ struct dlm_assert_master_priv am;
+ } u;
+};
+
+/* Prepare work item @i: zero it, initialise its list head and record
+ * the work function @f, its @data argument and the owning @dlm.
+ * The caller must already hold a dlm_grab() reference on @dlm. */
+static inline void dlm_init_work_item(struct dlm_ctxt *dlm,
+				      struct dlm_work_item *i,
+				      dlm_workfunc_t *f, void *data)
+{
+	/* start from an all-zero item so the payload union is cleared too */
+	memset(i, 0, sizeof(struct dlm_work_item));
+	INIT_LIST_HEAD(&i->list);
+
+	/* must have already done a dlm_grab on this! */
+	i->dlm = dlm;
+	i->func = f;
+	i->data = data;
+}
+
+
+/* Record @node as dlm->joining_node and wake anyone sleeping on
+ * dlm->dlm_join_events.  Caller must hold dlm->spinlock. */
+static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
+ u8 node)
+{
+ assert_spin_locked(&dlm->spinlock);
+
+ dlm->joining_node = node;
+ wake_up(&dlm->dlm_join_events);
+}
+
+#define DLM_LOCK_RES_UNINITED 0x00000001
+#define DLM_LOCK_RES_RECOVERING 0x00000002
+#define DLM_LOCK_RES_READY 0x00000004
+#define DLM_LOCK_RES_DIRTY 0x00000008
+#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
+#define DLM_LOCK_RES_MIGRATING 0x00000020
+
+#define DLM_PURGE_INTERVAL_MS (8 * 1000)
+/* A named lock resource together with the queues of locks held on it. */
+struct dlm_lock_resource
+{
+ /* WARNING: Please see the comment in dlm_init_lockres before
+ * adding fields here. */
+ struct list_head list;
+ struct kref refs;
+
+ /* please keep these next 3 in this order
+ * some funcs want to iterate over all lists
+ * (dlm_proxy_ast_handler walks them under spinlock to find a
+ * lock by cookie) */
+ struct list_head granted;
+ struct list_head converting;
+ struct list_head blocked;
+
+ struct list_head dirty;
+ struct list_head recovering; // dlm_recovery_ctxt.resources list
+
+ /* unused lock resources have their last_used stamped and are
+ * put on a list for the dlm thread to run. */
+ struct list_head purge;
+ unsigned long last_used;
+
+ unsigned migration_pending:1;
+ atomic_t asts_reserved;
+ spinlock_t spinlock;
+ wait_queue_head_t wq;
+ u8 owner; //node which owns the lock resource, or unknown
+ u16 state; /* DLM_LOCK_RES_* bits */
+ struct qstr lockname;
+ char lvb[DLM_LVB_LEN]; /* lock value block; copied under spinlock in dlm_update_lvb() */
+};
+
+struct dlm_migratable_lock
+{
+ __be64 cookie;
+
+ /* these 3 are just padding for the in-memory structure, but
+ * list and flags are actually used when sent over the wire */
+ __be16 pad1;
+ u8 list; // 0=granted, 1=converting, 2=blocked
+ u8 flags;
+
+ s8 type;