From b23cdde4c6240d70bb3d2e3c4046b60d6f6c8451 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 22 Jun 2007 13:07:02 -0700
Subject: configfs: consistent attribute size

The attribute store/show code currently limits attributes at PAGE_SIZE.
This code comes from sysfs, where it still works that way.

However, PAGE_SIZE is not constant.  A 16k attribute string works on
ia64 but not on x86.  Really a subsystem shouldn't allow different
attribute sizes based on platform.

As such, limit all simple attributes to 4k.  This works on all
platforms, and is consistent with all current code.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/file.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 3527c7c6def8..0f4b65e85245 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -33,6 +33,13 @@
 #include <linux/configfs.h>
 #include "configfs_internal.h"
 
+/*
+ * A simple attribute can only be 4096 characters.  Why 4k?  Because the
+ * original code limited it to PAGE_SIZE.  That's a bad idea, though,
+ * because an attribute of 16k on ia64 won't work on x86.  So we limit to
+ * 4k, our minimum common page size.
+ */
+#define SIMPLE_ATTR_SIZE 4096
 
 struct configfs_buffer {
 	size_t			count;
@@ -69,7 +76,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
 
 	count = ops->show_attribute(item,attr,buffer->page);
 	buffer->needs_read_fill = 0;
-	BUG_ON(count > (ssize_t)PAGE_SIZE);
+	BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
 	if (count >= 0)
 		buffer->count = count;
 	else
@@ -137,8 +144,8 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
 	if (!buffer->page)
 		return -ENOMEM;
 
-	if (count >= PAGE_SIZE)
-		count = PAGE_SIZE - 1;
+	if (count >= SIMPLE_ATTR_SIZE)
+		count = SIMPLE_ATTR_SIZE - 1;
 	error = copy_from_user(buffer->page,buf,count);
 	buffer->needs_read_fill = 1;
 	/* if buf is assumed to contain a string, terminate it by \0,
-- 
cgit v1.2.3


From 4c62b53454a83178676e5ecae6665447d363c7b4 Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Wed, 27 Jun 2007 16:02:14 +0530
Subject: configfs: misc cleanups

1. item.c:config_item_cleanup() is a private function (only called by
config_item_release() in same file). However, it is spuriously
exported in include/linux/configfs.h, so remove that export and make
it static in item.c. Also, it is no longer exported / interface
function, so no need to give comment for this function (the comment
was stating obvious thing, anyway).

2. Kernel-doc comment format does not allow empty line between end of
comment and start of function (declaration line). There were several
such spurious empty lines in item.c, so fix them.

  fs/configfs/item.c       |   15 +++------------
  include/linux/configfs.h |    1 -
  2 files changed, 3 insertions(+), 13 deletions(-)

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/item.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 24421209f854..b762bbeaa0be 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -62,7 +62,6 @@ void config_item_init(struct config_item * item)
  *	dynamically allocated string that @item->ci_name points to.
  *	Otherwise, use the static @item->ci_namebuf array.
  */
-
 int config_item_set_name(struct config_item * item, const char * fmt, ...)
 {
 	int error = 0;
@@ -139,12 +138,7 @@ struct config_item * config_item_get(struct config_item * item)
 	return item;
 }
 
-/**
- *	config_item_cleanup - free config_item resources.
- *	@item:	item.
- */
-
-void config_item_cleanup(struct config_item * item)
+static void config_item_cleanup(struct config_item * item)
 {
 	struct config_item_type * t = item->ci_type;
 	struct config_group * s = item->ci_group;
@@ -179,12 +173,10 @@ void config_item_put(struct config_item * item)
 		kref_put(&item->ci_kref, config_item_release);
 }
 
-
 /**
  *	config_group_init - initialize a group for use
  *	@k:	group
  */
-
 void config_group_init(struct config_group *group)
 {
 	config_item_init(&group->cg_item);
@@ -201,8 +193,8 @@ void config_group_init(struct config_group *group)
  *	looking for a matching config_item. If matching item is found
  *	take a reference and return the item.
  */
-
-struct config_item * config_group_find_obj(struct config_group * group, const char * name)
+struct config_item *config_group_find_obj(struct config_group *group,
+					  const char * name)
 {
 	struct list_head * entry;
 	struct config_item * ret = NULL;
@@ -219,7 +211,6 @@ struct config_item * config_group_find_obj(struct config_group * group, const ch
 	return ret;
 }
 
-
 EXPORT_SYMBOL(config_item_init);
 EXPORT_SYMBOL(config_group_init);
 EXPORT_SYMBOL(config_item_get);
-- 
cgit v1.2.3


From 9b1d9aa4e9c5cafe73b9df21d758b50b5d75264d Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Wed, 4 Jul 2007 16:37:06 +0530
Subject: [PATCH] configfs+dlm: Separate out __CONFIGFS_ATTR into configfs.h

fs/dlm/config.c contains a useful generic macro called __CONFIGFS_ATTR
that is similar to sysfs' __ATTR macro that makes defining attributes
easy for any user of configfs. Separate it out into configfs.h so that
other users (forthcoming in dynamic netconsole patchset) can use it too.

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Cc: David Teigland <teigland@redhat.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/dlm/config.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 5069b2cb5a1f..e47eb42406fa 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -133,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
 	return len;
 }
 
-#define __CONFIGFS_ATTR(_name,_mode,_read,_write) {                           \
-	.attr   = { .ca_name = __stringify(_name),                            \
-		    .ca_mode = _mode,                                         \
-		    .ca_owner = THIS_MODULE },                                \
-	.show   = _read,                                                      \
-	.store  = _write,                                                     \
-}
-
 #define CLUSTER_ATTR(name, check_zero)                                        \
 static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len)  \
 {                                                                             \
-- 
cgit v1.2.3


From 3fe6c5ce1176cf661dbe71fc43b627c1a742a89a Mon Sep 17 00:00:00 2001
From: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Date: Wed, 4 Jul 2007 16:37:16 +0530
Subject: [PATCH] configfs+dlm: Rename config_group_find_obj and state
 semantics clearly

Configfs being based upon sysfs code, config_group_find_obj() is probably
so named because of the similar kset_find_obj() in sysfs. However,
"kobject"s in sysfs become "config_item"s in configfs, so let's call it
config_group_find_item() instead, for sake of uniformity, and make
corresponding change in the users of this function.

BTW a crucial difference between kset_find_obj and config_group_find_item
is in locking expectations. kset_find_obj does its locking by itself, but
config_group_find_item expects the *caller* to do the locking. The reason
for this: kset's have their own locks, config_group's don't but instead
rely on the subsystem mutex. And, subsystem needn't necessarily be around
when config_group_find_item() is called.

So let's state these locking semantics explicitly, and rectify the comment,
otherwise bugs could continue to occur in future, as they did in the past
(refer commit d82b8191e238 in gfs2-2.6-fixes.git).

[ I also took the opportunity to fix some bad whitespace and
double-empty lines. --Joel ]

[ Conflict in fs/dlm/config.c with commit
  3168b0780d06ace875696f8a648d04d6089654e5 manually resolved. --Mark ]

Signed-off-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Cc: David Teigland <teigland@redhat.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/item.c | 18 ++++++++----------
 fs/dlm/config.c    |  2 +-
 2 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index b762bbeaa0be..76dc4c3e5d51 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -183,27 +183,25 @@ void config_group_init(struct config_group *group)
 	INIT_LIST_HEAD(&group->cg_children);
 }
 
-
 /**
- *	config_group_find_obj - search for item in group.
+ *	config_group_find_item - search for item in group.
  *	@group:	group we're looking in.
  *	@name:	item's name.
  *
- *	Lock group via @group->cg_subsys, and iterate over @group->cg_list,
- *	looking for a matching config_item. If matching item is found
- *	take a reference and return the item.
+ *	Iterate over @group->cg_list, looking for a matching config_item.
+ *	If matching item is found take a reference and return the item.
+ *	Caller must have locked group via @group->cg_subsys->su_mtx.
  */
-struct config_item *config_group_find_obj(struct config_group *group,
-					  const char * name)
+struct config_item *config_group_find_item(struct config_group *group,
+					   const char *name)
 {
 	struct list_head * entry;
 	struct config_item * ret = NULL;
 
-        /* XXX LOCKING! */
 	list_for_each(entry,&group->cg_children) {
 		struct config_item * item = to_item(entry);
 		if (config_item_name(item) &&
-                    !strcmp(config_item_name(item), name)) {
+		    !strcmp(config_item_name(item), name)) {
 			ret = config_item_get(item);
 			break;
 		}
@@ -215,4 +213,4 @@ EXPORT_SYMBOL(config_item_init);
 EXPORT_SYMBOL(config_group_init);
 EXPORT_SYMBOL(config_item_get);
 EXPORT_SYMBOL(config_item_put);
-EXPORT_SYMBOL(config_group_find_obj);
+EXPORT_SYMBOL(config_group_find_item);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index e47eb42406fa..4348cb42cf17 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -752,7 +752,7 @@ static struct space *get_space(char *name)
 		return NULL;
 
 	down(&space_list->cg_subsys->su_sem);
-	i = config_group_find_obj(space_list, name);
+	i = config_group_find_item(space_list, name);
 	up(&space_list->cg_subsys->su_sem);
 
 	return to_space(i);
-- 
cgit v1.2.3


From e6bd07aee739566803425acdbf5cdb29919164e1 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 6 Jul 2007 23:33:17 -0700
Subject: configfs: Convert subsystem semaphore to mutex

Convert the su_sem member of struct configfs_subsystem to a struct
mutex, as that's what it is. Also convert all the users and update
Documentation/configfs.txt and Documentation/configfs_example.c
accordingly.

[ Conflict in fs/dlm/config.c with commit
  3168b0780d06ace875696f8a648d04d6089654e5 manually resolved. --Mark ]

Inspired-by: Satyam Sharma <ssatyam@cse.iitk.ac.in>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c              | 16 ++++++++--------
 fs/dlm/config.c                | 10 +++++-----
 fs/ocfs2/cluster/nodemanager.c |  2 +-
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 5e6e37e58f36..d3b1dbb9b5b8 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -562,7 +562,7 @@ static int populate_groups(struct config_group *group)
 
 /*
  * All of link_obj/unlink_obj/link_group/unlink_group require that
- * subsys->su_sem is held.
+ * subsys->su_mutex is held.
  */
 
 static void unlink_obj(struct config_item *item)
@@ -783,7 +783,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
 
-	down(&subsys->su_sem);
+	mutex_lock(&subsys->su_mutex);
 	group = NULL;
 	item = NULL;
 	if (type->ct_group_ops->make_group) {
@@ -797,7 +797,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		if (item)
 			link_obj(parent_item, item);
 	}
-	up(&subsys->su_sem);
+	mutex_unlock(&subsys->su_mutex);
 
 	kfree(name);
 	if (!item) {
@@ -841,13 +841,13 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 out_unlink:
 	if (ret) {
 		/* Tear down everything we built up */
-		down(&subsys->su_sem);
+		mutex_lock(&subsys->su_mutex);
 		if (group)
 			unlink_group(group);
 		else
 			unlink_obj(item);
 		client_drop_item(parent_item, item);
-		up(&subsys->su_sem);
+		mutex_unlock(&subsys->su_mutex);
 
 		if (module_got)
 			module_put(owner);
@@ -910,17 +910,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (sd->s_type & CONFIGFS_USET_DIR) {
 		configfs_detach_group(item);
 
-		down(&subsys->su_sem);
+		mutex_lock(&subsys->su_mutex);
 		unlink_group(to_config_group(item));
 	} else {
 		configfs_detach_item(item);
 
-		down(&subsys->su_sem);
+		mutex_lock(&subsys->su_mutex);
 		unlink_obj(item);
 	}
 
 	client_drop_item(parent_item, item);
-	up(&subsys->su_sem);
+	mutex_unlock(&subsys->su_mutex);
 
 	/* Drop our reference from above */
 	config_item_put(item);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 4348cb42cf17..2f8e3c81bc19 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -607,7 +607,7 @@ static struct clusters clusters_root = {
 int dlm_config_init(void)
 {
 	config_group_init(&clusters_root.subsys.su_group);
-	init_MUTEX(&clusters_root.subsys.su_sem);
+	mutex_init(&clusters_root.subsys.su_mutex);
 	return configfs_register_subsystem(&clusters_root.subsys);
 }
 
@@ -751,9 +751,9 @@ static struct space *get_space(char *name)
 	if (!space_list)
 		return NULL;
 
-	down(&space_list->cg_subsys->su_sem);
+	mutex_lock(&space_list->cg_subsys->su_mutex);
 	i = config_group_find_item(space_list, name);
-	up(&space_list->cg_subsys->su_sem);
+	mutex_unlock(&space_list->cg_subsys->su_mutex);
 
 	return to_space(i);
 }
@@ -772,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
 	if (!comm_list)
 		return NULL;
 
-	down(&clusters_root.subsys.su_sem);
+	mutex_lock(&clusters_root.subsys.su_mutex);
 
 	list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
 		cm = to_comm(i);
@@ -792,7 +792,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
 			break;
 		}
 	}
-	up(&clusters_root.subsys.su_sem);
+	mutex_unlock(&clusters_root.subsys.su_mutex);
 
 	if (!found)
 		cm = NULL;
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 9f5ad0f01ce0..48b77d113cb2 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -934,7 +934,7 @@ static int __init init_o2nm(void)
 		goto out_sysctl;
 
 	config_group_init(&o2nm_cluster_group.cs_subsys.su_group);
-	init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem);
+	mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex);
 	ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys);
 	if (ret) {
 		printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
-- 
cgit v1.2.3


From 6d748924b753d63a57dad130fdf11f64c27ff54b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 22 Jun 2007 11:20:00 +0200
Subject: [PATCH] configsfs buffer: use mutex

Seems copied from sysfs, but I don't see a reason here nor there to use
a semaphore instead of a mutex. Convert.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/file.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 0f4b65e85245..a3658f9a082c 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -27,8 +27,8 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
-#include <asm/semaphore.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
@@ -46,7 +46,7 @@ struct configfs_buffer {
 	loff_t			pos;
 	char			* page;
 	struct configfs_item_operations	* ops;
-	struct semaphore	sem;
+	struct mutex		mutex;
 	int			needs_read_fill;
 };
 
@@ -109,7 +109,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
 	struct configfs_buffer * buffer = file->private_data;
 	ssize_t retval = 0;
 
-	down(&buffer->sem);
+	mutex_lock(&buffer->mutex);
 	if (buffer->needs_read_fill) {
 		if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
 			goto out;
@@ -119,7 +119,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
 	retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
 					 buffer->count);
 out:
-	up(&buffer->sem);
+	mutex_unlock(&buffer->mutex);
 	return retval;
 }
 
@@ -200,13 +200,13 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
 	struct configfs_buffer * buffer = file->private_data;
 	ssize_t len;
 
-	down(&buffer->sem);
+	mutex_lock(&buffer->mutex);
 	len = fill_write_buffer(buffer, buf, count);
 	if (len > 0)
 		len = flush_write_buffer(file->f_path.dentry, buffer, count);
 	if (len > 0)
 		*ppos += len;
-	up(&buffer->sem);
+	mutex_unlock(&buffer->mutex);
 	return len;
 }
 
@@ -260,7 +260,7 @@ static int check_perm(struct inode * inode, struct file * file)
 		error = -ENOMEM;
 		goto Enomem;
 	}
-	init_MUTEX(&buffer->sem);
+	mutex_init(&buffer->mutex);
 	buffer->needs_read_fill = 1;
 	buffer->ops = ops;
 	file->private_data = buffer;
@@ -299,6 +299,7 @@ static int configfs_release(struct inode * inode, struct file * filp)
 	if (buffer) {
 		if (buffer->page)
 			free_page((unsigned long)buffer->page);
+		mutex_destroy(&buffer->mutex);
 		kfree(buffer);
 	}
 	return 0;
-- 
cgit v1.2.3


From 299894cc9001b09e3e9685f2709b49e7e1092ccc Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 6 Oct 2006 17:33:23 -0700
Subject: configfs: accessing item hierarchy during rmdir(2)

Add a notification callback, ops->disconnect_notify(). It has the same
prototype as ->drop_item(), but it will be called just before the item
linkage is broken. This way, configfs users who want to do work while
the object is still in the heirarchy have a chance.

Client drivers will still need to config_item_put() in their
->drop_item(), if they implement it.  They need do nothing in
->disconnect_notify().  They don't have to provide it if they don't
care.  But someone who wants to be notified before ci_parent is set to
NULL can now be notified.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index d3b1dbb9b5b8..125954723eb7 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -713,6 +713,28 @@ static void configfs_detach_group(struct config_item *item)
 	configfs_detach_item(item);
 }
 
+/*
+ * After the item has been detached from the filesystem view, we are
+ * ready to tear it out of the hierarchy.  Notify the client before
+ * we do that so they can perform any cleanup that requires
+ * navigating the hierarchy.  A client does not need to provide this
+ * callback.  The subsystem semaphore MUST be held by the caller, and
+ * references must be valid for both items.  It also assumes the
+ * caller has validated ci_type.
+ */
+static void client_disconnect_notify(struct config_item *parent_item,
+				     struct config_item *item)
+{
+	struct config_item_type *type;
+
+	type = parent_item->ci_type;
+	BUG_ON(!type);
+
+	if (type->ct_group_ops && type->ct_group_ops->disconnect_notify)
+		type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
+						      item);
+}
+
 /*
  * Drop the initial reference from make_item()/make_group()
  * This function assumes that reference is held on item
@@ -733,7 +755,7 @@ static void client_drop_item(struct config_item *parent_item,
 	 */
 	if (type->ct_group_ops && type->ct_group_ops->drop_item)
 		type->ct_group_ops->drop_item(to_config_group(parent_item),
-						item);
+					      item);
 	else
 		config_item_put(item);
 }
@@ -842,11 +864,14 @@ out_unlink:
 	if (ret) {
 		/* Tear down everything we built up */
 		mutex_lock(&subsys->su_mutex);
+
+		client_disconnect_notify(parent_item, item);
 		if (group)
 			unlink_group(group);
 		else
 			unlink_obj(item);
 		client_drop_item(parent_item, item);
+
 		mutex_unlock(&subsys->su_mutex);
 
 		if (module_got)
@@ -911,11 +936,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 		configfs_detach_group(item);
 
 		mutex_lock(&subsys->su_mutex);
+		client_disconnect_notify(parent_item, item);
 		unlink_group(to_config_group(item));
 	} else {
 		configfs_detach_item(item);
 
 		mutex_lock(&subsys->su_mutex);
+		client_disconnect_notify(parent_item, item);
 		unlink_obj(item);
 	}
 
-- 
cgit v1.2.3


From 631d1febab8e546e3bb800bdfe2c212b8adf87de Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 18 Jun 2007 18:06:09 -0700
Subject: configfs: config item dependancies.

Sometimes other drivers depend on particular configfs items.  For
example, ocfs2 mounts depend on a heartbeat region item.  If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly.  Not happy.

This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item().  A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on.  configfs will then return -EBUSY from rmdir(2) for that
item.  When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.

These API cannot be called underneath any configfs callbacks, as
they will conflict.  They can block and allocate.  A client driver
probably shouldn't calling them of its own gumption.  Rather it should
be providing an API that external subsystems call.

How does this work?  Imagine the ocfs2 mount process.  When it mounts,
it asks for a heart region item.  This is done via a call into the
heartbeat code.  Inside the heartbeat code, the region item is looked
up.  Here, the heartbeat code calls configfs_depend_item().  If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.

[ Fixed some bad whitespace in configfs.txt. --Mark ]

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/configfs_internal.h |   7 +-
 fs/configfs/dir.c               | 244 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 248 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 7b48c034b312..3b0185fdf9a4 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -29,10 +29,11 @@
 
 struct configfs_dirent {
 	atomic_t		s_count;
+	int			s_dependent_count;
 	struct list_head	s_sibling;
 	struct list_head	s_children;
 	struct list_head	s_links;
-	void 			* s_element;
+	void			* s_element;
 	int			s_type;
 	umode_t			s_mode;
 	struct dentry		* s_dentry;
@@ -41,8 +42,8 @@ struct configfs_dirent {
 
 #define CONFIGFS_ROOT		0x0001
 #define CONFIGFS_DIR		0x0002
-#define CONFIGFS_ITEM_ATTR 	0x0004
-#define CONFIGFS_ITEM_LINK 	0x0020
+#define CONFIGFS_ITEM_ATTR	0x0004
+#define CONFIGFS_ITEM_LINK	0x0020
 #define CONFIGFS_USET_DIR	0x0040
 #define CONFIGFS_USET_DEFAULT	0x0080
 #define CONFIGFS_USET_DROPPING	0x0100
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 125954723eb7..2f436d4f1d6d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -355,6 +355,10 @@ static int configfs_detach_prep(struct dentry *dentry)
 			/* Mark that we've taken i_mutex */
 			sd->s_type |= CONFIGFS_USET_DROPPING;
 
+			/*
+			 * Yup, recursive.  If there's a problem, blame
+			 * deep nesting of default_groups
+			 */
 			ret = configfs_detach_prep(sd->s_dentry);
 			if (!ret)
 				continue;
@@ -760,6 +764,239 @@ static void client_drop_item(struct config_item *parent_item,
 		config_item_put(item);
 }
 
+#ifdef DEBUG
+static void configfs_dump_one(struct configfs_dirent *sd, int level)
+{
+	printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));
+
+#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type);
+	type_print(CONFIGFS_ROOT);
+	type_print(CONFIGFS_DIR);
+	type_print(CONFIGFS_ITEM_ATTR);
+	type_print(CONFIGFS_ITEM_LINK);
+	type_print(CONFIGFS_USET_DIR);
+	type_print(CONFIGFS_USET_DEFAULT);
+	type_print(CONFIGFS_USET_DROPPING);
+#undef type_print
+}
+
+static int configfs_dump(struct configfs_dirent *sd, int level)
+{
+	struct configfs_dirent *child_sd;
+	int ret = 0;
+
+	configfs_dump_one(sd, level);
+
+	if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT)))
+		return 0;
+
+	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
+		ret = configfs_dump(child_sd, level + 2);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+#endif
+
+
+/*
+ * configfs_depend_item() and configfs_undepend_item()
+ *
+ * WARNING: Do not call these from a configfs callback!
+ *
+ * This describes these functions and their helpers.
+ *
+ * Allow another kernel system to depend on a config_item.  If this
+ * happens, the item cannot go away until the dependant can live without
+ * it.  The idea is to give client modules as simple an interface as
+ * possible.  When a system asks them to depend on an item, they just
+ * call configfs_depend_item().  If the item is live and the client
+ * driver is in good shape, we'll happily do the work for them.
+ *
+ * Why is the locking complex?  Because configfs uses the VFS to handle
+ * all locking, but this function is called outside the normal
+ * VFS->configfs path.  So it must take VFS locks to prevent the
+ * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc).  This is
+ * why you can't call these functions underneath configfs callbacks.
+ *
+ * Note, btw, that this can be called at *any* time, even when a configfs
+ * subsystem isn't registered, or when configfs is loading or unloading.
+ * Just like configfs_register_subsystem().  So we take the same
+ * precautions.  We pin the filesystem.  We lock each i_mutex _in_order_
+ * on our way down the tree.  If we can find the target item in the
+ * configfs tree, it must be part of the subsystem tree as well, so we
+ * do not need the subsystem semaphore.  Holding the i_mutex chain locks
+ * out mkdir() and rmdir(), who might be racing us.
+ */
+
+/*
+ * configfs_depend_prep()
+ *
+ * Only subdirectories count here.  Files (CONFIGFS_NOT_PINNED) are
+ * attributes.  This is similar but not the same to configfs_detach_prep().
+ * Note that configfs_detach_prep() expects the parent to be locked when it
+ * is called, but we lock the parent *inside* configfs_depend_prep().  We
+ * do that so we can unlock it if we find nothing.
+ *
+ * Here we do a depth-first search of the dentry hierarchy looking for
+ * our object.  We take i_mutex on each step of the way down.  IT IS
+ * ESSENTIAL THAT i_mutex LOCKING IS ORDERED.  If we come back up a branch,
+ * we'll drop the i_mutex.
+ *
+ * If the target is not found, -ENOENT is bubbled up and we have released
+ * all locks.  If the target was found, the locks will be cleared by
+ * configfs_depend_rollback().
+ *
+ * This adds a requirement that all config_items be unique!
+ *
+ * This is recursive because the locking traversal is tricky.  There isn't
+ * much on the stack, though, so folks that need this function - be careful
+ * about your stack!  Patches will be accepted to make it iterative.
+ */
+static int configfs_depend_prep(struct dentry *origin,
+				struct config_item *target)
+{
+	struct configfs_dirent *child_sd, *sd = origin->d_fsdata;
+	int ret = 0;
+
+	BUG_ON(!origin || !sd);
+
+	/* Lock this guy on the way down */
+	mutex_lock(&sd->s_dentry->d_inode->i_mutex);
+	if (sd->s_element == target)  /* Boo-yah */
+		goto out;
+
+	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
+		if (child_sd->s_type & CONFIGFS_DIR) {
+			ret = configfs_depend_prep(child_sd->s_dentry,
+						   target);
+			if (!ret)
+				goto out;  /* Child path boo-yah */
+		}
+	}
+
+	/* We looped all our children and didn't find target */
+	mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
+	ret = -ENOENT;
+
+out:
+	return ret;
+}
+
+/*
+ * This is ONLY called if configfs_depend_prep() did its job.  So we can
+ * trust the entire path from item back up to origin.
+ *
+ * We walk backwards from item, unlocking each i_mutex.  We finish by
+ * unlocking origin.
+ */
+static void configfs_depend_rollback(struct dentry *origin,
+				     struct config_item *item)
+{
+	struct dentry *dentry = item->ci_dentry;
+
+	while (dentry != origin) {
+		mutex_unlock(&dentry->d_inode->i_mutex);
+		dentry = dentry->d_parent;
+	}
+
+	mutex_unlock(&origin->d_inode->i_mutex);
+}
+
+int configfs_depend_item(struct configfs_subsystem *subsys,
+			 struct config_item *target)
+{
+	int ret;
+	struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
+	struct config_item *s_item = &subsys->su_group.cg_item;
+
+	/*
+	 * Pin the configfs filesystem.  This means we can safely access
+	 * the root of the configfs filesystem.
+	 */
+	ret = configfs_pin_fs();
+	if (ret)
+		return ret;
+
+	/*
+	 * Next, lock the root directory.  We're going to check that the
+	 * subsystem is really registered, and so we need to lock out
+	 * configfs_[un]register_subsystem().
+	 */
+	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
+
+	root_sd = configfs_sb->s_root->d_fsdata;
+
+	list_for_each_entry(p, &root_sd->s_children, s_sibling) {
+		if (p->s_type & CONFIGFS_DIR) {
+			if (p->s_element == s_item) {
+				subsys_sd = p;
+				break;
+			}
+		}
+	}
+
+	if (!subsys_sd) {
+		ret = -ENOENT;
+		goto out_unlock_fs;
+	}
+
+	/* Ok, now we can trust subsys/s_item */
+
+	/* Scan the tree, locking i_mutex recursively, return 0 if found */
+	ret = configfs_depend_prep(subsys_sd->s_dentry, target);
+	if (ret)
+		goto out_unlock_fs;
+
+	/* We hold all i_mutexes from the subsystem down to the target */
+	p = target->ci_dentry->d_fsdata;
+	p->s_dependent_count += 1;
+
+	configfs_depend_rollback(subsys_sd->s_dentry, target);
+
+out_unlock_fs:
+	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
+
+	/*
+	 * If we succeeded, the fs is pinned via other methods.  If not,
+	 * we're done with it anyway.  So release_fs() is always right.
+	 */
+	configfs_release_fs();
+
+	return ret;
+}
+EXPORT_SYMBOL(configfs_depend_item);
+
+/*
+ * Release the dependent linkage.  This is much simpler than
+ * configfs_depend_item() because we know that that the client driver is
+ * pinned, thus the subsystem is pinned, and therefore configfs is pinned.
+ */
+void configfs_undepend_item(struct configfs_subsystem *subsys,
+			    struct config_item *target)
+{
+	struct configfs_dirent *sd;
+
+	/*
+	 * Since we can trust everything is pinned, we just need i_mutex
+	 * on the item.
+	 */
+	mutex_lock(&target->ci_dentry->d_inode->i_mutex);
+
+	sd = target->ci_dentry->d_fsdata;
+	BUG_ON(sd->s_dependent_count < 1);
+
+	sd->s_dependent_count -= 1;
+
+	/*
+	 * After this unlock, we cannot trust the item to stay alive!
+	 * DO NOT REFERENCE item after this unlock.
+	 */
+	mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
+}
+EXPORT_SYMBOL(configfs_undepend_item);
 
 static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
@@ -906,6 +1143,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (sd->s_type & CONFIGFS_USET_DEFAULT)
 		return -EPERM;
 
+	/*
+	 * Here's where we check for dependents.  We're protected by
+	 * i_mutex.
+	 */
+	if (sd->s_dependent_count)
+		return -EBUSY;
+
 	/* Get a working ref until we have the child */
 	parent_item = configfs_get_config_item(dentry->d_parent);
 	subsys = to_config_group(parent_item)->cg_subsys;
-- 
cgit v1.2.3


From 14829422be6d6b6721f61b1e749acf5a9cb664d8 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 14 Jun 2007 21:40:49 -0700
Subject: ocfs2: Depend on configfs heartbeat items.

ocfs2 mounts require a heartbeat region.  Use the new configfs_depend_item()
facility to actually depend on them so they can't go away from under us.

First, teach cluster/nodemanager.c to depend an item on the o2cb subsystem.
Then teach o2hb_register_callbacks to take a UUID and depend on the
appropriate region.  Finally, teach all users of o2hb to pass a UUID or
NULL if they don't require a pin.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c   | 64 ++++++++++++++++++++++++++++++++++++++++--
 fs/ocfs2/cluster/heartbeat.h   |  6 ++--
 fs/ocfs2/cluster/nodemanager.c | 10 +++++++
 fs/ocfs2/cluster/nodemanager.h |  3 ++
 fs/ocfs2/cluster/tcp.c         |  8 +++---
 fs/ocfs2/dlm/dlmdomain.c       |  8 +++---
 fs/ocfs2/heartbeat.c           | 10 +++----
 7 files changed, 92 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 979113479c66..e331f4cb2c81 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1665,7 +1665,56 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
 }
 EXPORT_SYMBOL_GPL(o2hb_setup_callback);
 
-int o2hb_register_callback(struct o2hb_callback_func *hc)
+static struct o2hb_region *o2hb_find_region(const char *region_uuid)
+{
+	struct o2hb_region *p, *reg = NULL;
+
+	assert_spin_locked(&o2hb_live_lock);
+
+	list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
+		if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
+			reg = p;
+			break;
+		}
+	}
+
+	return reg;
+}
+
+static int o2hb_region_get(const char *region_uuid)
+{
+	int ret = 0;
+	struct o2hb_region *reg;
+
+	spin_lock(&o2hb_live_lock);
+
+	reg = o2hb_find_region(region_uuid);
+	if (!reg)
+		ret = -ENOENT;
+	spin_unlock(&o2hb_live_lock);
+
+	if (!ret)
+		ret = o2nm_depend_item(&reg->hr_item);
+
+	return ret;
+}
+
+static void o2hb_region_put(const char *region_uuid)
+{
+	struct o2hb_region *reg;
+
+	spin_lock(&o2hb_live_lock);
+
+	reg = o2hb_find_region(region_uuid);
+
+	spin_unlock(&o2hb_live_lock);
+
+	if (reg)
+		o2nm_undepend_item(&reg->hr_item);
+}
+
+int o2hb_register_callback(const char *region_uuid,
+			   struct o2hb_callback_func *hc)
 {
 	struct o2hb_callback_func *tmp;
 	struct list_head *iter;
@@ -1681,6 +1730,12 @@ int o2hb_register_callback(struct o2hb_callback_func *hc)
 		goto out;
 	}
 
+	if (region_uuid) {
+		ret = o2hb_region_get(region_uuid);
+		if (ret)
+			goto out;
+	}
+
 	down_write(&o2hb_callback_sem);
 
 	list_for_each(iter, &hbcall->list) {
@@ -1702,16 +1757,21 @@ out:
 }
 EXPORT_SYMBOL_GPL(o2hb_register_callback);
 
-void o2hb_unregister_callback(struct o2hb_callback_func *hc)
+void o2hb_unregister_callback(const char *region_uuid,
+			      struct o2hb_callback_func *hc)
 {
 	BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
 
 	mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
 	     __builtin_return_address(0), hc);
 
+	/* XXX Can this happen _with_ a region reference? */
 	if (list_empty(&hc->hc_item))
 		return;
 
+	if (region_uuid)
+		o2hb_region_put(region_uuid);
+
 	down_write(&o2hb_callback_sem);
 
 	list_del_init(&hc->hc_item);
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index cc6d40b39771..35397dd5ecdb 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -69,8 +69,10 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
 			 o2hb_cb_func *func,
 			 void *data,
 			 int priority);
-int o2hb_register_callback(struct o2hb_callback_func *hc);
-void o2hb_unregister_callback(struct o2hb_callback_func *hc);
+int o2hb_register_callback(const char *region_uuid,
+			   struct o2hb_callback_func *hc);
+void o2hb_unregister_callback(const char *region_uuid,
+			      struct o2hb_callback_func *hc);
 void o2hb_fill_node_map(unsigned long *map,
 			unsigned bytes);
 void o2hb_init(void);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 48b77d113cb2..eab46d8a7c8c 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -900,6 +900,16 @@ static struct o2nm_cluster_group o2nm_cluster_group = {
 	},
 };
 
+int o2nm_depend_item(struct config_item *item)
+{
+	return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item);
+}
+
+void o2nm_undepend_item(struct config_item *item)
+{
+	configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
+}
+
 static void __exit exit_o2nm(void)
 {
 	if (ocfs2_table_header)
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index 070522138ae2..55ae1a00d735 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -77,4 +77,7 @@ struct o2nm_node *o2nm_get_node_by_ip(__be32 addr);
 void o2nm_node_get(struct o2nm_node *node);
 void o2nm_node_put(struct o2nm_node *node);
 
+int o2nm_depend_item(struct config_item *item);
+void o2nm_undepend_item(struct config_item *item);
+
 #endif /* O2CLUSTER_NODEMANAGER_H */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 0b229a9c7952..d58c7dddb853 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1638,8 +1638,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
 
 void o2net_unregister_hb_callbacks(void)
 {
-	o2hb_unregister_callback(&o2net_hb_up);
-	o2hb_unregister_callback(&o2net_hb_down);
+	o2hb_unregister_callback(NULL, &o2net_hb_up);
+	o2hb_unregister_callback(NULL, &o2net_hb_down);
 }
 
 int o2net_register_hb_callbacks(void)
@@ -1651,9 +1651,9 @@ int o2net_register_hb_callbacks(void)
 	o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB,
 			    o2net_hb_node_up_cb, NULL, O2NET_HB_PRI);
 
-	ret = o2hb_register_callback(&o2net_hb_up);
+	ret = o2hb_register_callback(NULL, &o2net_hb_up);
 	if (ret == 0)
-		ret = o2hb_register_callback(&o2net_hb_down);
+		ret = o2hb_register_callback(NULL, &o2net_hb_down);
 
 	if (ret)
 		o2net_unregister_hb_callbacks();
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index d836b98dd99a..6954565b8ccb 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1128,8 +1128,8 @@ bail:
 
 static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
 {
-	o2hb_unregister_callback(&dlm->dlm_hb_up);
-	o2hb_unregister_callback(&dlm->dlm_hb_down);
+	o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
+	o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
 	o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
 }
 
@@ -1141,13 +1141,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
 
 	o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
 			    dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
-	status = o2hb_register_callback(&dlm->dlm_hb_down);
+	status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
 	if (status)
 		goto bail;
 
 	o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
 			    dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
-	status = o2hb_register_callback(&dlm->dlm_hb_up);
+	status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
 	if (status)
 		goto bail;
 
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index b25ef63781ba..352eb4a13f98 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -157,16 +157,16 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
 	if (ocfs2_mount_local(osb))
 		return 0;
 
-	status = o2hb_register_callback(&osb->osb_hb_down);
+	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
-	status = o2hb_register_callback(&osb->osb_hb_up);
+	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
 	if (status < 0) {
 		mlog_errno(status);
-		o2hb_unregister_callback(&osb->osb_hb_down);
+		o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
 	}
 
 bail:
@@ -178,8 +178,8 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
 	if (ocfs2_mount_local(osb))
 		return;
 
-	o2hb_unregister_callback(&osb->osb_hb_down);
-	o2hb_unregister_callback(&osb->osb_hb_up);
+	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
+	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
 }
 
 void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
-- 
cgit v1.2.3


From 16c6a4f24de2933b26477ad5dfb71f518220d641 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 19 Jun 2007 11:34:03 -0700
Subject: ocfs2: live heartbeat depends on the local node configuration

Removing the local node configuration out from underneath a running
heartbeat is "bad".  Provide an API in the ocfs2 nodemanager to request
a configfs dependancy on the local node, then use it in heartbeat.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c   | 17 ++++++++++++++---
 fs/ocfs2/cluster/nodemanager.c | 30 ++++++++++++++++++++++++++++++
 fs/ocfs2/cluster/nodemanager.h |  2 ++
 3 files changed, 46 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index e331f4cb2c81..2877d468f115 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1693,9 +1693,18 @@ static int o2hb_region_get(const char *region_uuid)
 		ret = -ENOENT;
 	spin_unlock(&o2hb_live_lock);
 
-	if (!ret)
-		ret = o2nm_depend_item(&reg->hr_item);
+	if (ret)
+		goto out;
+
+	ret = o2nm_depend_this_node();
+	if (ret)
+		goto out;
 
+	ret = o2nm_depend_item(&reg->hr_item);
+	if (ret)
+		o2nm_undepend_this_node();
+
+out:
 	return ret;
 }
 
@@ -1709,8 +1718,10 @@ static void o2hb_region_put(const char *region_uuid)
 
 	spin_unlock(&o2hb_live_lock);
 
-	if (reg)
+	if (reg) {
 		o2nm_undepend_item(&reg->hr_item);
+		o2nm_undepend_this_node();
+	}
 }
 
 int o2hb_register_callback(const char *region_uuid,
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index eab46d8a7c8c..af2070da308b 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -910,6 +910,36 @@ void o2nm_undepend_item(struct config_item *item)
 	configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
 }
 
+int o2nm_depend_this_node(void)
+{
+	int ret = 0;
+	struct o2nm_node *local_node;
+
+	local_node = o2nm_get_node_by_num(o2nm_this_node());
+	if (!local_node) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = o2nm_depend_item(&local_node->nd_item);
+	o2nm_node_put(local_node);
+
+out:
+	return ret;
+}
+
+void o2nm_undepend_this_node(void)
+{
+	struct o2nm_node *local_node;
+
+	local_node = o2nm_get_node_by_num(o2nm_this_node());
+	BUG_ON(!local_node);
+
+	o2nm_undepend_item(&local_node->nd_item);
+	o2nm_node_put(local_node);
+}
+
+
 static void __exit exit_o2nm(void)
 {
 	if (ocfs2_table_header)
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index 55ae1a00d735..7c860361b8dd 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -79,5 +79,7 @@ void o2nm_node_put(struct o2nm_node *node);
 
 int o2nm_depend_item(struct config_item *item);
 void o2nm_undepend_item(struct config_item *item);
+int o2nm_depend_this_node(void);
+void o2nm_undepend_this_node(void);
 
 #endif /* O2CLUSTER_NODEMANAGER_H */
-- 
cgit v1.2.3


From e6df3a663a5d1ee68aeae7f007197f272700d9cc Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Tue, 6 Feb 2007 15:45:39 -0800
Subject: ocfs2: Wake up a starting region if it gets killed in the background.

Tell o2cb_region_dev_write() to wake up if rmdir(2) happens on the
heartbeat region while it is starting up.  Then o2hb_region_dev_write()
can check to see if it is alive and act accordingly.  This prevents a hang
(not being woken) and a crash (if it's woken by a signal).

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 2877d468f115..2bd7f788cf34 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1335,6 +1335,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	ret = wait_event_interruptible(o2hb_steady_queue,
 				atomic_read(&reg->hr_steady_iterations) == 0);
 	if (ret) {
+		/* We got interrupted (hello ptrace!).  Clean up */
 		spin_lock(&o2hb_live_lock);
 		hb_task = reg->hr_task;
 		reg->hr_task = NULL;
@@ -1345,7 +1346,16 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 		goto out;
 	}
 
-	ret = count;
+	/* Ok, we were woken.  Make sure it wasn't by drop_item() */
+	spin_lock(&o2hb_live_lock);
+	hb_task = reg->hr_task;
+	spin_unlock(&o2hb_live_lock);
+
+	if (hb_task)
+		ret = count;
+	else
+		ret = -EIO;
+
 out:
 	if (filp)
 		fput(filp);
@@ -1523,6 +1533,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
 	if (hb_task)
 		kthread_stop(hb_task);
 
+	/*
+	 * If we're racing a dev_write(), we need to wake them.  They will
+	 * check reg->hr_task
+	 */
+	if (atomic_read(&reg->hr_steady_iterations) != 0) {
+		atomic_set(&reg->hr_steady_iterations, 0);
+		wake_up(&o2hb_steady_queue);
+	}
+
 	config_item_put(item);
 }
 
-- 
cgit v1.2.3


From 800deef3f6f87fee3a2e89cf7237a1f20c1a78d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 May 2007 16:03:13 +0200
Subject: [PATCH] ocfs2: use list_for_each_entry where benefical

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/tcp.c     | 13 +++-----
 fs/ocfs2/dlm/dlmmaster.c   | 40 +++++++-----------------
 fs/ocfs2/dlm/dlmrecovery.c | 77 +++++++++++++++-------------------------------
 fs/ocfs2/dlmglue.c         |  6 ++--
 fs/ocfs2/extent_map.c      | 10 ++----
 fs/ocfs2/journal.c         |  6 ++--
 6 files changed, 47 insertions(+), 105 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index d58c7dddb853..f0bdfd944c44 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -261,14 +261,12 @@ out:
 
 static void o2net_complete_nodes_nsw(struct o2net_node *nn)
 {
-	struct list_head *iter, *tmp;
+	struct o2net_status_wait *nsw, *tmp;
 	unsigned int num_kills = 0;
-	struct o2net_status_wait *nsw;
 
 	assert_spin_locked(&nn->nn_lock);
 
-	list_for_each_safe(iter, tmp, &nn->nn_status_list) {
-		nsw = list_entry(iter, struct o2net_status_wait, ns_node_item);
+	list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) {
 		o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0);
 		num_kills++;
 	}
@@ -764,13 +762,10 @@ EXPORT_SYMBOL_GPL(o2net_register_handler);
 
 void o2net_unregister_handler_list(struct list_head *list)
 {
-	struct list_head *pos, *n;
-	struct o2net_msg_handler *nmh;
+	struct o2net_msg_handler *nmh, *n;
 
 	write_lock(&o2net_handler_lock);
-	list_for_each_safe(pos, n, list) {
-		nmh = list_entry(pos, struct o2net_msg_handler,
-				 nh_unregister_item);
+	list_for_each_entry_safe(nmh, n, list, nh_unregister_item) {
 		mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n",
 		     nmh->nh_func, nmh->nh_msg_type, nmh->nh_key);
 		rb_erase(&nmh->nh_node, &o2net_handler_tree);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 6edffca99d98..65b2b9b92688 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -192,25 +192,20 @@ static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
 static void dlm_dump_mles(struct dlm_ctxt *dlm)
 {
 	struct dlm_master_list_entry *mle;
-	struct list_head *iter;
 	
 	mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
 	spin_lock(&dlm->master_lock);
-	list_for_each(iter, &dlm->master_list) {
-		mle = list_entry(iter, struct dlm_master_list_entry, list);
+	list_for_each_entry(mle, &dlm->master_list, list)
 		dlm_print_one_mle(mle);
-	}
 	spin_unlock(&dlm->master_lock);
 }
 
 int dlm_dump_all_mles(const char __user *data, unsigned int len)
 {
-	struct list_head *iter;
 	struct dlm_ctxt *dlm;
 
 	spin_lock(&dlm_domain_lock);
-	list_for_each(iter, &dlm_domains) {
-		dlm = list_entry (iter, struct dlm_ctxt, list);
+	list_for_each_entry(dlm, &dlm_domains, list) {
 		mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
 		dlm_dump_mles(dlm);
 	}
@@ -454,12 +449,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
 			char *name, unsigned int namelen)
 {
 	struct dlm_master_list_entry *tmpmle;
-	struct list_head *iter;
 
 	assert_spin_locked(&dlm->master_lock);
 
-	list_for_each(iter, &dlm->master_list) {
-		tmpmle = list_entry(iter, struct dlm_master_list_entry, list);
+	list_for_each_entry(tmpmle, &dlm->master_list, list) {
 		if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
 			continue;
 		dlm_get_mle(tmpmle);
@@ -472,13 +465,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
 void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
 {
 	struct dlm_master_list_entry *mle;
-	struct list_head *iter;
 
 	assert_spin_locked(&dlm->spinlock);
 	
-	list_for_each(iter, &dlm->mle_hb_events) {
-		mle = list_entry(iter, struct dlm_master_list_entry, 
-				 hb_events);
+	list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
 		if (node_up)
 			dlm_mle_node_up(dlm, mle, NULL, idx);
 		else
@@ -2434,7 +2424,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
 	int ret;
 	int i;
 	int count = 0;
-	struct list_head *queue, *iter;
+	struct list_head *queue;
 	struct dlm_lock *lock;
 
 	assert_spin_locked(&res->spinlock);
@@ -2453,8 +2443,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
 	ret = 0;
 	queue = &res->granted;
 	for (i = 0; i < 3; i++) {
-		list_for_each(iter, queue) {
-			lock = list_entry(iter, struct dlm_lock, list);
+		list_for_each_entry(lock, queue, list) {
 			++count;
 			if (lock->ml.node == dlm->node_num) {
 				mlog(0, "found a lock owned by this node still "
@@ -2923,18 +2912,16 @@ again:
 static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
 				      struct dlm_lock_resource *res)
 {
-	struct list_head *iter, *iter2;
 	struct list_head *queue = &res->granted;
 	int i, bit;
-	struct dlm_lock *lock;
+	struct dlm_lock *lock, *next;
 
 	assert_spin_locked(&res->spinlock);
 
 	BUG_ON(res->owner == dlm->node_num);
 
 	for (i=0; i<3; i++) {
-		list_for_each_safe(iter, iter2, queue) {
-			lock = list_entry (iter, struct dlm_lock, list);
+		list_for_each_entry_safe(lock, next, queue, list) {
 			if (lock->ml.node != dlm->node_num) {
 				mlog(0, "putting lock for node %u\n",
 				     lock->ml.node);
@@ -2976,7 +2963,6 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
 {
 	int i;
 	struct list_head *queue = &res->granted;
-	struct list_head *iter;
 	struct dlm_lock *lock;
 	int nodenum;
 
@@ -2984,10 +2970,9 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
 
 	spin_lock(&res->spinlock);
 	for (i=0; i<3; i++) {
-		list_for_each(iter, queue) {
+		list_for_each_entry(lock, queue, list) {
 			/* up to the caller to make sure this node
 			 * is alive */
-			lock = list_entry (iter, struct dlm_lock, list);
 			if (lock->ml.node != dlm->node_num) {
 				spin_unlock(&res->spinlock);
 				return lock->ml.node;
@@ -3234,8 +3219,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
 
 void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
 {
-	struct list_head *iter, *iter2;
-	struct dlm_master_list_entry *mle;
+	struct dlm_master_list_entry *mle, *next;
 	struct dlm_lock_resource *res;
 	unsigned int hash;
 
@@ -3245,9 +3229,7 @@ top:
 
 	/* clean the master list */
 	spin_lock(&dlm->master_lock);
-	list_for_each_safe(iter, iter2, &dlm->master_list) {
-		mle = list_entry(iter, struct dlm_master_list_entry, list);
-
+	list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
 		BUG_ON(mle->type != DLM_MLE_BLOCK &&
 		       mle->type != DLM_MLE_MASTER &&
 		       mle->type != DLM_MLE_MIGRATION);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 671c4ed58ee2..74d276ec276f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -158,8 +158,7 @@ void dlm_dispatch_work(struct work_struct *work)
 	struct dlm_ctxt *dlm =
 		container_of(work, struct dlm_ctxt, dispatched_work);
 	LIST_HEAD(tmp_list);
-	struct list_head *iter, *iter2;
-	struct dlm_work_item *item;
+	struct dlm_work_item *item, *next;
 	dlm_workfunc_t *workfunc;
 	int tot=0;
 
@@ -167,13 +166,12 @@ void dlm_dispatch_work(struct work_struct *work)
 	list_splice_init(&dlm->work_list, &tmp_list);
 	spin_unlock(&dlm->work_lock);
 
-	list_for_each_safe(iter, iter2, &tmp_list) {
+	list_for_each_entry(item, &tmp_list, list) {
 		tot++;
 	}
 	mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
 
-	list_for_each_safe(iter, iter2, &tmp_list) {
-		item = list_entry(iter, struct dlm_work_item, list);
+	list_for_each_entry_safe(item, next, &tmp_list, list) {
 		workfunc = item->func;
 		list_del_init(&item->list);
 
@@ -549,7 +547,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 {
 	int status = 0;
 	struct dlm_reco_node_data *ndata;
-	struct list_head *iter;
 	int all_nodes_done;
 	int destroy = 0;
 	int pass = 0;
@@ -567,8 +564,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 
 	/* safe to access the node data list without a lock, since this
 	 * process is the only one to change the list */
-	list_for_each(iter, &dlm->reco.node_data) {
-		ndata = list_entry (iter, struct dlm_reco_node_data, list);
+	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
 		BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
 		ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
 
@@ -655,9 +651,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
 		 * done, or if anyone died */
 		all_nodes_done = 1;
 		spin_lock(&dlm_reco_state_lock);
-		list_for_each(iter, &dlm->reco.node_data) {
-			ndata = list_entry (iter, struct dlm_reco_node_data, list);
-
+		list_for_each_entry(ndata, &dlm->reco.node_data, list) {
 			mlog(0, "checking recovery state of node %u\n",
 			     ndata->node_num);
 			switch (ndata->state) {
@@ -774,16 +768,14 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
 
 static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
 {
-	struct list_head *iter, *iter2;
-	struct dlm_reco_node_data *ndata;
+	struct dlm_reco_node_data *ndata, *next;
 	LIST_HEAD(tmplist);
 
 	spin_lock(&dlm_reco_state_lock);
 	list_splice_init(&dlm->reco.node_data, &tmplist);
 	spin_unlock(&dlm_reco_state_lock);
 
-	list_for_each_safe(iter, iter2, &tmplist) {
-		ndata = list_entry (iter, struct dlm_reco_node_data, list);
+	list_for_each_entry_safe(ndata, next, &tmplist, list) {
 		list_del_init(&ndata->list);
 		kfree(ndata);
 	}
@@ -876,7 +868,6 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
 	struct dlm_lock_resource *res;
 	struct dlm_ctxt *dlm;
 	LIST_HEAD(resources);
-	struct list_head *iter;
 	int ret;
 	u8 dead_node, reco_master;
 	int skip_all_done = 0;
@@ -920,8 +911,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
 
 	/* any errors returned will be due to the new_master dying,
 	 * the dlm_reco_thread should detect this */
-	list_for_each(iter, &resources) {
-		res = list_entry (iter, struct dlm_lock_resource, recovering);
+	list_for_each_entry(res, &resources, recovering) {
 		ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
 				   	DLM_MRES_RECOVERY);
 		if (ret < 0) {
@@ -983,7 +973,6 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
 {
 	struct dlm_ctxt *dlm = data;
 	struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
-	struct list_head *iter;
 	struct dlm_reco_node_data *ndata = NULL;
 	int ret = -EINVAL;
 
@@ -1000,8 +989,7 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
 			dlm->reco.dead_node, done->node_idx, dlm->node_num);
 
 	spin_lock(&dlm_reco_state_lock);
-	list_for_each(iter, &dlm->reco.node_data) {
-		ndata = list_entry (iter, struct dlm_reco_node_data, list);
+	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
 		if (ndata->node_num != done->node_idx)
 			continue;
 
@@ -1049,13 +1037,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
 					struct list_head *list,
 				       	u8 dead_node)
 {
-	struct dlm_lock_resource *res;
-	struct list_head *iter, *iter2;
+	struct dlm_lock_resource *res, *next;
 	struct dlm_lock *lock;
 
 	spin_lock(&dlm->spinlock);
-	list_for_each_safe(iter, iter2, &dlm->reco.resources) {
-		res = list_entry (iter, struct dlm_lock_resource, recovering);
+	list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
 		/* always prune any $RECOVERY entries for dead nodes,
 		 * otherwise hangs can occur during later recovery */
 		if (dlm_is_recovery_lock(res->lockname.name,
@@ -1252,7 +1238,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 			 struct dlm_migratable_lockres *mres,
 			 u8 send_to, u8 flags)
 {
-	struct list_head *queue, *iter;
+	struct list_head *queue;
 	int total_locks, i;
 	u64 mig_cookie = 0;
 	struct dlm_lock *lock;
@@ -1278,9 +1264,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	total_locks = 0;
 	for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
 		queue = dlm_list_idx_to_ptr(res, i);
-		list_for_each(iter, queue) {
-			lock = list_entry (iter, struct dlm_lock, list);
-
+		list_for_each_entry(lock, queue, list) {
 			/* add another lock. */
 			total_locks++;
 			if (!dlm_add_lock_to_array(lock, mres, i))
@@ -1717,7 +1701,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 	struct dlm_lockstatus *lksb = NULL;
 	int ret = 0;
 	int i, j, bad;
-	struct list_head *iter;
 	struct dlm_lock *lock = NULL;
 	u8 from = O2NM_MAX_NODES;
 	unsigned int added = 0;
@@ -1755,8 +1738,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			spin_lock(&res->spinlock);
 			for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
 				tmpq = dlm_list_idx_to_ptr(res, j);
-				list_for_each(iter, tmpq) {
-					lock = list_entry (iter, struct dlm_lock, list);
+				list_for_each_entry(lock, tmpq, list) {
 					if (lock->ml.cookie != ml->cookie)
 						lock = NULL;
 					else
@@ -1930,8 +1912,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
 				       struct dlm_lock_resource *res)
 {
 	int i;
-	struct list_head *queue, *iter, *iter2;
-	struct dlm_lock *lock;
+	struct list_head *queue;
+	struct dlm_lock *lock, *next;
 
 	res->state |= DLM_LOCK_RES_RECOVERING;
 	if (!list_empty(&res->recovering)) {
@@ -1947,8 +1929,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
 	/* find any pending locks and put them back on proper list */
 	for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
 		queue = dlm_list_idx_to_ptr(res, i);
-		list_for_each_safe(iter, iter2, queue) {
-			lock = list_entry (iter, struct dlm_lock, list);
+		list_for_each_entry_safe(lock, next, queue, list) {
 			dlm_lock_get(lock);
 			if (lock->convert_pending) {
 				/* move converting lock back to granted */
@@ -2013,18 +1994,15 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 					      u8 dead_node, u8 new_master)
 {
 	int i;
-	struct list_head *iter, *iter2;
 	struct hlist_node *hash_iter;
 	struct hlist_head *bucket;
-
-	struct dlm_lock_resource *res;
+	struct dlm_lock_resource *res, *next;
 
 	mlog_entry_void();
 
 	assert_spin_locked(&dlm->spinlock);
 
-	list_for_each_safe(iter, iter2, &dlm->reco.resources) {
-		res = list_entry (iter, struct dlm_lock_resource, recovering);
+	list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
 		if (res->owner == dead_node) {
 			list_del_init(&res->recovering);
 			spin_lock(&res->spinlock);
@@ -2099,7 +2077,7 @@ static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local)
 static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
 			       struct dlm_lock_resource *res, u8 dead_node)
 {
-	struct list_head *iter, *queue;
+	struct list_head *queue;
 	struct dlm_lock *lock;
 	int blank_lvb = 0, local = 0;
 	int i;
@@ -2121,8 +2099,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
 
 	for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
 		queue = dlm_list_idx_to_ptr(res, i);
-		list_for_each(iter, queue) {
-			lock = list_entry (iter, struct dlm_lock, list);
+		list_for_each_entry(lock, queue, list) {
 			if (lock->ml.node == search_node) {
 				if (dlm_lvb_needs_invalidation(lock, local)) {
 					/* zero the lksb lvb and lockres lvb */
@@ -2143,8 +2120,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
 static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 				struct dlm_lock_resource *res, u8 dead_node)
 {
-	struct list_head *iter, *tmpiter;
-	struct dlm_lock *lock;
+	struct dlm_lock *lock, *next;
 	unsigned int freed = 0;
 
 	/* this node is the lockres master:
@@ -2155,24 +2131,21 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 	assert_spin_locked(&res->spinlock);
 
 	/* TODO: check pending_asts, pending_basts here */
-	list_for_each_safe(iter, tmpiter, &res->granted) {
-		lock = list_entry (iter, struct dlm_lock, list);
+	list_for_each_entry_safe(lock, next, &res->granted, list) {
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
 			freed++;
 		}
 	}
-	list_for_each_safe(iter, tmpiter, &res->converting) {
-		lock = list_entry (iter, struct dlm_lock, list);
+	list_for_each_entry_safe(lock, next, &res->converting, list) {
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
 			freed++;
 		}
 	}
-	list_for_each_safe(iter, tmpiter, &res->blocked) {
-		lock = list_entry (iter, struct dlm_lock, list);
+	list_for_each_entry_safe(lock, next, &res->blocked, list) {
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index d1bd305ef0d7..f71250ed166f 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -600,15 +600,13 @@ static inline int ocfs2_highest_compat_lock_level(int level)
 static void lockres_set_flags(struct ocfs2_lock_res *lockres,
 			      unsigned long newflags)
 {
-	struct list_head *pos, *tmp;
-	struct ocfs2_mask_waiter *mw;
+	struct ocfs2_mask_waiter *mw, *tmp;
 
  	assert_spin_locked(&lockres->l_lock);
 
 	lockres->l_flags = newflags;
 
-	list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) {
-		mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item);
+	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
 			continue;
 
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index ba2b2ab1c6e4..e23e416ca74c 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -109,17 +109,14 @@ static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  */
 void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 {
-	struct list_head *p, *n;
-	struct ocfs2_extent_map_item *emi;
+	struct ocfs2_extent_map_item *emi, *n;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
 	LIST_HEAD(tmp_list);
 	unsigned int range;
 
 	spin_lock(&oi->ip_lock);
-	list_for_each_safe(p, n, &em->em_list) {
-		emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
-
+	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
 		if (emi->ei_cpos >= cpos) {
 			/* Full truncate of this record. */
 			list_move(&emi->ei_list, &tmp_list);
@@ -136,8 +133,7 @@ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 	}
 	spin_unlock(&oi->ip_lock);
 
-	list_for_each_safe(p, n, &tmp_list) {
-		emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
+	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
 		list_del(&emi->ei_list);
 		kfree(emi);
 	}
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dc1188081720..dbfb20bb27ea 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -722,8 +722,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
 		container_of(work, struct ocfs2_journal, j_recovery_work);
 	struct ocfs2_super *osb = journal->j_osb;
 	struct ocfs2_dinode *la_dinode, *tl_dinode;
-	struct ocfs2_la_recovery_item *item;
-	struct list_head *p, *n;
+	struct ocfs2_la_recovery_item *item,