add idl4k kernel firmware version 1.13.0.105

2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions
--- a/kernel/fs/dlm/Kconfig
+++ b/kernel/fs/dlm/Kconfig
@@ -0,0 +1,17 @@
+menuconfig DLM
+	tristate "Distributed Lock Manager (DLM)"
+	depends on EXPERIMENTAL && INET
+	depends on SYSFS && (IPV6 || IPV6=n)
+	select CONFIGFS_FS
+	select IP_SCTP
+	help
+	A general purpose distributed lock manager for kernel or userspace
+	applications.
+
+config DLM_DEBUG
+	bool "DLM debugging"
+	depends on DLM
+	help
+	Under the debugfs mount point, the name of each lockspace will
+	appear as a file in the "dlm" directory.  The output is the
+	list of resource and locks the local node knows about.
--- a/kernel/fs/dlm/Makefile
+++ b/kernel/fs/dlm/Makefile
@@ -0,0 +1,21 @@
+obj-$(CONFIG_DLM) +=		dlm.o
+dlm-y :=			ast.o \
+				config.o \
+				dir.o \
+				lock.o \
+				lockspace.o \
+				main.o \
+				member.o \
+				memory.o \
+				midcomms.o \
+				netlink.o \
+				lowcomms.o \
+				plock.o \
+				rcom.o \
+				recover.o \
+				recoverd.o \
+				requestqueue.o \
+				user.o \
+				util.o 
+dlm-$(CONFIG_DLM_DEBUG) +=	debug_fs.o
+
--- a/kernel/fs/dlm/ast.c
+++ b/kernel/fs/dlm/ast.c
@@ -0,0 +1,207 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2010 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lock.h"
+#include "user.h"
+#include "ast.h"
+
+#define WAKE_ASTS  0
+
+static struct list_head		ast_queue;
+static spinlock_t		ast_queue_lock;
+static struct task_struct *	astd_task;
+static unsigned long		astd_wakeflags;
+static struct mutex		astd_running;
+
+
+void dlm_del_ast(struct dlm_lkb *lkb)
+{
+	spin_lock(&ast_queue_lock);
+	if (lkb->lkb_ast_type & (AST_COMP | AST_BAST))
+		list_del(&lkb->lkb_astqueue);
+	spin_unlock(&ast_queue_lock);
+}
+
+void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode)
+{
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		dlm_user_add_ast(lkb, type, mode);
+		return;
+	}
+
+	spin_lock(&ast_queue_lock);
+	if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) {
+		kref_get(&lkb->lkb_ref);
+		list_add_tail(&lkb->lkb_astqueue, &ast_queue);
+		lkb->lkb_ast_first = type;
+	}
+
+	/* sanity check, this should not happen */
+
+	if ((type == AST_COMP) && (lkb->lkb_ast_type & AST_COMP))
+		log_print("repeat cast %d castmode %d lock %x %s",
+			  mode, lkb->lkb_castmode,
+			  lkb->lkb_id, lkb->lkb_resource->res_name);
+
+	lkb->lkb_ast_type |= type;
+	if (type == AST_BAST)
+		lkb->lkb_bastmode = mode;
+	else
+		lkb->lkb_castmode = mode;
+	spin_unlock(&ast_queue_lock);
+
+	set_bit(WAKE_ASTS, &astd_wakeflags);
+	wake_up_process(astd_task);
+}
+
+static void process_asts(void)
+{
+	struct dlm_ls *ls = NULL;
+	struct dlm_rsb *r = NULL;
+	struct dlm_lkb *lkb;
+	void (*castfn) (void *astparam);
+	void (*bastfn) (void *astparam, int mode);
+	int type, first, bastmode, castmode, do_bast, do_cast, last_castmode;
+
+repeat:
+	spin_lock(&ast_queue_lock);
+	list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
+		r = lkb->lkb_resource;
+		ls = r->res_ls;
+
+		if (dlm_locking_stopped(ls))
+			continue;
+
+		list_del(&lkb->lkb_astqueue);
+		type = lkb->lkb_ast_type;
+		lkb->lkb_ast_type = 0;
+		first = lkb->lkb_ast_first;
+		lkb->lkb_ast_first = 0;
+		bastmode = lkb->lkb_bastmode;
+		castmode = lkb->lkb_castmode;
+		castfn = lkb->lkb_astfn;
+		bastfn = lkb->lkb_bastfn;
+		spin_unlock(&ast_queue_lock);
+
+		do_cast = (type & AST_COMP) && castfn;
+		do_bast = (type & AST_BAST) && bastfn;
+
+		/* Skip a bast if its blocking mode is compatible with the
+		   granted mode of the preceding cast. */
+
+		if (do_bast) {
+			if (first == AST_COMP)
+				last_castmode = castmode;
+			else
+				last_castmode = lkb->lkb_castmode_done;
+			if (dlm_modes_compat(bastmode, last_castmode))
+				do_bast = 0;
+		}
+
+		if (first == AST_COMP) {
+			if (do_cast)
+				castfn(lkb->lkb_astparam);
+			if (do_bast)
+				bastfn(lkb->lkb_astparam, bastmode);
+		} else if (first == AST_BAST) {
+			if (do_bast)
+				bastfn(lkb->lkb_astparam, bastmode);
+			if (do_cast)
+				castfn(lkb->lkb_astparam);
+		} else {
+			log_error(ls, "bad ast_first %d ast_type %d",
+				  first, type);
+		}
+
+		if (do_cast)
+			lkb->lkb_castmode_done = castmode;
+		if (do_bast)
+			lkb->lkb_bastmode_done = bastmode;
+
+		/* this removes the reference added by dlm_add_ast
+		   and may result in the lkb being freed */
+		dlm_put_lkb(lkb);
+
+		cond_resched();
+		goto repeat;
+	}
+	spin_unlock(&ast_queue_lock);
+}
+
+static inline int no_asts(void)
+{
+	int ret;
+
+	spin_lock(&ast_queue_lock);
+	ret = list_empty(&ast_queue);
+	spin_unlock(&ast_queue_lock);
+	return ret;
+}
+
+static int dlm_astd(void *data)
+{
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!test_bit(WAKE_ASTS, &astd_wakeflags))
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		mutex_lock(&astd_running);
+		if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags))
+			process_asts();
+		mutex_unlock(&astd_running);
+	}
+	return 0;
+}
+
+void dlm_astd_wake(void)
+{
+	if (!no_asts()) {
+		set_bit(WAKE_ASTS, &astd_wakeflags);
+		wake_up_process(astd_task);
+	}
+}
+
+int dlm_astd_start(void)
+{
+	struct task_struct *p;
+	int error = 0;
+
+	INIT_LIST_HEAD(&ast_queue);
+	spin_lock_init(&ast_queue_lock);
+	mutex_init(&astd_running);
+
+	p = kthread_run(dlm_astd, NULL, "dlm_astd");
+	if (IS_ERR(p))
+		error = PTR_ERR(p);
+	else
+		astd_task = p;
+	return error;
+}
+
+void dlm_astd_stop(void)
+{
+	kthread_stop(astd_task);
+}
+
+void dlm_astd_suspend(void)
+{
+	mutex_lock(&astd_running);
+}
+
+void dlm_astd_resume(void)
+{
+	mutex_unlock(&astd_running);
+}
+
--- a/kernel/fs/dlm/ast.h
+++ b/kernel/fs/dlm/ast.h
@@ -0,0 +1,26 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2010 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __ASTD_DOT_H__
+#define __ASTD_DOT_H__
+
+void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode);
+void dlm_del_ast(struct dlm_lkb *lkb);
+
+void dlm_astd_wake(void);
+int dlm_astd_start(void);
+void dlm_astd_stop(void);
+void dlm_astd_suspend(void);
+void dlm_astd_resume(void);
+
+#endif
+
--- a/kernel/fs/dlm/config.c
+++ b/kernel/fs/dlm/config.c
--- a/kernel/fs/dlm/config.h
+++ b/kernel/fs/dlm/config.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __CONFIG_DOT_H__
+#define __CONFIG_DOT_H__
+
+#define DLM_MAX_ADDR_COUNT 3
+
+struct dlm_config_info {
+	int ci_tcp_port;
+	int ci_buffer_size;
+	int ci_rsbtbl_size;
+	int ci_lkbtbl_size;
+	int ci_dirtbl_size;
+	int ci_recover_timer;
+	int ci_toss_secs;
+	int ci_scan_secs;
+	int ci_log_debug;
+	int ci_protocol;
+	int ci_timewarn_cs;
+};
+
+extern struct dlm_config_info dlm_config;
+
+int dlm_config_init(void);
+void dlm_config_exit(void);
+int dlm_node_weight(char *lsname, int nodeid);
+int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
+		    int **new_out, int *new_count_out);
+int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
+int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
+int dlm_our_nodeid(void);
+int dlm_our_addr(struct sockaddr_storage *addr, int num);
+
+#endif				/* __CONFIG_DOT_H__ */
+
--- a/kernel/fs/dlm/debug_fs.c
+++ b/kernel/fs/dlm/debug_fs.c
@@ -0,0 +1,729 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2009 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+
+#include "dlm_internal.h"
+#include "lock.h"
+
+#define DLM_DEBUG_BUF_LEN 4096
+static char debug_buf[DLM_DEBUG_BUF_LEN];
+static struct mutex debug_buf_lock;
+
+static struct dentry *dlm_root;
+
+static char *print_lockmode(int mode)
+{
+	switch (mode) {
+	case DLM_LOCK_IV:
+		return "--";
+	case DLM_LOCK_NL:
+		return "NL";
+	case DLM_LOCK_CR:
+		return "CR";
+	case DLM_LOCK_CW:
+		return "CW";
+	case DLM_LOCK_PR:
+		return "PR";
+	case DLM_LOCK_PW:
+		return "PW";
+	case DLM_LOCK_EX:
+		return "EX";
+	default:
+		return "??";
+	}
+}
+
+static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			      struct dlm_rsb *res)
+{
+	seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
+
+	if (lkb->lkb_status == DLM_LKSTS_CONVERT ||
+	    lkb->lkb_status == DLM_LKSTS_WAITING)
+		seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode));
+
+	if (lkb->lkb_nodeid) {
+		if (lkb->lkb_nodeid != res->res_nodeid)
+			seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid,
+				   lkb->lkb_remid);
+		else
+			seq_printf(s, " Master:     %08x", lkb->lkb_remid);
+	}
+
+	if (lkb->lkb_wait_type)
+		seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
+
+	return seq_printf(s, "\n");
+}
+
+static int print_format1(struct dlm_rsb *res, struct seq_file *s)
+{
+	struct dlm_lkb *lkb;
+	int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
+	int rv;
+
+	lock_rsb(res);
+
+	rv = seq_printf(s, "\nResource %p Name (len=%d) \"",
+			res, res->res_length);
+	if (rv)
+		goto out;
+
+	for (i = 0; i < res->res_length; i++) {
+		if (isprint(res->res_name[i]))
+			seq_printf(s, "%c", res->res_name[i]);
+		else
+			seq_printf(s, "%c", '.');
+	}
+
+	if (res->res_nodeid > 0)
+		rv = seq_printf(s, "\"  \nLocal Copy, Master is node %d\n",
+				res->res_nodeid);
+	else if (res->res_nodeid == 0)
+		rv = seq_printf(s, "\"  \nMaster Copy\n");
+	else if (res->res_nodeid == -1)
+		rv = seq_printf(s, "\"  \nLooking up master (lkid %x)\n",
+			   	res->res_first_lkid);
+	else
+		rv = seq_printf(s, "\"  \nInvalid master %d\n",
+				res->res_nodeid);
+	if (rv)
+		goto out;
+
+	/* Print the LVB: */
+	if (res->res_lvbptr) {
+		seq_printf(s, "LVB: ");
+		for (i = 0; i < lvblen; i++) {
+			if (i == lvblen / 2)
+				seq_printf(s, "\n     ");
+			seq_printf(s, "%02x ",
+				   (unsigned char) res->res_lvbptr[i]);
+		}
+		if (rsb_flag(res, RSB_VALNOTVALID))
+			seq_printf(s, " (INVALID)");
+		rv = seq_printf(s, "\n");
+		if (rv)
+			goto out;
+	}
+
+	root_list = !list_empty(&res->res_root_list);
+	recover_list = !list_empty(&res->res_recover_list);
+
+	if (root_list || recover_list) {
+		rv = seq_printf(s, "Recovery: root %d recover %d flags %lx "
+				"count %d\n", root_list, recover_list,
+			   	res->res_flags, res->res_recover_locks_count);
+		if (rv)
+			goto out;
+	}
+
+	/* Print the locks attached to this resource */
+	seq_printf(s, "Granted Queue\n");
+	list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) {
+		rv = print_format1_lock(s, lkb, res);
+		if (rv)
+			goto out;
+	}
+
+	seq_printf(s, "Conversion Queue\n");
+	list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) {
+		rv = print_format1_lock(s, lkb, res);
+		if (rv)
+			goto out;
+	}
+
+	seq_printf(s, "Waiting Queue\n");
+	list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) {
+		rv = print_format1_lock(s, lkb, res);
+		if (rv)
+			goto out;
+	}
+
+	if (list_empty(&res->res_lookup))
+		goto out;
+
+	seq_printf(s, "Lookup Queue\n");
+	list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) {
+		rv = seq_printf(s, "%08x %s", lkb->lkb_id,
+				print_lockmode(lkb->lkb_rqmode));
+		if (lkb->lkb_wait_type)
+			seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
+		rv = seq_printf(s, "\n");
+	}
+ out:
+	unlock_rsb(res);
+	return rv;
+}
+
+static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			      struct dlm_rsb *r)
+{
+	u64 xid = 0;
+	u64 us;
+	int rv;
+
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		if (lkb->lkb_ua)
+			xid = lkb->lkb_ua->xid;
+	}
+
+	/* microseconds since lkb was added to current queue */
+	us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_timestamp));
+
+	/* id nodeid remid pid xid exflags flags sts grmode rqmode time_us
+	   r_nodeid r_len r_name */
+
+	rv = seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n",
+			lkb->lkb_id,
+			lkb->lkb_nodeid,
+			lkb->lkb_remid,
+			lkb->lkb_ownpid,
+			(unsigned long long)xid,
+			lkb->lkb_exflags,
+			lkb->lkb_flags,
+			lkb->lkb_status,
+			lkb->lkb_grmode,
+			lkb->lkb_rqmode,
+			(unsigned long long)us,
+			r->res_nodeid,
+			r->res_length,
+			r->res_name);
+	return rv;
+}
+
+static int print_format2(struct dlm_rsb *r, struct seq_file *s)
+{
+	struct dlm_lkb *lkb;
+	int rv = 0;
+
+	lock_rsb(r);
+
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
+		rv = print_format2_lock(s, lkb, r);
+		if (rv)
+			goto out;
+	}
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
+		rv = print_format2_lock(s, lkb, r);
+		if (rv)
+			goto out;
+	}
+
+	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
+		rv = print_format2_lock(s, lkb, r);
+		if (rv)
+			goto out;
+	}
+ out:
+	unlock_rsb(r);
+	return rv;
+}
+
+static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			      int rsb_lookup)
+{
+	u64 xid = 0;
+	int rv;
+
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		if (lkb->lkb_ua)
+			xid = lkb->lkb_ua->xid;
+	}
+
+	rv = seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n",
+			lkb->lkb_id,
+			lkb->lkb_nodeid,
+			lkb->lkb_remid,
+			lkb->lkb_ownpid,
+			(unsigned long long)xid,
+			lkb->lkb_exflags,
+			lkb->lkb_flags,
+			lkb->lkb_status,
+			lkb->lkb_grmode,
+			lkb->lkb_rqmode,
+			lkb->lkb_highbast,
+			rsb_lookup,
+			lkb->lkb_wait_type,
+			lkb->lkb_lvbseq,
+			(unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
+			(unsigned long long)ktime_to_ns(lkb->lkb_time_bast));
+	return rv;
+}
+
+static int print_format3(struct dlm_rsb *r, struct seq_file *s)
+{
+	struct dlm_lkb *lkb;
+	int i, lvblen = r->res_ls->ls_lvblen;
+	int print_name = 1;
+	int rv;
+
+	lock_rsb(r);
+
+	rv = seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ",
+			r,
+			r->res_nodeid,
+			r->res_first_lkid,
+			r->res_flags,
+			!list_empty(&r->res_root_list),
+			!list_empty(&r->res_recover_list),
+			r->res_recover_locks_count,
+			r->res_length);
+	if (rv)
+		goto out;
+
+	for (i = 0; i < r->res_length; i++) {
+		if (!isascii(r->res_name[i]) || !isprint(r->res_name[i]))
+			print_name = 0;
+	}
+
+	seq_printf(s, "%s", print_name ? "str " : "hex");
+
+	for (i = 0; i < r->res_length; i++) {
+		if (print_name)
+			seq_printf(s, "%c", r->res_name[i]);
+		else
+			seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
+	}
+	rv = seq_printf(s, "\n");
+	if (rv)
+		goto out;
+
+	if (!r->res_lvbptr)
+		goto do_locks;
+
+	seq_printf(s, "lvb %u %d", r->res_lvbseq, lvblen);
+
+	for (i = 0; i < lvblen; i++)
+		seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]);
+	rv = seq_printf(s, "\n");
+	if (rv)
+		goto out;
+
+ do_locks:
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
+		rv = print_format3_lock(s, lkb, 0);
+		if (rv)
+			goto out;
+	}
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
+		rv = print_format3_lock(s, lkb, 0);
+		if (rv)
+			goto out;
+	}
+
+	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
+		rv = print_format3_lock(s, lkb, 0);
+		if (rv)
+			goto out;
+	}
+
+	list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup) {
+		rv = print_format3_lock(s, lkb, 1);
+		if (rv)
+			goto out;
+	}
+ out:
+	unlock_rsb(r);
+	return rv;
+}
+
+struct rsbtbl_iter {
+	struct dlm_rsb *rsb;
+	unsigned bucket;
+	int format;
+	int header;
+};
+
+/* seq_printf returns -1 if the buffer is full, and 0 otherwise.
+   If the buffer is full, seq_printf can be called again, but it
+   does nothing and just returns -1.  So, the these printing routines
+   periodically check the return value to avoid wasting too much time
+   trying to print to a full buffer. */
+
+static int table_seq_show(struct seq_file *seq, void *iter_ptr)
+{
+	struct rsbtbl_iter *ri = iter_ptr;
+	int rv = 0;
+
+	switch (ri->format) {
+	case 1:
+		rv = print_format1(ri->rsb, seq);
+		break;
+	case 2:
+		if (ri->header) {
+			seq_printf(seq, "id nodeid remid pid xid exflags "
+					"flags sts grmode rqmode time_ms "
+					"r_nodeid r_len r_name\n");
+			ri->header = 0;
+		}
+		rv = print_format2(ri->rsb, seq);
+		break;
+	case 3:
+		if (ri->header) {
+			seq_printf(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n");
+			ri->header = 0;
+		}
+		rv = print_format3(ri->rsb, seq);
+		break;
+	}
+
+	return rv;
+}
+
+static const struct seq_operations format1_seq_ops;
+static const struct seq_operations format2_seq_ops;
+static const struct seq_operations format3_seq_ops;
+
+static void *table_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct dlm_ls *ls = seq->private;
+	struct rsbtbl_iter *ri;
+	struct dlm_rsb *r;
+	loff_t n = *pos;
+	unsigned bucket, entry;
+
+	bucket = n >> 32;
+	entry = n & ((1LL << 32) - 1);
+
+	if (bucket >= ls->ls_rsbtbl_size)
+		return NULL;
+
+	ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_NOFS);
+	if (!ri)
+		return NULL;
+	if (n == 0)
+		ri->header = 1;
+	if (seq->op == &format1_seq_ops)
+		ri->format = 1;
+	if (seq->op == &format2_seq_ops)
+		ri->format = 2;
+	if (seq->op == &format3_seq_ops)
+		ri->format = 3;
+
+	spin_lock(&ls->ls_rsbtbl[bucket].lock);
+	if (!list_empty(&ls->ls_rsbtbl[bucket].list)) {
+		list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list,
+				    res_hashchain) {
+			if (!entry--) {
+				dlm_hold_rsb(r);
+				ri->rsb = r;
+				ri->bucket = bucket;
+				spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+				return ri;
+			}
+		}
+	}
+	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+
+	/*
+	 * move to the first rsb in the next non-empty bucket
+	 */
+
+	/* zero the entry */
+	n &= ~((1LL << 32) - 1);
+
+	while (1) {
+		bucket++;
+		n += 1LL << 32;
+
+		if (bucket >= ls->ls_rsbtbl_size) {
+			kfree(ri);
+			return NULL;
+		}
+
+		spin_lock(&ls->ls_rsbtbl[bucket].lock);
+		if (!list_empty(&ls->ls_rsbtbl[bucket].list)) {
+			r = list_first_entry(&ls->ls_rsbtbl[bucket].list,
+					     struct dlm_rsb, res_hashchain);
+			dlm_hold_rsb(r);
+			ri->rsb = r;
+			ri->bucket = bucket;
+			spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+			*pos = n;
+			return ri;
+		}
+		spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+	}
+}
+
+static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
+{
+	struct dlm_ls *ls = seq->private;
+	struct rsbtbl_iter *ri = iter_ptr;
+	struct list_head *next;
+	struct dlm_rsb *r, *rp;
+	loff_t n = *pos;
+	unsigned bucket;
+
+	bucket = n >> 32;
+
+	/*
+	 * move to the next rsb in the same bucket
+	 */
+
+	spin_lock(&ls->ls_rsbtbl[bucket].lock);
+	rp = ri->rsb;
+	next = rp->res_hashchain.next;
+
+	if (next != &ls->ls_rsbtbl[bucket].list) {
+		r = list_entry(next, struct dlm_rsb, res_hashchain);
+		dlm_hold_rsb(r);
+		ri->rsb = r;
+		spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+		dlm_put_rsb(rp);
+		++*pos;
+		return ri;
+	}
+	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+	dlm_put_rsb(rp);
+
+	/*
+	 * move to the first rsb in the next non-empty bucket
+	 */
+
+	/* zero the entry */
+	n &= ~((1LL << 32) - 1);
+
+	while (1) {
+		bucket++;
+		n += 1LL << 32;
+
+		if (bucket >= ls->ls_rsbtbl_size) {
+			kfree(ri);
+			return NULL;
+		}
+
+		spin_lock(&ls->ls_rsbtbl[bucket].lock);
+		if (!list_empty(&ls->ls_rsbtbl[bucket].list)) {
+			r = list_first_entry(&ls->ls_rsbtbl[bucket].list,
+					     struct dlm_rsb, res_hashchain);
+			dlm_hold_rsb(r);
+			ri->rsb = r;
+			ri->bucket = bucket;
+			spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+			*pos = n;
+			return ri;
+		}
+		spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+	}
+}
+
+static void table_seq_stop(struct seq_file *seq, void *iter_ptr)
+{
+	struct rsbtbl_iter *ri = iter_ptr;
+
+	if (ri) {
+		dlm_put_rsb(ri->rsb);
+		kfree(ri);
+	}
+}
+
+static const struct seq_operations format1_seq_ops = {
+	.start = table_seq_start,
+	.next  = table_seq_next,
+	.stop  = table_seq_stop,
+	.show  = table_seq_show,
+};
+
+static const struct seq_operations format2_seq_ops = {
+	.start = table_seq_start,
+	.next  = table_seq_next,
+	.stop  = table_seq_stop,
+	.show  = table_seq_show,
+};
+
+static const struct seq_operations format3_seq_ops = {
+	.start = table_seq_start,
+	.next  = table_seq_next,
+	.stop  = table_seq_stop,
+	.show  = table_seq_show,
+};
+
+static const struct file_operations format1_fops;
+static const struct file_operations format2_fops;
+static const struct file_operations format3_fops;
+
+static int table_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret = -1;
+
+	if (file->f_op == &format1_fops)
+		ret = seq_open(file, &format1_seq_ops);
+	else if (file->f_op == &format2_fops)
+		ret = seq_open(file, &format2_seq_ops);
+	else if (file->f_op == &format3_fops)
+		ret = seq_open(file, &format3_seq_ops);
+
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private; /* the dlm_ls */
+	return 0;
+}
+
+static const struct file_operations format1_fops = {
+	.owner   = THIS_MODULE,
+	.open    = table_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+static const struct file_operations format2_fops = {
+	.owner   = THIS_MODULE,
+	.open    = table_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+static const struct file_operations format3_fops = {
+	.owner   = THIS_MODULE,
+	.open    = table_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+/*
+ * dump lkb's on the ls_waiters list
+ */
+
+static int waiters_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t waiters_read(struct file *file, char __user *userbuf,
+			    size_t count, loff_t *ppos)
+{
+	struct dlm_ls *ls = file->private_data;
+	struct dlm_lkb *lkb;
+	size_t len = DLM_DEBUG_BUF_LEN, pos = 0, ret, rv;
+
+	mutex_lock(&debug_buf_lock);
+	mutex_lock(&ls->ls_waiters_mutex);
+	memset(debug_buf, 0, sizeof(debug_buf));
+
+	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+		ret = snprintf(debug_buf + pos, len - pos, "%x %d %d %s\n",
+			       lkb->lkb_id, lkb->lkb_wait_type,
+			       lkb->lkb_nodeid, lkb->lkb_resource->res_name);
+		if (ret >= len - pos)
+			break;
+		pos += ret;
+	}
+	mutex_unlock(&ls->ls_waiters_mutex);
+
+	rv = simple_read_from_buffer(userbuf, count, ppos, debug_buf, pos);
+	mutex_unlock(&debug_buf_lock);
+	return rv;
+}
+
+static const struct file_operations waiters_fops = {
+	.owner   = THIS_MODULE,
+	.open    = waiters_open,
+	.read    = waiters_read
+};
+
+void dlm_delete_debug_file(struct dlm_ls *ls)
+{
+	if (ls->ls_debug_rsb_dentry)
+		debugfs_remove(ls->ls_debug_rsb_dentry);
+	if (ls->ls_debug_waiters_dentry)
+		debugfs_remove(ls->ls_debug_waiters_dentry);
+	if (ls->ls_debug_locks_dentry)
+		debugfs_remove(ls->ls_debug_locks_dentry);
+	if (ls->ls_debug_all_dentry)
+		debugfs_remove(ls->ls_debug_all_dentry);
+}
+
+int dlm_create_debug_file(struct dlm_ls *ls)
+{
+	char name[DLM_LOCKSPACE_LEN+8];
+
+	/* format 1 */
+
+	ls->ls_debug_rsb_dentry = debugfs_create_file(ls->ls_name,
+						      S_IFREG | S_IRUGO,
+						      dlm_root,
+						      ls,
+						      &format1_fops);
+	if (!ls->ls_debug_rsb_dentry)
+		goto fail;
+
+	/* format 2 */
+
+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+
+	ls->ls_debug_locks_dentry = debugfs_create_file(name,
+							S_IFREG | S_IRUGO,
+							dlm_root,
+							ls,
+							&format2_fops);
+	if (!ls->ls_debug_locks_dentry)
+		goto fail;
+
+	/* format 3 */
+
+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_all", ls->ls_name);
+
+	ls->ls_debug_all_dentry = debugfs_create_file(name,
+						      S_IFREG | S_IRUGO,
+						      dlm_root,
+						      ls,
+						      &format3_fops);
+	if (!ls->ls_debug_all_dentry)
+		goto fail;
+
+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name);
+
+	ls->ls_debug_waiters_dentry = debugfs_create_file(name,
+							  S_IFREG | S_IRUGO,
+							  dlm_root,
+							  ls,
+							  &waiters_fops);
+	if (!ls->ls_debug_waiters_dentry)
+		goto fail;
+
+	return 0;
+
+ fail:
+	dlm_delete_debug_file(ls);
+	return -ENOMEM;
+}
+
+int __init dlm_register_debugfs(void)
+{
+	mutex_init(&debug_buf_lock);
+	dlm_root = debugfs_create_dir("dlm", NULL);
+	return dlm_root ? 0 : -ENOMEM;
+}
+
+void dlm_unregister_debugfs(void)
+{
+	debugfs_remove(dlm_root);
+}
+
--- a/kernel/fs/dlm/dir.c
+++ b/kernel/fs/dlm/dir.c
@@ -0,0 +1,441 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "member.h"
+#include "lowcomms.h"
+#include "rcom.h"
+#include "config.h"
+#include "memory.h"
+#include "recover.h"
+#include "util.h"
+#include "lock.h"
+#include "dir.h"
+
+
+static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de)
+{
+	spin_lock(&ls->ls_recover_list_lock);
+	list_add(&de->list, &ls->ls_recover_list);
+	spin_unlock(&ls->ls_recover_list_lock);
+}
+
+static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
+{
+	int found = 0;
+	struct dlm_direntry *de;
+
+	spin_lock(&ls->ls_recover_list_lock);
+	list_for_each_entry(de, &ls->ls_recover_list, list) {
+		if (de->length == len) {
+			list_del(&de->list);
+			de->master_nodeid = 0;
+			memset(de->name, 0, len);
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock(&ls->ls_recover_list_lock);
+
+	if (!found)
+		de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS);
+	return de;
+}
+
+void dlm_clear_free_entries(struct dlm_ls *ls)
+{
+	struct dlm_direntry *de;
+
+	spin_lock(&ls->ls_recover_list_lock);
+	while (!list_empty(&ls->ls_recover_list)) {
+		de = list_entry(ls->ls_recover_list.next, struct dlm_direntry,
+				list);
+		list_del(&de->list);
+		kfree(de);
+	}
+	spin_unlock(&ls->ls_recover_list_lock);
+}
+
+/*
+ * We use the upper 16 bits of the hash value to select the directory node.
+ * Low bits are used for distribution of rsb's among hash buckets on each node.
+ *
+ * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
+ * num_nodes to the hash value.  This value in the desired range is used as an
+ * offset into the sorted list of nodeid's to give the particular nodeid.
+ */
+
+int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
+{
+	struct list_head *tmp;
+	struct dlm_member *memb = NULL;
+	uint32_t node, n = 0;
+	int nodeid;
+
+	if (ls->ls_num_nodes == 1) {
+		nodeid = dlm_our_nodeid();
+		goto out;
+	}
+
+	if (ls->ls_node_array) {
+		node = (hash >> 16) % ls->ls_total_weight;
+		nodeid = ls->ls_node_array[node];
+		goto out;
+	}
+
+	/* make_member_array() failed to kmalloc ls_node_array... */
+
+	node = (hash >> 16) % ls->ls_num_nodes;
+
+	list_for_each(tmp, &ls->ls_nodes) {
+		if (n++ != node)
+			continue;
+		memb = list_entry(tmp, struct dlm_member, list);
+		break;
+	}
+
+	DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n",
+				 ls->ls_num_nodes, n, node););
+	nodeid = memb->nodeid;
+ out:
+	return nodeid;
+}
+
+int dlm_dir_nodeid(struct dlm_rsb *r)
+{
+	return dlm_hash2nodeid(r->res_ls, r->res_hash);
+}
+
+static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len)
+{
+	uint32_t val;
+
+	val = jhash(name, len, 0);
+	val &= (ls->ls_dirtbl_size - 1);
+
+	return val;
+}
+
+static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de)
+{
+	uint32_t bucket;
+
+	bucket = dir_hash(ls, de->name, de->length);
+	list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
+}
+
+static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name,
+					  int namelen, uint32_t bucket)
+{
+	struct dlm_direntry *de;
+
+	list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) {
+		if (de->length == namelen && !memcmp(name, de->name, namelen))
+			goto out;
+	}
+	de = NULL;
+ out:
+	return de;
+}
+
+void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen)
+{
+	struct dlm_direntry *de;
+	uint32_t bucket;
+
+	bucket = dir_hash(ls, name, namelen);
+
+	spin_lock(&ls->ls_dirtbl[bucket].lock);
+
+	de = search_bucket(ls, name, namelen, bucket);
+
+	if (!de) {
+		log_error(ls, "remove fr %u none", nodeid);
+		goto out;
+	}
+
+	if (de->master_nodeid != nodeid) {
+		log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid);
+		goto out;
+	}
+
+	list_del(&de->list);
+	kfree(de);
+ out:
+	spin_unlock(&ls->ls_dirtbl[bucket].lock);
+}
+
+void dlm_dir_clear(struct dlm_ls *ls)
+{
+	struct list_head *head;
+	struct dlm_direntry *de;
+	int i;
+
+	DLM_ASSERT(list_empty(&ls->ls_recover_list), );
+
+	for (i = 0; i < ls->ls_dirtbl_size; i++) {
+		spin_lock(&ls->ls_dirtbl[i].lock);
+		head = &ls->ls_dirtbl[i].list;
+		while (!list_empty(head)) {
+			de = list_entry(head->next, struct dlm_direntry, list);
+			list_del(&de->list);
+			put_free_de(ls, de);
+		}
+		spin_unlock(&ls->ls_dirtbl[i].lock);
+	}
+}
+
+int dlm_recover_directory(struct dlm_ls *ls)
+{
+	struct dlm_member *memb;
+	struct dlm_direntry *de;
+	char *b, *last_name = NULL;
+	int error = -ENOMEM, last_len, count = 0;
+	uint16_t namelen;
+
+	log_debug(ls, "dlm_recover_directory");
+
+	if (dlm_no_directory(ls))
+		goto out_status;
+
+	dlm_dir_clear(ls);
+
+	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
+	if (!last_name)
+		goto out;
+
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		memset(last_name, 0, DLM_RESNAME_MAXLEN);
+		last_len = 0;
+
+		for (;;) {
+			int left;
+			error = dlm_recovery_stopped(ls);
+			if (error)
+				goto out_free;
+
+			error = dlm_rcom_names(ls, memb->nodeid,
+					       last_name, last_len);
+			if (error)
+				goto out_free;
+
+			schedule();
+
+			/*
+			 * pick namelen/name pairs out of received buffer
+			 */
+
+			b = ls->ls_recover_buf->rc_buf;
+			left = ls->ls_recover_buf->rc_header.h_length;
+			left -= sizeof(struct dlm_rcom);
+
+			for (;;) {
+				__be16 v;
+
+				error = -EINVAL;
+				if (left < sizeof(__be16))
+					goto out_free;
+
+				memcpy(&v, b, sizeof(__be16));
+				namelen = be16_to_cpu(v);
+				b += sizeof(__be16);
+				left -= sizeof(__be16);
+
+				/* namelen of 0xFFFFF marks end of names for
+				   this node; namelen of 0 marks end of the
+				   buffer */
+
+				if (namelen == 0xFFFF)
+					goto done;
+				if (!namelen)
+					break;
+
+				if (namelen > left)
+					goto out_free;
+
+				if (namelen > DLM_RESNAME_MAXLEN)
+					goto out_free;
+
+				error = -ENOMEM;
+				de = get_free_de(ls, namelen);
+				if (!de)
+					goto out_free;
+
+				de->master_nodeid = memb->nodeid;
+				de->length = namelen;
+				last_len = namelen;
+				memcpy(de->name, b, namelen);
+				memcpy(last_name, b, namelen);
+				b += namelen;
+				left -= namelen;
+
+				add_entry_to_hash(ls, de);
+				count++;
+			}
+		}
+         done:
+		;
+	}
+
+ out_status:
+	error = 0;
+	dlm_set_recover_status(ls, DLM_RS_DIR);
+	log_debug(ls, "dlm_recover_directory %d entries", count);
+ out_free:
+	kfree(last_name);
+ out:
+	dlm_clear_free_entries(ls);
+	return error;
+}
+
+static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
+		     int namelen, int *r_nodeid)
+{
+	struct dlm_direntry *de, *tmp;
+	uint32_t bucket;
+
+	bucket = dir_hash(ls, name, namelen);
+
+	spin_lock(&ls->ls_dirtbl[bucket].lock);
+	de = search_bucket(ls, name, namelen, bucket);
+	if (de) {
+		*r_nodeid = de->master_nodeid;
+		spin_unlock(&ls->ls_dirtbl[bucket].lock);
+		if (*r_nodeid == nodeid)
+			return -EEXIST;
+		return 0;
+	}
+
+	spin_unlock(&ls->ls_dirtbl[bucket].lock);
+
+	if (namelen > DLM_RESNAME_MAXLEN)
+		return -EINVAL;
+
+	de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS);
+	if (!de)
+		return -ENOMEM;
+
+	de->master_nodeid = nodeid;
+	de->length = namelen;
+	memcpy(de->name, name, namelen);
+
+	spin_lock(&ls->ls_dirtbl[bucket].lock);
+	tmp = search_bucket(ls, name, namelen, bucket);
+	if (tmp) {
+		kfree(de);
+		de = tmp;
+	} else {
+		list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
+	}
+	*r_nodeid = de->master_nodeid;
+	spin_unlock(&ls->ls_dirtbl[bucket].lock);
+	return 0;
+}
+
+int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
+		   int *r_nodeid)
+{
+	return get_entry(ls, nodeid, name, namelen, r_nodeid);
+}
+
+static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
+{
+	struct dlm_rsb *r;
+
+	down_read(&ls->ls_root_sem);
+	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+		if (len == r->res_length && !memcmp(name, r->res_name, len)) {
+			up_read(&ls->ls_root_sem);
+			return r;
+		}
+	}
+	up_read(&ls->ls_root_sem);
+	return NULL;
+}
+
+/* Find the rsb where we left off (or start again), then send rsb names
+   for rsb's we're master of and whose directory node matches the requesting
+   node.  inbuf is the rsb name last sent, inlen is the name's length */
+
+void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
+ 			   char *outbuf, int outlen, int nodeid)
+{
+	struct list_head *list;
+	struct dlm_rsb *r;
+	int offset = 0, dir_nodeid;
+	__be16 be_namelen;
+
+	down_read(&ls->ls_root_sem);
+
+	if (inlen > 1) {
+		r = find_rsb_root(ls, inbuf, inlen);
+		if (!r) {
+			inbuf[inlen - 1] = '\0';
+			log_error(ls, "copy_master_names from %d start %d %s",
+				  nodeid, inlen, inbuf);
+			goto out;
+		}
+		list = r->res_root_list.next;
+	} else {
+		list = ls->ls_root_list.next;
+	}
+
+	for (offset = 0; list != &ls->ls_root_list; list = list->next) {
+		r = list_entry(list, struct dlm_rsb, res_root_list);
+		if (r->res_nodeid)
+			continue;
+
+		dir_nodeid = dlm_dir_nodeid(r);
+		if (dir_nodeid != nodeid)
+			continue;
+
+		/*
+		 * The block ends when we can't fit the following in the
+		 * remaining buffer space:
+		 * namelen (uint16_t) +
+		 * name (r->res_length) +
+		 * end-of-block record 0x0000 (uint16_t)
+		 */
+
+		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
+			/* Write end-of-block record */
+			be_namelen = cpu_to_be16(0);
+			memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+			offset += sizeof(__be16);
+			goto out;
+		}
+
+		be_namelen = cpu_to_be16(r->res_length);
+		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+		offset += sizeof(__be16);
+		memcpy(outbuf + offset, r->res_name, r->res_length);
+		offset += r->res_length;
+	}
+
+	/*
+	 * If we've reached the end of the list (and there's room) write a
+	 * terminating record.
+	 */
+
+	if ((list == &ls->ls_root_list) &&
+	    (offset + sizeof(uint16_t) <= outlen)) {
+		be_namelen = cpu_to_be16(0xFFFF);
+		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
+		offset += sizeof(__be16);
+	}
+
+ out:
+	up_read(&ls->ls_root_sem);
+}
+
--- a/kernel/fs/dlm/dir.h
+++ b/kernel/fs/dlm/dir.h
@@ -0,0 +1,30 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __DIR_DOT_H__
+#define __DIR_DOT_H__
+
+
+int dlm_dir_nodeid(struct dlm_rsb *rsb);
+int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash);
+void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len);
+void dlm_dir_clear(struct dlm_ls *ls);
+void dlm_clear_free_entries(struct dlm_ls *ls);
+int dlm_recover_directory(struct dlm_ls *ls);
+int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
+	int *r_nodeid);
+void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
+	char *outbuf, int outlen, int nodeid);
+
+#endif				/* __DIR_DOT_H__ */
+
--- a/kernel/fs/dlm/dlm_internal.h
+++ b/kernel/fs/dlm/dlm_internal.h
@@ -0,0 +1,608 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2010 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __DLM_INTERNAL_DOT_H__
+#define __DLM_INTERNAL_DOT_H__
+
+/*
+ * This is the main header file to be included in each DLM source file.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <linux/delay.h>
+#include <linux/socket.h>
+#include <linux/kthread.h>
+#include <linux/kobject.h>
+#include <linux/kref.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+
+#include <linux/dlm.h>
+#include "config.h"
+
+/* Size of the temp buffer midcomms allocates on the stack.
+   We try to make this large enough so most messages fit.
+   FIXME: should sctp make this unnecessary? */
+
+#define DLM_INBUF_LEN		148
+
+struct dlm_ls;
+struct dlm_lkb;
+struct dlm_rsb;
+struct dlm_member;
+struct dlm_lkbtable;
+struct dlm_rsbtable;
+struct dlm_dirtable;
+struct dlm_direntry;
+struct dlm_recover;
+struct dlm_header;
+struct dlm_message;
+struct dlm_rcom;
+struct dlm_mhandle;
+
+#define log_print(fmt, args...) \
+	printk(KERN_ERR "dlm: "fmt"\n" , ##args)
+#define log_error(ls, fmt, args...) \
+	printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
+
+#define log_debug(ls, fmt, args...) \
+do { \
+	if (dlm_config.ci_log_debug) \
+		printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
+		       (ls)->ls_name , ##args); \
+} while (0)
+
+#define DLM_ASSERT(x, do) \
+{ \
+  if (!(x)) \
+  { \
+    printk(KERN_ERR "\nDLM:  Assertion failed on line %d of file %s\n" \
+               "DLM:  assertion:  \"%s\"\n" \
+               "DLM:  time = %lu\n", \
+               __LINE__, __FILE__, #x, jiffies); \
+    {do} \
+    printk("\n"); \
+    BUG(); \
+    panic("DLM:  Record message above and reboot.\n"); \
+  } \
+}
+
+
+struct dlm_direntry {
+	struct list_head	list;
+	uint32_t		master_nodeid;
+	uint16_t		length;
+	char			name[1];
+};
+
+struct dlm_dirtable {
+	struct list_head	list;
+	spinlock_t		lock;
+};
+
+struct dlm_rsbtable {
+	struct list_head	list;
+	struct list_head	toss;
+	spinlock_t		lock;
+};
+
+struct dlm_lkbtable {
+	struct list_head	list;
+	rwlock_t		lock;
+	uint16_t		counter;
+};
+
+/*
+ * Lockspace member (per node in a ls)
+ */
+
+struct dlm_member {
+	struct list_head	list;
+	int			nodeid;
+	int			weight;
+};
+
+/*
+ * Save and manage recovery state for a lockspace.
+ */
+
+struct dlm_recover {
+	struct list_head	list;
+	int			*nodeids;   /* nodeids of all members */
+	int			node_count;
+	int			*new;       /* nodeids of new members */
+	int			new_count;
+	uint64_t		seq;
+};
+
+/*
+ * Pass input args to second stage locking function.
+ */
+
+struct dlm_args {
+	uint32_t		flags;
+	void			(*astfn) (void *astparam);
+	void			*astparam;
+	void			(*bastfn) (void *astparam, int mode);
+	int			mode;
+	struct dlm_lksb		*lksb;
+	unsigned long		timeout;
+};
+
+
+/*
+ * Lock block
+ *
+ * A lock can be one of three types:
+ *
+ * local copy      lock is mastered locally
+ *                 (lkb_nodeid is zero and DLM_LKF_MSTCPY is not set)
+ * process copy    lock is mastered on a remote node
+ *                 (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is not set)
+ * master copy     master node's copy of a lock owned by remote node
+ *                 (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is set)
+ *
+ * lkb_exflags: a copy of the most recent flags arg provided to dlm_lock or
+ * dlm_unlock.  The dlm does not modify these or use any private flags in
+ * this field; it only contains DLM_LKF_ flags from dlm.h.  These flags
+ * are sent as-is to the remote master when the lock is remote.
+ *
+ * lkb_flags: internal dlm flags (DLM_IFL_ prefix) from dlm_internal.h.
+ * Some internal flags are shared between the master and process nodes;
+ * these shared flags are kept in the lower two bytes.  One of these
+ * flags set on the master copy will be propagated to the process copy
+ * and v.v.  Other internal flags are private to the master or process
+ * node (e.g. DLM_IFL_MSTCPY).  These are kept in the high two bytes.
+ *
+ * lkb_sbflags: status block flags.  These flags are copied directly into
+ * the caller's lksb.sb_flags prior to the dlm_lock/dlm_unlock completion
+ * ast.  All defined in dlm.h with DLM_SBF_ prefix.
+ *
+ * lkb_status: the lock status indicates which rsb queue the lock is
+ * on, grant, convert, or wait.  DLM_LKSTS_ WAITING/GRANTED/CONVERT
+ *
+ * lkb_wait_type: the dlm message type (DLM_MSG_ prefix) for which a
+ * reply is needed.  Only set when the lkb is on the lockspace waiters
+ * list awaiting a reply from a remote node.
+ *
+ * lkb_nodeid: when the lkb is a local copy, nodeid is 0; when the lkb
+ * is a master copy, nodeid specifies the remote lock holder, when the
+ * lkb is a process copy, the nodeid specifies the lock master.
+ */
+
+/* lkb_ast_type */
+
+#define AST_COMP		1
+#define AST_BAST		2
+
+/* lkb_status */
+
+#define DLM_LKSTS_WAITING	1
+#define DLM_LKSTS_GRANTED	2
+#define DLM_LKSTS_CONVERT	3
+
+/* lkb_flags */
+
+#define DLM_IFL_MSTCPY		0x00010000
+#define DLM_IFL_RESEND		0x00020000
+#define DLM_IFL_DEAD		0x00040000
+#define DLM_IFL_OVERLAP_UNLOCK  0x00080000
+#define DLM_IFL_OVERLAP_CANCEL  0x00100000
+#define DLM_IFL_ENDOFLIFE	0x00200000
+#define DLM_IFL_WATCH_TIMEWARN	0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL	0x01000000
+#define DLM_IFL_USER		0x00000001
+#define DLM_IFL_ORPHAN		0x00000002
+
+struct dlm_lkb {
+	struct dlm_rsb		*lkb_resource;	/* the rsb */
+	struct kref		lkb_ref;
+	int			lkb_nodeid;	/* copied from rsb */
+	int			lkb_ownpid;	/* pid of lock owner */
+	uint32_t		lkb_id;		/* our lock ID */
+	uint32_t		lkb_remid;	/* lock ID on remote partner */
+	uint32_t		lkb_exflags;	/* external flags from caller */
+	uint32_t		lkb_sbflags;	/* lksb flags */
+	uint32_t		lkb_flags;	/* internal flags */
+	uint32_t		lkb_lvbseq;	/* lvb sequence number */
+
+	int8_t			lkb_status;     /* granted, waiting, convert */
+	int8_t			lkb_rqmode;	/* requested lock mode */
+	int8_t			lkb_grmode;	/* granted lock mode */
+	int8_t			lkb_highbast;	/* highest mode bast sent for */
+
+	int8_t			lkb_wait_type;	/* type of reply waiting for */
+	int8_t			lkb_wait_count;
+	int8_t			lkb_ast_type;	/* type of ast queued for */
+	int8_t			lkb_ast_first;	/* type of first ast queued */
+
+	int8_t			lkb_bastmode;	/* req mode of queued bast */
+	int8_t			lkb_castmode;	/* gr mode of queued cast */
+	int8_t			lkb_bastmode_done; /* last delivered bastmode */
+	int8_t			lkb_castmode_done; /* last delivered castmode */
+
+	struct list_head	lkb_idtbl_list;	/* lockspace lkbtbl */
+	struct list_head	lkb_statequeue;	/* rsb g/c/w list */
+	struct list_head	lkb_rsb_lookup;	/* waiting for rsb lookup */
+	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
+	struct list_head	lkb_astqueue;	/* need ast to be sent */
+	struct list_head	lkb_ownqueue;	/* list of locks for a process */
+	struct list_head	lkb_time_list;
+	ktime_t			lkb_time_bast;	/* for debugging */
+	ktime_t			lkb_timestamp;
+	unsigned long		lkb_timeout_cs;
+
+	char			*lkb_lvbptr;
+	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
+	void			(*lkb_astfn) (void *astparam);
+	void			(*lkb_bastfn) (void *astparam, int mode);
+	union {
+		void			*lkb_astparam;	/* caller's ast arg */
+		struct dlm_user_args	*lkb_ua;
+	};
+};
+
+
+struct dlm_rsb {
+	struct dlm_ls		*res_ls;	/* the lockspace */
+	struct kref		res_ref;
+	struct mutex		res_mutex;
+	unsigned long		res_flags;
+	int			res_length;	/* length of rsb name */
+	int			res_nodeid;
+	uint32_t                res_lvbseq;
+	uint32_t		res_hash;
+	uint32_t		res_bucket;	/* rsbtbl */
+	unsigned long		res_toss_time;
+	uint32_t		res_first_lkid;
+	struct list_head	res_lookup;	/* lkbs waiting on first */
+	struct list_head	res_hashchain;	/* rsbtbl */
+	struct list_head	res_grantqueue;
+	struct list_head	res_convertqueue;
+	struct list_head	res_waitqueue;
+
+	struct list_head	res_root_list;	    /* used for recovery */
+	struct list_head	res_recover_list;   /* used for recovery */
+	int			res_recover_locks_count;
+
+	char			*res_lvbptr;
+	char			res_name[1];
+};
+
+/* find_rsb() flags */
+
+#define R_MASTER		1	/* only return rsb if it's a master */
+#define R_CREATE		2	/* create/add rsb if not found */
+
+/* rsb_flags */
+
+enum rsb_flags {
+	RSB_MASTER_UNCERTAIN,
+	RSB_VALNOTVALID,
+	RSB_VALNOTVALID_PREV,
+	RSB_NEW_MASTER,
+	RSB_NEW_MASTER2,
+	RSB_RECOVER_CONVERT,
+	RSB_LOCKS_PURGED,
+};
+
+static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
+{
+	__set_bit(flag, &r->res_flags);
+}
+
+static inline void rsb_clear_flag(struct dlm_rsb *r, enum rsb_flags flag)
+{
+	__clear_bit(flag, &r->res_flags);
+}
+
+static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
+{
+	return test_bit(flag, &r->res_flags);
+}
+
+
+/* dlm_header is first element of all structs sent between nodes */
+
+#define DLM_HEADER_MAJOR	0x00030000
+#define DLM_HEADER_MINOR	0x00000000
+
+#define DLM_MSG			1
+#define DLM_RCOM		2
+
+struct dlm_header {
+	uint32_t		h_version;
+	uint32_t		h_lockspace;
+	uint32_t		h_nodeid;	/* nodeid of sender */
+	uint16_t		h_length;
+	uint8_t			h_cmd;		/* DLM_MSG, DLM_RCOM */
+	uint8_t			h_pad;
+};
+
+
+#define DLM_MSG_REQUEST		1
+#define DLM_MSG_CONVERT		2
+#define DLM_MSG_UNLOCK		3
+#define DLM_MSG_CANCEL		4
+#define DLM_MSG_REQUEST_REPLY	5
+#define DLM_MSG_CONVERT_REPLY	6
+#define DLM_MSG_UNLOCK_REPLY	7
+#define DLM_MSG_CANCEL_REPLY	8
+#define DLM_MSG_GRANT		9
+#define DLM_MSG_BAST		10
+#define DLM_MSG_LOOKUP		11
+#define DLM_MSG_REMOVE		12
+#define DLM_MSG_LOOKUP_REPLY	13
+#define DLM_MSG_PURGE		14
+
+struct dlm_message {
+	struct dlm_header	m_header;
+	uint32_t		m_type;		/* DLM_MSG_ */
+	uint32_t		m_nodeid;
+	uint32_t		m_pid;
+	uint32_t		m_lkid;		/* lkid on sender */
+	uint32_t		m_remid;	/* lkid on receiver */
+	uint32_t		m_parent_lkid;
+	uint32_t		m_parent_remid;
+	uint32_t		m_exflags;
+	uint32_t		m_sbflags;
+	uint32_t		m_flags;
+	uint32_t		m_lvbseq;
+	uint32_t		m_hash;
+	int			m_status;
+	int			m_grmode;
+	int			m_rqmode;
+	int			m_bastmode;
+	int			m_asts;
+	int			m_result;	/* 0 or -EXXX */
+	char			m_extra[0];	/* name or lvb */
+};
+
+
+#define DLM_RS_NODES		0x00000001
+#define DLM_RS_NODES_ALL	0x00000002
+#define DLM_RS_DIR		0x00000004
+#define DLM_RS_DIR_ALL		0x00000008
+#define DLM_RS_LOCKS		0x00000010
+#define DLM_RS_LOCKS_ALL	0x00000020
+#define DLM_RS_DONE		0x00000040
+#define DLM_RS_DONE_ALL		0x00000080
+
+#define DLM_RCOM_STATUS		1
+#define DLM_RCOM_NAMES		2
+#define DLM_RCOM_LOOKUP		3
+#define DLM_RCOM_LOCK		4
+#define DLM_RCOM_STATUS_REPLY	5
+#define DLM_RCOM_NAMES_REPLY	6
+#define DLM_RCOM_LOOKUP_REPLY	7
+#define DLM_RCOM_LOCK_REPLY	8
+
+struct dlm_rcom {
+	struct dlm_header	rc_header;
+	uint32_t		rc_type;	/* DLM_RCOM_ */
+	int			rc_result;	/* multi-purpose */
+	uint64_t		rc_id;		/* match reply with request */
+	uint64_t		rc_seq;		/* sender's ls_recover_seq */
+	uint64_t		rc_seq_reply;	/* remote ls_recover_seq */
+	char			rc_buf[0];
+};
+
+union dlm_packet {
+	struct dlm_header	header;		/* common to other two */
+	struct dlm_message	message;
+	struct dlm_rcom		rcom;
+};
+
+struct rcom_config {
+	__le32			rf_lvblen;
+	__le32			rf_lsflags;
+	__le64			rf_unused;
+};
+
+struct rcom_lock {
+	__le32			rl_ownpid;
+	__le32			rl_lkid;
+	__le32			rl_remid;
+	__le32			rl_parent_lkid;
+	__le32			rl_parent_remid;
+	__le32			rl_exflags;
+	__le32			rl_flags;
+	__le32			rl_lvbseq;
+	__le32			rl_result;
+	int8_t			rl_rqmode;
+	int8_t			rl_grmode;
+	int8_t			rl_status;
+	int8_t			rl_asts;
+	__le16			rl_wait_type;
+	__le16			rl_namelen;
+	char			rl_name[DLM_RESNAME_MAXLEN];
+	char			rl_lvb[0];
+};
+
+struct dlm_ls {
+	struct list_head	ls_list;	/* list of lockspaces */
+	dlm_lockspace_t		*ls_local_handle;
+	uint32_t		ls_global_id;	/* global unique lockspace ID */
+	uint32_t		ls_exflags;
+	int			ls_lvblen;
+	int			ls_count;	/* refcount of processes in
+						   the dlm using this ls */
+	int			ls_create_count; /* create/release refcount */
+	unsigned long		ls_flags;	/* LSFL_ */
+	unsigned long		ls_scan_time;
+	struct kobject		ls_kobj;
+
+	struct dlm_rsbtable	*ls_rsbtbl;
+	uint32_t		ls_rsbtbl_size;
+
+	struct dlm_lkbtable	*ls_lkbtbl;
+	uint32_t		ls_lkbtbl_size;
+
+	struct dlm_dirtable	*ls_dirtbl;
+	uint32_t		ls_dirtbl_size;
+
+	struct mutex		ls_waiters_mutex;
+	struct list_head	ls_waiters;	/* lkbs needing a reply */
+
+	struct mutex		ls_orphans_mutex;
+	struct list_head	ls_orphans;
+
+	struct mutex		ls_timeout_mutex;
+	struct list_head	ls_timeout;
+
+	struct list_head	ls_nodes;	/* current nodes in ls */
+	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
+	int			ls_num_nodes;	/* number of nodes in ls */
+	int			ls_low_nodeid;
+	int			ls_total_weight;
+	int			*ls_node_array;
+
+	struct dlm_rsb		ls_stub_rsb;	/* for returning errors */
+	struct dlm_lkb		ls_stub_lkb;	/* for returning errors */
+	struct dlm_message	ls_stub_ms;	/* for faking a reply */
+
+	struct dentry		*ls_debug_rsb_dentry; /* debugfs */
+	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
+	struct dentry		*ls_debug_locks_dentry; /* debugfs */
+	struct dentry		*ls_debug_all_dentry; /* debugfs */
+
+	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
+	int			ls_uevent_result;
+	struct completion	ls_members_done;
+	int			ls_members_result;
+
+	struct miscdevice       ls_device;
+
+	/* recovery related */
+
+	struct timer_list	ls_timer;
+	struct task_struct	*ls_recoverd_task;
+	struct mutex		ls_recoverd_active;
+	spinlock_t		ls_recover_lock;
+	unsigned long		ls_recover_begin; /* jiffies timestamp */
+	uint32_t		ls_recover_status; /* DLM_RS_ */
+	uint64_t		ls_recover_seq;
+	struct dlm_recover	*ls_recover_args;
+	struct rw_semaphore	ls_in_recovery;	/* block local requests */
+	struct rw_semaphore	ls_recv_active;	/* block dlm_recv */
+	struct list_head	ls_requestqueue;/* queue remote requests */
+	struct mutex		ls_requestqueue_mutex;
+	struct dlm_rcom		*ls_recover_buf;
+	int			ls_recover_nodeid; /* for debugging */
+	uint64_t		ls_rcom_seq;
+	spinlock_t		ls_rcom_spin;
+	struct list_head	ls_recover_list;
+	spinlock_t		ls_recover_list_lock;
+	int			ls_recover_list_count;
+	wait_queue_head_t	ls_wait_general;
+	struct mutex		ls_clear_proc_locks;
+
+	struct list_head	ls_root_list;	/* root resources */
+	struct rw_semaphore	ls_root_sem;	/* protect root_list */
+
+	int			ls_namelen;
+	char			ls_name[1];
+};
+
+#define LSFL_WORK		0
+#define LSFL_RUNNING		1
+#define LSFL_RECOVERY_STOP	2
+#define LSFL_RCOM_READY		3
+#define LSFL_RCOM_WAIT		4
+#define LSFL_UEVENT_WAIT	5
+#define LSFL_TIMEWARN		6
+
+/* much of this is just saving user space pointers associated with the
+   lock that we pass back to the user lib with an ast */
+
+struct dlm_user_args {
+	struct dlm_user_proc	*proc; /* each process that opens the lockspace
+					  device has private data
+					  (dlm_user_proc) on the struct file,
+					  the process's locks point back to it*/
+	struct dlm_lksb		lksb;
+	int			old_mode;
+	int			update_user_lvb;
+	struct dlm_lksb __user	*user_lksb;
+	void __user		*castparam;
+	void __user		*castaddr;
+	void __user		*bastparam;
+	void __user		*bastaddr;
+	uint64_t		xid;
+};
+
+#define DLM_PROC_FLAGS_CLOSING 1
+#define DLM_PROC_FLAGS_COMPAT  2
+
+/* locks list is kept so we can remove all a process's locks when it
+   exits (or orphan those that are persistent) */
+
+struct dlm_user_proc {
+	dlm_lockspace_t		*lockspace;
+	unsigned long		flags; /* DLM_PROC_FLAGS */
+	struct list_head	asts;
+	spinlock_t		asts_spin;
+	struct list_head	locks;
+	spinlock_t		locks_spin;
+	struct list_head	unlocking;
+	wait_queue_head_t	wait;
+};
+
+static inline int dlm_locking_stopped(struct dlm_ls *ls)
+{
+	return !test_bit(LSFL_RUNNING, &ls->ls_flags);
+}
+
+static inline int dlm_recovery_stopped(struct dlm_ls *ls)
+{
+	return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+}
+
+static inline int dlm_no_directory(struct dlm_ls *ls)
+{
+	return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0;
+}
+
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
+int dlm_plock_init(void);
+void dlm_plock_exit(void);
+
+#ifdef CONFIG_DLM_DEBUG
+int dlm_register_debugfs(void);
+void dlm_unregister_debugfs(void);
+int dlm_create_debug_file(struct dlm_ls *ls);
+void dlm_delete_debug_file(struct dlm_ls *ls);
+#else
+static inline int dlm_register_debugfs(void) { return 0; }
+static inline void dlm_unregister_debugfs(void) { }
+static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
+static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
+#endif
+
+#endif				/* __DLM_INTERNAL_DOT_H__ */
+
--- a/kernel/fs/dlm/lock.c
+++ b/kernel/fs/dlm/lock.c
--- a/kernel/fs/dlm/lock.h
+++ b/kernel/fs/dlm/lock.h
@@ -0,0 +1,69 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __LOCK_DOT_H__
+#define __LOCK_DOT_H__
+
+void dlm_dump_rsb(struct dlm_rsb *r);
+void dlm_print_lkb(struct dlm_lkb *lkb);
+void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms);
+void dlm_receive_buffer(union dlm_packet *p, int nodeid);
+int dlm_modes_compat(int mode1, int mode2);
+void dlm_put_rsb(struct dlm_rsb *r);
+void dlm_hold_rsb(struct dlm_rsb *r);
+int dlm_put_lkb(struct dlm_lkb *lkb);
+void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);
+
+int dlm_purge_locks(struct dlm_ls *ls);
+void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
+void dlm_grant_after_purge(struct dlm_ls *ls);
+int dlm_recover_waiters_post(struct dlm_ls *ls);
+void dlm_recover_waiters_pre(struct dlm_ls *ls);
+int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
+int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
+
+int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
+	uint32_t flags, void *name, unsigned int namelen,
+	unsigned long timeout_cs);
+int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+	int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+	unsigned long timeout_cs);
+int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+	uint32_t flags, uint32_t lkid, char *lvb_in);
+int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
+	uint32_t flags, uint32_t lkid);
+int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
+	int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
+void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
+
+static inline int is_master(struct dlm_rsb *r)
+{
+	return !r->res_nodeid;
+}
+
+static inline void lock_rsb(struct dlm_rsb *r)
+{
+	mutex_lock(&r->res_mutex);
+}
+
+static inline void unlock_rsb(struct dlm_rsb *r)
+{
+	mutex_unlock(&r->res_mutex);
+}
+
+#endif
+
--- a/kernel/fs/dlm/lockspace.c
+++ b/kernel/fs/dlm/lockspace.c
@@ -0,0 +1,832 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "member.h"
+#include "recoverd.h"
+#include "ast.h"
+#include "dir.h"
+#include "lowcomms.h"
+#include "config.h"
+#include "memory.h"
+#include "lock.h"
+#include "recover.h"
+#include "requestqueue.h"
+#include "user.h"
+
+static int			ls_count;
+static struct mutex		ls_lock;
+static struct list_head		lslist;
+static spinlock_t		lslist_lock;
+static struct task_struct *	scand_task;
+
+
+static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
+{
+	ssize_t ret = len;
+	int n = simple_strtol(buf, NULL, 0);
+
+	ls = dlm_find_lockspace_local(ls->ls_local_handle);
+	if (!ls)
+		return -EINVAL;
+
+	switch (n) {
+	case 0:
+		dlm_ls_stop(ls);
+		break;
+	case 1:
+		dlm_ls_start(ls);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	dlm_put_lockspace(ls);
+	return ret;
+}
+
+static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
+{
+	ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
+	set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
+	wake_up(&ls->ls_uevent_wait);
+	return len;
+}
+
+static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
+}
+
+static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
+{
+	ls->ls_global_id = simple_strtoul(buf, NULL, 0);
+	return len;
+}
+
+static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
+{
+	uint32_t status = dlm_recover_status(ls);
+	return snprintf(buf, PAGE_SIZE, "%x\n", status);
+}
+
+static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
+}
+
+struct dlm_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct dlm_ls *, char *);
+	ssize_t (*store)(struct dlm_ls *, const char *, size_t);
+};
+
+static struct dlm_attr dlm_attr_control = {
+	.attr  = {.name = "control", .mode = S_IWUSR},
+	.store = dlm_control_store
+};
+
+static struct dlm_attr dlm_attr_event = {
+	.attr  = {.name = "event_done", .mode = S_IWUSR},
+	.store = dlm_event_store
+};
+
+static struct dlm_attr dlm_attr_id = {
+	.attr  = {.name = "id", .mode = S_IRUGO | S_IWUSR},
+	.show  = dlm_id_show,
+	.store = dlm_id_store
+};
+
+static struct dlm_attr dlm_attr_recover_status = {
+	.attr  = {.name = "recover_status", .mode = S_IRUGO},
+	.show  = dlm_recover_status_show
+};
+
+static struct dlm_attr dlm_attr_recover_nodeid = {
+	.attr  = {.name = "recover_nodeid", .mode = S_IRUGO},
+	.show  = dlm_recover_nodeid_show
+};
+
+static struct attribute *dlm_attrs[] = {
+	&dlm_attr_control.attr,
+	&dlm_attr_event.attr,
+	&dlm_attr_id.attr,
+	&dlm_attr_recover_status.attr,
+	&dlm_attr_recover_nodeid.attr,
+	NULL,
+};
+
+static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct dlm_ls *ls  = container_of(kobj, struct dlm_ls, ls_kobj);
+	struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
+	return a->show ? a->show(ls, buf) : 0;
+}
+
+static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t len)
+{
+	struct dlm_ls *ls  = container_of(kobj, struct dlm_ls, ls_kobj);
+	struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
+	return a->store ? a->store(ls, buf, len) : len;
+}
+
+static void lockspace_kobj_release(struct kobject *k)
+{
+	struct dlm_ls *ls  = container_of(k, struct dlm_ls, ls_kobj);
+	kfree(ls);
+}
+
+static struct sysfs_ops dlm_attr_ops = {
+	.show  = dlm_attr_show,
+	.store = dlm_attr_store,
+};
+
+static struct kobj_type dlm_ktype = {
+	.default_attrs = dlm_attrs,
+	.sysfs_ops     = &dlm_attr_ops,
+	.release       = lockspace_kobj_release,
+};
+
+static struct kset *dlm_kset;
+
+static int do_uevent(struct dlm_ls *ls, int in)
+{
+	int error;
+
+	if (in)
+		kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
+	else
+		kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
+
+	log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+
+	/* dlm_controld will see the uevent, do the necessary group management
+	   and then write to sysfs to wake us */
+
+	error = wait_event_interruptible(ls->ls_uevent_wait,
+			test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+
+	log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
+
+	if (error)
+		goto out;
+
+	error = ls->ls_uevent_result;
+ out:
+	if (error)
+		log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+			  error, ls->ls_uevent_result);
+	return error;
+}
+
+
+int __init dlm_lockspace_init(void)
+{
+	ls_count = 0;
+	mutex_init(&ls_lock);
+	INIT_LIST_HEAD(&lslist);
+	spin_lock_init(&lslist_lock);
+
+	dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
+	if (!dlm_kset) {
+		printk(KERN_WARNING "%s: can not create kset\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void dlm_lockspace_exit(void)
+{
+	kset_unregister(dlm_kset);
+}
+
+static struct dlm_ls *find_ls_to_scan(void)
+{
+	struct dlm_ls *ls;
+
+	spin_lock(&lslist_lock);
+	list_for_each_entry(ls, &lslist, ls_list) {
+		if (time_after_eq(jiffies, ls->ls_scan_time +
+					    dlm_config.ci_scan_secs * HZ)) {
+			spin_unlock(&lslist_lock);
+			return ls;
+		}
+	}
+	spin_unlock(&lslist_lock);
+	return NULL;
+}
+
+static int dlm_scand(void *data)
+{
+	struct dlm_ls *ls;
+	int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
+
+	while (!kthread_should_stop()) {
+		ls = find_ls_to_scan();
+		if (ls) {
+			if (dlm_lock_recovery_try(ls)) {
+				ls->ls_scan_time = jiffies;
+				dlm_scan_rsbs(ls);
+				dlm_scan_timeout(ls);
+				dlm_unlock_recovery(ls);
+			} else {
+				ls->ls_scan_time += HZ;
+			}
+		} else {
+			schedule_timeout_interruptible(timeout_jiffies);
+		}
+	}
+	return 0;
+}
+
+static int dlm_scand_start(void)
+{
+	struct task_struct *p;
+	int error = 0;
+
+	p = kthread_run(dlm_scand, NULL, "dlm_scand");
+	if (IS_ERR(p))
+		error = PTR_ERR(p);
+	else
+		scand_task = p;
+	return error;
+}
+
+static void dlm_scand_stop(void)
+{
+	kthread_stop(scand_task);
+}
+
+struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
+{
+	struct dlm_ls *ls;
+
+	spin_lock(&lslist_lock);
+
+	list_for_each_entry(ls, &lslist, ls_list) {
+		if (ls->ls_global_id == id) {
+			ls->ls_count++;
+			goto out;
+		}
+	}
+	ls = NULL;
+ out:
+	spin_unlock(&lslist_lock);
+	return ls;
+}
+
+struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
+{
+	struct dlm_ls *ls;
+
+	spin_lock(&lslist_lock);
+	list_for_each_entry(ls, &lslist, ls_list) {
+		if (ls->ls_local_handle == lockspace) {
+			ls->ls_count++;
+			goto out;
+		}
+	}
+	ls = NULL;
+ out:
+	spin_unlock(&lslist_lock);
+	return ls;
+}
+
+struct dlm_ls *dlm_find_lockspace_device(int minor)
+{
+	struct dlm_ls *ls;
+
+	spin_lock(&lslist_lock);
+	list_for_each_entry(ls, &lslist, ls_list) {
+		if (ls->ls_device.minor == minor) {
+			ls->ls_count++;
+			goto out;
+		}
+	}
+	ls = NULL;
+ out:
+	spin_unlock(&lslist_lock);
+	return ls;
+}
+
+void dlm_put_lockspace(struct dlm_ls *ls)
+{
+	spin_lock(&lslist_lock);
+	ls->ls_count--;
+	spin_unlock(&lslist_lock);
+}
+
+static void remove_lockspace(struct dlm_ls *ls)
+{
+	for (;;) {
+		spin_lock(&lslist_lock);
+		if (ls->ls_count == 0) {
+			WARN_ON(ls->ls_create_count != 0);
+			list_del(&ls->ls_list);
+			spin_unlock(&lslist_lock);
+			return;
+		}
+		spin_unlock(&lslist_lock);
+		ssleep(1);
+	}
+}
+
+static int threads_start(void)
+{
+	int error;
+
+	/* Thread which process lock requests for all lockspace's */
+	error = dlm_astd_start();
+	if (error) {
+		log_print("cannot start dlm_astd thread %d", error);
+		goto fail;
+	}
+
+	error = dlm_scand_start();
+	if (error) {
+		log_print("cannot start dlm_scand thread %d", error);
+		goto astd_fail;
+	}
+
+	/* Thread for sending/receiving messages for all lockspace's */
+	error = dlm_lowcomms_start();
+	if (error) {
+		log_print("cannot start dlm lowcomms %d", error);
+		goto scand_fail;
+	}
+
+	return 0;
+
+ scand_fail:
+	dlm_scand_stop();
+ astd_fail:
+	dlm_astd_stop();
+ fail:
+	return error;
+}
+
+static void threads_stop(void)
+{
+	dlm_scand_stop();
+	dlm_lowcomms_stop();
+	dlm_astd_stop();
+}
+
+static int new_lockspace(const char *name, int namelen, void **lockspace,
+			 uint32_t flags, int lvblen)
+{
+	struct dlm_ls *ls;
+	int i, size, error;
+	int do_unreg = 0;
+
+	if (namelen > DLM_LOCKSPACE_LEN)
+		return -EINVAL;
+
+	if (!lvblen || (lvblen % 8))
+		return -EINVAL;
+
+	if (!try_module_get(THIS_MODULE))
+		return -EINVAL;
+
+	if (!dlm_user_daemon_available()) {
+		module_put(THIS_MODULE);
+		return -EUNATCH;
+	}
+
+	error = 0;
+
+	spin_lock(&lslist_lock);
+	list_for_each_entry(ls, &lslist, ls_list) {
+		WARN_ON(ls->ls_create_count <= 0);
+		if (ls->ls_namelen != namelen)
+			continue;
+		if (memcmp(ls->ls_name, name, namelen))
+			continue;
+		if (flags & DLM_LSFL_NEWEXCL) {
+			error = -EEXIST;
+			break;
+		}
+		ls->ls_create_count++;
+		*lockspace = ls;
+		error = 1;
+		break;
+	}
+	spin_unlock(&lslist_lock);
+
+	if (error)
+		goto out;
+
+	error = -ENOMEM;
+
+	ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
+	if (!ls)
+		goto out;
+	memcpy(ls->ls_name, name, namelen);
+	ls->ls_namelen = namelen;
+	ls->ls_lvblen = lvblen;
+	ls->ls_count = 0;
+	ls->ls_flags = 0;
+	ls->ls_scan_time = jiffies;
+
+	if (flags & DLM_LSFL_TIMEWARN)
+		set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+
+	/* ls_exflags are forced to match among nodes, and we don't
+	   need to require all nodes to have some flags set */
+	ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
+				    DLM_LSFL_NEWEXCL));
+
+	size = dlm_config.ci_rsbtbl_size;
+	ls->ls_rsbtbl_size = size;
+
+	ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS);
+	if (!ls->ls_rsbtbl)
+		goto out_lsfree;
+	for (i = 0; i < size; i++) {
+		INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list);
+		INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss);
+		spin_lock_init(&ls->ls_rsbtbl[i].lock);
+	}
+
+	size = dlm_config.ci_lkbtbl_size;
+	ls->ls_lkbtbl_size = size;
+
+	ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
+	if (!ls->ls_lkbtbl)
+		goto out_rsbfree;
+	for (i = 0; i < size; i++) {
+		INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
+		rwlock_init(&ls->ls_lkbtbl[i].lock);
+		ls->ls_lkbtbl[i].counter = 1;
+	}
+
+	size = dlm_config.ci_dirtbl_size;
+	ls->ls_dirtbl_size = size;
+
+	ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS);
+	if (!ls->ls_dirtbl)
+		goto out_lkbfree;
+	for (i = 0; i < size; i++) {
+		INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
+		spin_lock_init(&ls->ls_dirtbl[i].lock);
+	}
+
+	INIT_LIST_HEAD(&ls->ls_waiters);
+	mutex_init(&ls->ls_waiters_mutex);
+	INIT_LIST_HEAD(&ls->ls_orphans);
+	mutex_init(&ls->ls_orphans_mutex);
+	INIT_LIST_HEAD(&ls->ls_timeout);
+	mutex_init(&ls->ls_timeout_mutex);
+
+	INIT_LIST_HEAD(&ls->ls_nodes);
+	INIT_LIST_HEAD(&ls->ls_nodes_gone);
+	ls->ls_num_nodes = 0;
+	ls->ls_low_nodeid = 0;
+	ls->ls_total_weight = 0;
+	ls->ls_node_array = NULL;
+
+	memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
+	ls->ls_stub_rsb.res_ls = ls;
+
+	ls->ls_debug_rsb_dentry = NULL;
+	ls->ls_debug_waiters_dentry = NULL;
+
+	init_waitqueue_head(&ls->ls_uevent_wait);
+	ls->ls_uevent_result = 0;
+	init_completion(&ls->ls_members_done);
+	ls->ls_members_result = -1;
+
+	ls->ls_recoverd_task = NULL;
+	mutex_init(&ls->ls_recoverd_active);
+	spin_lock_init(&ls->ls_recover_lock);
+	spin_lock_init(&ls->ls_rcom_spin);
+	get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
+	ls->ls_recover_status = 0;
+	ls->ls_recover_seq = 0;
+	ls->ls_recover_args = NULL;
+	init_rwsem(&ls->ls_in_recovery);
+	init_rwsem(&ls->ls_recv_active);
+	INIT_LIST_HEAD(&ls->ls_requestqueue);
+	mutex_init(&ls->ls_requestqueue_mutex);
+	mutex_init(&ls->ls_clear_proc_locks);
+
+	ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
+	if (!ls->ls_recover_buf)
+		goto out_dirfree;
+
+	INIT_LIST_HEAD(&ls->ls_recover_list);
+	spin_lock_init(&ls->ls_recover_list_lock);
+	ls->ls_recover_list_count = 0;
+	ls->ls_local_handle = ls;
+	init_waitqueue_head(&ls->ls_wait_general);
+	INIT_LIST_HEAD(&ls->ls_root_list);
+	init_rwsem(&ls->ls_root_sem);
+
+	down_write(&ls->ls_in_recovery);
+
+	spin_lock(&lslist_lock);
+	ls->ls_create_count = 1;
+	list_add(&ls->ls_list, &lslist);
+	spin_unlock(&lslist_lock);
+
+	/* needs to find ls in lslist */
+	error = dlm_recoverd_start(ls);
+	if (error) {
+		log_error(ls, "can't start dlm_recoverd %d", error);
+		goto out_delist;
+	}
+
+	ls->ls_kobj.kset = dlm_kset;
+	error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
+				     "%s", ls->ls_name);
+	if (error)
+		goto out_stop;
+	kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
+
+	/* let kobject handle freeing of ls if there's an error */
+	do_unreg = 1;
+
+	/* This uevent triggers dlm_controld in userspace to add us to the
+	   group of nodes that are members of this lockspace (managed by the
+	   cluster infrastructure.)  Once it's done that, it tells us who the
+	   current lockspace members are (via configfs) and then tells the
+	   lockspace to start running (via sysfs) in dlm_ls_start(). */
+
+	error = do_uevent(ls, 1);
+	if (error)
+		goto out_stop;
+
+	wait_for_completion(&ls->ls_members_done);
+	error = ls->ls_members_result;
+	if (error)
+		goto out_members;
+
+	dlm_create_debug_file(ls);
+
+	log_debug(ls, "join complete");
+	*lockspace = ls;
+	return 0;
+
+ out_members:
+	do_uevent(ls, 0);
+	dlm_clear_members(ls);
+	kfree(ls->ls_node_array);
+ out_stop:
+	dlm_recoverd_stop(ls);
+ out_delist:
+	spin_lock(&lslist_lock);
+	list_del(&ls->ls_list);
+	spin_unlock(&lslist_lock);
+	kfree(ls->ls_recover_buf);
+ out_dirfree:
+	kfree(ls->ls_dirtbl);
+ out_lkbfree:
+	kfree(ls->ls_lkbtbl);
+ out_rsbfree:
+	kfree(ls->ls_rsbtbl);
+ out_lsfree:
+	if (do_unreg)
+		kobject_put(&ls->ls_kobj);
+	else
+		kfree(ls);
+ out:
+	module_put(THIS_MODULE);
+	return error;
+}
+
+int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
+		      uint32_t flags, int lvblen)
+{
+	int error = 0;
+
+	mutex_lock(&ls_lock);
+	if (!ls_count)
+		error = threads_start();
+	if (error)
+		goto out;
+
+	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
+	if (!error)
+		ls_count++;
+	if (error > 0)
+		error = 0;
+	if (!ls_count)
+		threads_stop();
+ out:
+	mutex_unlock(&ls_lock);
+	return error;
+}
+
+/* Return 1 if the lockspace still has active remote locks,
+ *        2 if the lockspace still has active local locks.
+ */
+static int lockspace_busy(struct dlm_ls *ls)
+{
+	int i, lkb_found = 0;
+	struct dlm_lkb *lkb;
+
+	/* NOTE: We check the lockidtbl here rather than the resource table.
+	   This is because there may be LKBs queued as ASTs that have been
+	   unlinked from their RSBs and are pending deletion once the AST has
+	   been delivered */
+
+	for (i = 0; i < ls->ls_lkbtbl_size; i++) {
+		read_lock(&ls->ls_lkbtbl[i].lock);
+		if (!list_empty(&ls->ls_lkbtbl[i].list)) {
+			lkb_found = 1;
+			list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list,
+					    lkb_idtbl_list) {
+				if (!lkb->lkb_nodeid) {
+					read_unlock(&ls->ls_lkbtbl[i].lock);
+					return 2;
+				}
+			}
+		}
+		read_unlock(&ls->ls_lkbtbl[i].lock);
+	}
+	return lkb_found;
+}
+
+static int release_lockspace(struct dlm_ls *ls, int force)
+{
+	struct dlm_lkb *lkb;
+	struct dlm_rsb *rsb;
+	struct list_head *head;
+	int i, busy, rv;
+
+	busy = lockspace_busy(ls);
+
+	spin_lock(&lslist_lock);
+	if (ls->ls_create_count == 1) {
+		if (busy > force)
+			rv = -EBUSY;
+		else {
+			/* remove_lockspace takes ls off lslist */
+			ls->ls_create_count = 0;
+			rv = 0;
+		}
+	} else if (ls->ls_create_count > 1) {
+		rv = --ls->ls_create_count;
+	} else {
+		rv = -EINVAL;
+	}
+	spin_unlock(&lslist_lock);
+
+	if (rv) {
+		log_debug(ls, "release_lockspace no remove %d", rv);
+		return rv;
+	}
+
+	dlm_device_deregister(ls);
+
+	if (force < 3 && dlm_user_daemon_available())
+		do_uevent(ls, 0);
+
+	dlm_recoverd_stop(ls);
+
+	remove_lockspace(ls);
+
+	dlm_delete_debug_file(ls);
+
+	dlm_astd_suspend();
+
+	kfree(ls->ls_recover_buf);
+
+	/*
+	 * Free direntry structs.
+	 */
+
+	dlm_dir_clear(ls);
+	kfree(ls->ls_dirtbl);
+
+	/*
+	 * Free all lkb's on lkbtbl[] lists.
+	 */
+
+	for (i = 0; i < ls->ls_lkbtbl_size; i++) {
+		head = &ls->ls_lkbtbl[i].list;
+		while (!list_empty(head)) {
+			lkb = list_entry(head->next, struct dlm_lkb,
+					 lkb_idtbl_list);
+
+			list_del(&lkb->lkb_idtbl_list);
+
+			dlm_del_ast(lkb);
+
+			if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
+				dlm_free_lvb(lkb->lkb_lvbptr);
+
+			dlm_free_lkb(lkb);
+		}
+	}
+	dlm_astd_resume();
+
+	kfree(ls->ls_lkbtbl);
+
+	/*
+	 * Free all rsb's on rsbtbl[] lists
+	 */
+
+	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
+		head = &ls->ls_rsbtbl[i].list;
+		while (!list_empty(head)) {
+			rsb = list_entry(head->next, struct dlm_rsb,
+					 res_hashchain);
+
+			list_del(&rsb->res_hashchain);
+			dlm_free_rsb(rsb);
+		}
+
+		head = &ls->ls_rsbtbl[i].toss;
+		while (!list_empty(head)) {
+			rsb = list_entry(head->next, struct dlm_rsb,
+					 res_hashchain);
+			list_del(&rsb->res_hashchain);
+			dlm_free_rsb(rsb);
+		}
+	}
+
+	kfree(ls->ls_rsbtbl);
+
+	/*
+	 * Free structures on any other lists
+	 */
+
+	dlm_purge_requestqueue(ls);
+	kfree(ls->ls_recover_args);
+	dlm_clear_free_entries(ls);
+	dlm_clear_members(ls);
+	dlm_clear_members_gone(ls);
+	kfree(ls->ls_node_array);
+	log_debug(ls, "release_lockspace final free");
+	kobject_put(&ls->ls_kobj);
+	/* The ls structure will be freed when the kobject is done with */
+
+	module_put(THIS_MODULE);
+	return 0;
+}
+
+/*
+ * Called when a system has released all its locks and is not going to use the
+ * lockspace any longer.  We free everything we're managing for this lockspace.
+ * Remaining nodes will go through the recovery process as if we'd died.  The
+ * lockspace must continue to function as usual, participating in recoveries,
+ * until this returns.
+ *
+ * Force has 4 possible values:
+ * 0 - don't destroy locksapce if it has any LKBs
+ * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
+ * 2 - destroy lockspace regardless of LKBs
+ * 3 - destroy lockspace as part of a forced shutdown
+ */
+
+int dlm_release_lockspace(void *lockspace, int force)
+{
+	struct dlm_ls *ls;
+	int error;
+
+	ls = dlm_find_lockspace_local(lockspace);
+	if (!ls)
+		return -EINVAL;
+	dlm_put_lockspace(ls);
+
+	mutex_lock(&ls_lock);
+	error = release_lockspace(ls, force);
+	if (!error)
+		ls_count--;
+	if (!ls_count)
+		threads_stop();
+	mutex_unlock(&ls_lock);
+
+	return error;
+}
+
+void dlm_stop_lockspaces(void)
+{
+	struct dlm_ls *ls;
+
+ restart:
+	spin_lock(&lslist_lock);
+	list_for_each_entry(ls, &lslist, ls_list) {
+		if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
+			continue;
+		spin_unlock(&lslist_lock);
+		log_error(ls, "no userland control daemon, stopping lockspace");
+		dlm_ls_stop(ls);
+		goto restart;
+	}
+	spin_unlock(&lslist_lock);
+}
+
--- a/kernel/fs/dlm/lockspace.h
+++ b/kernel/fs/dlm/lockspace.h
@@ -0,0 +1,26 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __LOCKSPACE_DOT_H__
+#define __LOCKSPACE_DOT_H__
+
+int dlm_lockspace_init(void);
+void dlm_lockspace_exit(void);
+struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
+struct dlm_ls *dlm_find_lockspace_local(void *id);
+struct dlm_ls *dlm_find_lockspace_device(int minor);
+void dlm_put_lockspace(struct dlm_ls *ls);
+void dlm_stop_lockspaces(void);
+
+#endif				/* __LOCKSPACE_DOT_H__ */
+
--- a/kernel/fs/dlm/lowcomms.c
+++ b/kernel/fs/dlm/lowcomms.c
--- a/kernel/fs/dlm/lowcomms.h
+++ b/kernel/fs/dlm/lowcomms.h
@@ -0,0 +1,25 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __LOWCOMMS_DOT_H__
+#define __LOWCOMMS_DOT_H__
+
+int dlm_lowcomms_start(void);
+void dlm_lowcomms_stop(void);
+int dlm_lowcomms_close(int nodeid);
+void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
+void dlm_lowcomms_commit_buffer(void *mh);
+int dlm_lowcomms_connect_node(int nodeid);
+
+#endif				/* __LOWCOMMS_DOT_H__ */
+
--- a/kernel/fs/dlm/lvb_table.h
+++ b/kernel/fs/dlm/lvb_table.h
@@ -0,0 +1,18 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __LVB_TABLE_DOT_H__
+#define __LVB_TABLE_DOT_H__
+
+extern const int dlm_lvb_operations[8][8];
+
+#endif
--- a/kernel/fs/dlm/main.c
+++ b/kernel/fs/dlm/main.c
@@ -0,0 +1,95 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "lock.h"
+#include "user.h"
+#include "memory.h"
+#include "config.h"
+
+static int __init init_dlm(void)
+{
+	int error;
+
+	error = dlm_memory_init();
+	if (error)
+		goto out;
+
+	error = dlm_lockspace_init();
+	if (error)
+		goto out_mem;
+
+	error = dlm_config_init();
+	if (error)
+		goto out_lockspace;
+
+	error = dlm_register_debugfs();
+	if (error)
+		goto out_config;
+
+	error = dlm_user_init();
+	if (error)
+		goto out_debug;
+
+	error = dlm_netlink_init();
+	if (error)
+		goto out_user;
+
+	error = dlm_plock_init();
+	if (error)
+		goto out_netlink;
+
+	printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
+
+	return 0;
+
+ out_netlink:
+	dlm_netlink_exit();
+ out_user:
+	dlm_user_exit();
+ out_debug:
+	dlm_unregister_debugfs();
+ out_config:
+	dlm_config_exit();
+ out_lockspace:
+	dlm_lockspace_exit();
+ out_mem:
+	dlm_memory_exit();
+ out:
+	return error;
+}
+
+static void __exit exit_dlm(void)
+{
+	dlm_plock_exit();
+	dlm_netlink_exit();
+	dlm_user_exit();
+	dlm_config_exit();
+	dlm_memory_exit();
+	dlm_lockspace_exit();
+	dlm_unregister_debugfs();
+}
+
+module_init(init_dlm);
+module_exit(exit_dlm);
+
+MODULE_DESCRIPTION("Distributed Lock Manager");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL_GPL(dlm_new_lockspace);
+EXPORT_SYMBOL_GPL(dlm_release_lockspace);
+EXPORT_SYMBOL_GPL(dlm_lock);
+EXPORT_SYMBOL_GPL(dlm_unlock);
+
--- a/kernel/fs/dlm/member.c
+++ b/kernel/fs/dlm/member.c
@@ -0,0 +1,390 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2009 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "member.h"
+#include "recoverd.h"
+#include "recover.h"
+#include "rcom.h"
+#include "config.h"
+#include "lowcomms.h"
+
+static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
+{
+	struct dlm_member *memb = NULL;
+	struct list_head *tmp;
+	struct list_head *newlist = &new->list;
+	struct list_head *head = &ls->ls_nodes;
+
+	list_for_each(tmp, head) {
+		memb = list_entry(tmp, struct dlm_member, list);
+		if (new->nodeid < memb->nodeid)
+			break;
+	}
+
+	if (!memb)
+		list_add_tail(newlist, head);
+	else {
+		/* FIXME: can use list macro here */
+		newlist->prev = tmp->prev;
+		newlist->next = tmp;
+		tmp->prev->next = newlist;
+		tmp->prev = newlist;
+	}
+}
+
+static int dlm_add_member(struct dlm_ls *ls, int nodeid)
+{
+	struct dlm_member *memb;
+	int w, error;
+
+	memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
+	if (!memb)
+		return -ENOMEM;
+
+	w = dlm_node_weight(ls->ls_name, nodeid);
+	if (w < 0) {
+		kfree(memb);
+		return w;
+	}
+
+	error = dlm_lowcomms_connect_node(nodeid);
+	if (error < 0) {
+		kfree(memb);
+		return error;
+	}
+
+	memb->nodeid = nodeid;
+	memb->weight = w;
+	add_ordered_member(ls, memb);
+	ls->ls_num_nodes++;
+	return 0;
+}
+
+static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
+{
+	list_move(&memb->list, &ls->ls_nodes_gone);
+	ls->ls_num_nodes--;
+}
+
+int dlm_is_member(struct dlm_ls *ls, int nodeid)
+{
+	struct dlm_member *memb;
+
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		if (memb->nodeid == nodeid)
+			return 1;
+	}
+	return 0;
+}
+
+int dlm_is_removed(struct dlm_ls *ls, int nodeid)
+{
+	struct dlm_member *memb;
+
+	list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
+		if (memb->nodeid == nodeid)
+			return 1;
+	}
+	return 0;
+}
+
+static void clear_memb_list(struct list_head *head)
+{
+	struct dlm_member *memb;
+
+	while (!list_empty(head)) {
+		memb = list_entry(head->next, struct dlm_member, list);
+		list_del(&memb->list);
+		kfree(memb);
+	}
+}
+
+void dlm_clear_members(struct dlm_ls *ls)
+{
+	clear_memb_list(&ls->ls_nodes);
+	ls->ls_num_nodes = 0;
+}
+
+void dlm_clear_members_gone(struct dlm_ls *ls)
+{
+	clear_memb_list(&ls->ls_nodes_gone);
+}
+
+static void make_member_array(struct dlm_ls *ls)
+{
+	struct dlm_member *memb;
+	int i, w, x = 0, total = 0, all_zero = 0, *array;
+
+	kfree(ls->ls_node_array);
+	ls->ls_node_array = NULL;
+
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		if (memb->weight)
+			total += memb->weight;
+	}
+
+	/* all nodes revert to weight of 1 if all have weight 0 */
+
+	if (!total) {
+		total = ls->ls_num_nodes;
+		all_zero = 1;
+	}
+
+	ls->ls_total_weight = total;
+
+	array = kmalloc(sizeof(int) * total, GFP_NOFS);
+	if (!array)
+		return;
+
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		if (!all_zero && !memb->weight)
+			continue;
+
+		if (all_zero)
+			w = 1;
+		else
+			w = memb->weight;
+
+		DLM_ASSERT(x < total, printk("total %d x %d\n", total, x););
+
+		for (i = 0; i < w; i++)
+			array[x++] = memb->nodeid;
+	}
+
+	ls->ls_node_array = array;
+}
+
+/* send a status request to all members just to establish comms connections */
+
+static int ping_members(struct dlm_ls *ls)
+{
+	struct dlm_member *memb;
+	int error = 0;
+
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		error = dlm_recovery_stopped(ls);
+		if (error)
+			break;
+		error = dlm_rcom_status(ls, memb->nodeid);
+		if (error)
+			break;
+	}
+	if (error)
+		log_debug(ls, "ping_members aborted %d last nodeid %d",
+			  error, ls->ls_recover_nodeid);
+	return error;
+}
+
+int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
+{
+	struct dlm_member *memb, *safe;
+	int i, error, found, pos = 0, neg = 0, low = -1;
+
+	/* previously removed members that we've not finished removing need to
+	   count as a negative change so the "neg" recovery steps will happen */
+
+	list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
+		log_debug(ls, "prev removed member %d", memb->nodeid);
+		neg++;
+	}
+
+	/* move departed members from ls_nodes to ls_nodes_gone */
+
+	list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
+		found = 0;
+		for (i = 0; i < rv->node_count; i++) {
+			if (memb->nodeid == rv->nodeids[i]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (!found) {
+			neg++;
+			dlm_remove_member(ls, memb);
+			log_debug(ls, "remove member %d", memb->nodeid);
+		}
+	}
+
+	/* Add an entry to ls_nodes_gone for members that were removed and
+	   then added again, so that previous state for these nodes will be
+	   cleared during recovery. */
+
+	for (i = 0; i < rv->new_count; i++) {
+		if (!dlm_is_member(ls, rv->new[i]))
+			continue;
+		log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
+
+		memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
+		if (!memb)
+			return -ENOMEM;
+		memb->nodeid = rv->new[i];
+		list_add_tail(&memb->list, &ls->ls_nodes_gone);
+		neg++;
+	}
+
+	/* add new members to ls_nodes */
+
+	for (i = 0; i < rv->node_count; i++) {
+		if (dlm_is_member(ls, rv->nodeids[i]))
+			continue;
+		dlm_add_member(ls, rv->nodeids[i]);
+		pos++;
+		log_debug(ls, "add member %d", rv->nodeids[i]);
+	}
+
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		if (low == -1 || memb->nodeid < low)
+			low = memb->nodeid;
+	}
+	ls->ls_low_nodeid = low;
+
+	make_member_array(ls);
+	dlm_set_recover_status(ls, DLM_RS_NODES);
+	*neg_out = neg;
+
+	error = ping_members(ls);
+	if (!error || error == -EPROTO) {
+		/* new_lockspace() may be waiting to know if the config
+		   is good or bad */
+		ls->ls_members_result = error;
+		complete(&ls->ls_members_done);
+	}
+	if (error)
+		goto out;
+
+	error = dlm_recover_members_wait(ls);
+ out:
+	log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
+	return error;
+}
+
+/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
+   dlm_ls_start() is called on any of them to start the new recovery. */
+
+int dlm_ls_stop(struct dlm_ls *ls)
+{
+	int new;
+
+	/*
+	 * Prevent dlm_recv from being in the middle of something when we do
+	 * the stop.  This includes ensuring dlm_recv isn't processing a
+	 * recovery message (rcom), while dlm_recoverd is aborting and
+	 * resetting things from an in-progress recovery.  i.e. we want
+	 * dlm_recoverd to abort its recovery without worrying about dlm_recv
+	 * processing an rcom at the same time.  Stopping dlm_recv also makes
+	 * it easy for dlm_receive_message() to check locking stopped and add a
+	 * message to the requestqueue without races.
+	 */
+
+	down_write(&ls->ls_recv_active);
+
+	/*
+	 * Abort any recovery that's in progress (see RECOVERY_STOP,
+	 * dlm_recovery_stopped()) and tell any other threads running in the
+	 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
+	 */
+
+	spin_lock(&ls->ls_recover_lock);
+	set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
+	ls->ls_recover_seq++;
+	spin_unlock(&ls->ls_recover_lock);
+
+	/*
+	 * Let dlm_recv run again, now any normal messages will be saved on the
+	 * requestqueue for later.
+	 */
+
+	up_write(&ls->ls_recv_active);
+
+	/*
+	 * This in_recovery lock does two things:
+	 * 1) Keeps this function from returning until all threads are out
+	 *    of locking routines and locking is truely stopped.
+	 * 2) Keeps any new requests from being processed until it's unlocked
+	 *    when recovery is complete.
+	 */
+
+	if (new)
+		down_write(&ls->ls_in_recovery);
+
+	/*
+	 * The recoverd suspend/resume makes sure that dlm_recoverd (if
+	 * running) has noticed RECOVERY_STOP above and quit processing the
+	 * previous recovery.
+	 */
+
+	dlm_recoverd_suspend(ls);
+	ls->ls_recover_status = 0;
+	dlm_recoverd_resume(ls);
+
+	if (!ls->ls_recover_begin)
+		ls->ls_recover_begin = jiffies;
+	return 0;
+}
+
+int dlm_ls_start(struct dlm_ls *ls)
+{
+	struct dlm_recover *rv = NULL, *rv_old;
+	int *ids = NULL, *new = NULL;
+	int error, ids_count = 0, new_count = 0;
+
+	rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
+	if (!rv)
+		return -ENOMEM;
+
+	error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count,
+				&new, &new_count);
+	if (error < 0)
+		goto fail;
+
+	spin_lock(&ls->ls_recover_lock);
+
+	/* the lockspace needs to be stopped before it can be started */
+
+	if (!dlm_locking_stopped(ls)) {
+		spin_unlock(&ls->ls_recover_lock);
+		log_error(ls, "start ignored: lockspace running");
+		error = -EINVAL;
+		goto fail;
+	}
+
+	rv->nodeids = ids;
+	rv->node_count = ids_count;
+	rv->new = new;
+	rv->new_count = new_count;
+	rv->seq = ++ls->ls_recover_seq;
+	rv_old = ls->ls_recover_args;
+	ls->ls_recover_args = rv;
+	spin_unlock(&ls->ls_recover_lock);
+
+	if (rv_old) {
+		log_error(ls, "unused recovery %llx %d",
+			  (unsigned long long)rv_old->seq, rv_old->node_count);
+		kfree(rv_old->nodeids);
+		kfree(rv_old->new);
+		kfree(rv_old);
+	}
+
+	dlm_recoverd_kick(ls);
+	return 0;
+
+ fail:
+	kfree(rv);
+	kfree(ids);
+	kfree(new);
+	return error;
+}
+
--- a/kernel/fs/dlm/member.h
+++ b/kernel/fs/dlm/member.h
@@ -0,0 +1,25 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __MEMBER_DOT_H__
+#define __MEMBER_DOT_H__
+
+int dlm_ls_stop(struct dlm_ls *ls);
+int dlm_ls_start(struct dlm_ls *ls);
+void dlm_clear_members(struct dlm_ls *ls);
+void dlm_clear_members_gone(struct dlm_ls *ls);
+int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out);
+int dlm_is_removed(struct dlm_ls *ls, int nodeid);
+int dlm_is_member(struct dlm_ls *ls, int nodeid);
+
+#endif                          /* __MEMBER_DOT_H__ */
+
--- a/kernel/fs/dlm/memory.c
+++ b/kernel/fs/dlm/memory.c
@@ -0,0 +1,92 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "config.h"
+#include "memory.h"
+
+static struct kmem_cache *lkb_cache;
+
+
+int __init dlm_memory_init(void)
+{
+	int ret = 0;
+
+	lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb),
+				__alignof__(struct dlm_lkb), 0, NULL);
+	if (!lkb_cache)
+		ret = -ENOMEM;
+	return ret;
+}
+
+void dlm_memory_exit(void)
+{
+	if (lkb_cache)
+		kmem_cache_destroy(lkb_cache);
+}
+
+char *dlm_allocate_lvb(struct dlm_ls *ls)
+{
+	char *p;
+
+	p = kzalloc(ls->ls_lvblen, GFP_NOFS);
+	return p;
+}
+
+void dlm_free_lvb(char *p)
+{
+	kfree(p);
+}
+
+/* FIXME: have some minimal space built-in to rsb for the name and
+   kmalloc a separate name if needed, like dentries are done */
+
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
+{
+	struct dlm_rsb *r;
+
+	DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
+
+	r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
+	return r;
+}
+
+void dlm_free_rsb(struct dlm_rsb *r)
+{
+	if (r->res_lvbptr)
+		dlm_free_lvb(r->res_lvbptr);
+	kfree(r);
+}
+
+struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
+{
+	struct dlm_lkb *lkb;
+
+	lkb = kmem_cache_zalloc(lkb_cache, GFP_NOFS);
+	return lkb;
+}
+
+void dlm_free_lkb(struct dlm_lkb *lkb)
+{
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		struct dlm_user_args *ua;
+		ua = lkb->lkb_ua;
+		if (ua) {
+			if (ua->lksb.sb_lvbptr)
+				kfree(ua->lksb.sb_lvbptr);
+			kfree(ua);
+		}
+	}
+	kmem_cache_free(lkb_cache, lkb);
+}
+
--- a/kernel/fs/dlm/memory.h
+++ b/kernel/fs/dlm/memory.h
@@ -0,0 +1,27 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __MEMORY_DOT_H__
+#define __MEMORY_DOT_H__
+
+int dlm_memory_init(void);
+void dlm_memory_exit(void);
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen);
+void dlm_free_rsb(struct dlm_rsb *r);
+struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
+void dlm_free_lkb(struct dlm_lkb *l);
+char *dlm_allocate_lvb(struct dlm_ls *ls);
+void dlm_free_lvb(char *l);
+
+#endif		/* __MEMORY_DOT_H__ */
+
--- a/kernel/fs/dlm/midcomms.c
+++ b/kernel/fs/dlm/midcomms.c
@@ -0,0 +1,137 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * midcomms.c
+ *
+ * This is the appallingly named "mid-level" comms layer.
+ *
+ * Its purpose is to take packets from the "real" comms layer,
+ * split them up into packets and pass them to the interested
+ * part of the locking mechanism.
+ *
+ * It also takes messages from the locking layer, formats them
+ * into packets and sends them to the comms layer.
+ */
+
+#include "dlm_internal.h"
+#include "lowcomms.h"
+#include "config.h"
+#include "lock.h"
+#include "midcomms.h"
+
+
+static void copy_from_cb(void *dst, const void *base, unsigned offset,
+			 unsigned len, unsigned limit)
+{
+	unsigned copy = len;
+
+	if ((copy + offset) > limit)
+		copy = limit - offset;
+	memcpy(dst, base + offset, copy);
+	len -= copy;
+	if (len)
+		memcpy(dst + copy, base, len);
+}
+
+/*
+ * Called from the low-level comms layer to process a buffer of
+ * commands.
+ *
+ * Only complete messages are processed here, any "spare" bytes from
+ * the end of a buffer are saved and tacked onto the front of the next
+ * message that comes in. I doubt this will happen very often but we
+ * need to be able to cope with it and I don't want the task to be waiting
+ * for packets to come in when there is useful work to be done.
+ */
+
+int dlm_process_incoming_buffer(int nodeid, const void *base,
+				unsigned offset, unsigned len, unsigned limit)
+{
+	union {
+		unsigned char __buf[DLM_INBUF_LEN];
+		/* this is to force proper alignment on some arches */
+		union dlm_packet p;
+	} __tmp;
+	union dlm_packet *p = &__tmp.p;
+	int ret = 0;
+	int err = 0;
+	uint16_t msglen;
+	uint32_t lockspace;
+
+	while (len > sizeof(struct dlm_header)) {
+
+		/* Copy just the header to check the total length.  The
+		   message may wrap around the end of the buffer back to the
+		   start, so we need to use a temp buffer and copy_from_cb. */
+
+		copy_from_cb(p, base, offset, sizeof(struct dlm_header),
+			     limit);
+
+		msglen = le16_to_cpu(p->header.h_length);
+		lockspace = p->header.h_lockspace;
+
+		err = -EINVAL;
+		if (msglen < sizeof(struct dlm_header))
+			break;
+		if (p->header.h_cmd == DLM_MSG) {
+			if (msglen < sizeof(struct dlm_message))
+				break;
+		} else {
+			if (msglen < sizeof(struct dlm_rcom))
+				break;
+		}
+		err = -E2BIG;
+		if (msglen > dlm_config.ci_buffer_size) {
+			log_print("message size %d from %d too big, buf len %d",
+				  msglen, nodeid, len);
+			break;
+		}
+		err = 0;
+
+		/* If only part of the full message is contained in this
+		   buffer, then do nothing and wait for lowcomms to call
+		   us again later with more data.  We return 0 meaning
+		   we've consumed none of the input buffer. */
+
+		if (msglen > len)
+			break;
+
+		/* Allocate a larger temp buffer if the full message won't fit
+		   in the buffer on the stack (which should work for most
+		   ordinary messages). */
+
+		if (msglen > sizeof(__tmp) && p == &__tmp.p) {
+			p = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
+			if (p == NULL)
+				return ret;
+		}
+
+		copy_from_cb(p, base, offset, msglen, limit);
+
+		BUG_ON(lockspace != p->header.h_lockspace);
+
+		ret += msglen;
+		offset += msglen;
+		offset &= (limit - 1);
+		len -= msglen;
+
+		dlm_receive_buffer(p, nodeid);
+	}
+
+	if (p != &__tmp.p)
+		kfree(p);
+
+	return err ? err : ret;
+}
+
--- a/kernel/fs/dlm/midcomms.h
+++ b/kernel/fs/dlm/midcomms.h
@@ -0,0 +1,21 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __MIDCOMMS_DOT_H__
+#define __MIDCOMMS_DOT_H__
+
+int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset,
+				unsigned len, unsigned limit);
+
+#endif				/* __MIDCOMMS_DOT_H__ */
+
--- a/kernel/fs/dlm/netlink.c
+++ b/kernel/fs/dlm/netlink.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+
+#include "dlm_internal.h"
+
+static uint32_t dlm_nl_seqnum;
+static uint32_t listener_nlpid;
+
+static struct genl_family family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= DLM_GENL_NAME,
+	.version	= DLM_GENL_VERSION,
+};
+
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+	struct sk_buff *skb;
+	void *data;
+
+	skb = genlmsg_new(size, GFP_NOFS);
+	if (!skb)
+		return -ENOMEM;
+
+	/* add the message headers */
+	data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
+	if (!data) {
+		nlmsg_free(skb);
+		return -EINVAL;
+	}
+
+	*skbp = skb;
+	return 0;
+}
+
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+	struct nlattr *ret;
+
+	ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
+	if (!ret)
+		return NULL;
+	return nla_data(ret);
+}
+
+static int send_data(struct sk_buff *skb)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	void *data = genlmsg_data(genlhdr);
+	int rv;
+
+	rv = genlmsg_end(skb, data);
+	if (rv < 0) {
+		nlmsg_free(skb);
+		return rv;
+	}
+
+	return genlmsg_unicast(&init_net, skb, listener_nlpid);
+}
+
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	listener_nlpid = info->snd_pid;
+	printk("user_cmd nlpid %u\n", listener_nlpid);
+	return 0;
+}
+
+static struct genl_ops dlm_nl_ops = {
+	.cmd		= DLM_CMD_HELLO,
+	.doit		= user_cmd,
+};
+
+int __init dlm_netlink_init(void)
+{
+	int rv;
+
+	rv = genl_register_family(&family);
+	if (rv)
+		return rv;
+
+	rv = genl_register_ops(&family, &dlm_nl_ops);
+	if (rv < 0)
+		goto err;
+	return 0;
+ err:
+	genl_unregister_family(&family);
+	return rv;
+}
+
+void dlm_netlink_exit(void)
+{
+	genl_unregister_ops(&family, &dlm_nl_ops);
+	genl_unregister_family(&family);
+}
+
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+	struct dlm_rsb *r = lkb->lkb_resource;
+
+	memset(data, 0, sizeof(struct dlm_lock_data));
+
+	data->version = DLM_LOCK_DATA_VERSION;
+	data->nodeid = lkb->lkb_nodeid;
+	data->ownpid = lkb->lkb_ownpid;
+	data->id = lkb->lkb_id;
+	data->remid = lkb->lkb_remid;
+	data->status = lkb->lkb_status;
+	data->grmode = lkb->lkb_grmode;
+	data->rqmode = lkb->lkb_rqmode;
+	if (lkb->lkb_ua)
+		data->xid = lkb->lkb_ua->xid;
+	if (r) {
+		data->lockspace_id = r->res_ls->ls_global_id;
+		data->resource_namelen = r->res_length;
+		memcpy(data->resource_name, r->res_name, r->res_length);
+	}
+}
+
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+	struct sk_buff *uninitialized_var(send_skb);
+	struct dlm_lock_data *data;
+	size_t size;
+	int rv;
+
+	size = nla_total_size(sizeof(struct dlm_lock_data)) +
+	       nla_total_size(0); /* why this? */
+
+	rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
+	if (rv < 0)
+		return;
+
+	data = mk_data(send_skb);
+	if (!data) {
+		nlmsg_free(send_skb);
+		return;
+	}
+
+	fill_data(data, lkb);
+
+	send_data(send_skb);
+}
+
--- a/kernel/fs/dlm/plock.c
+++ b/kernel/fs/dlm/plock.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/poll.h>
+#include <linux/dlm.h>
+#include <linux/dlm_plock.h>
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+
+static spinlock_t ops_lock;
+static struct list_head send_list;
+static struct list_head recv_list;
+static wait_queue_head_t send_wq;
+static wait_queue_head_t recv_wq;
+
+struct plock_op {
+	struct list_head list;
+	int done;
+	struct dlm_plock_info info;
+};
+
+struct plock_xop {
+	struct plock_op xop;
+	void *callback;
+	void *fl;
+	void *file;
+	struct file_lock flc;
+};
+
+
+static inline void set_version(struct dlm_plock_info *info)
+{
+	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
+	info->version[1] = DLM_PLOCK_VERSION_MINOR;
+	info->version[2] = DLM_PLOCK_VERSION_PATCH;
+}
+
+static int check_version(struct dlm_plock_info *info)
+{
+	if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
+	    (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
+		log_print("plock device version mismatch: "
+			  "kernel (%u.%u.%u), user (%u.%u.%u)",
+			  DLM_PLOCK_VERSION_MAJOR,
+			  DLM_PLOCK_VERSION_MINOR,
+			  DLM_PLOCK_VERSION_PATCH,
+			  info->version[0],
+			  info->version[1],
+			  info->version[2]);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void send_op(struct plock_op *op)
+{
+	set_version(&op->info);
+	INIT_LIST_HEAD(&op->list);
+	spin_lock(&ops_lock);
+	list_add_tail(&op->list, &send_list);
+	spin_unlock(&ops_lock);
+	wake_up(&send_wq);
+}
+
+int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+		   int cmd, struct file_lock *fl)
+{
+	struct dlm_ls *ls;
+	struct plock_op *op;
+	struct plock_xop *xop;
+	int rv;
+
+	ls = dlm_find_lockspace_local(lockspace);
+	if (!ls)
+		return -EINVAL;
+
+	xop = kzalloc(sizeof(*xop), GFP_NOFS);
+	if (!xop) {
+		rv = -ENOMEM;
+		goto out;
+	}
+
+	op = &xop->xop;
+	op->info.optype		= DLM_PLOCK_OP_LOCK;
+	op->info.pid		= fl->fl_pid;
+	op->info.ex		= (fl->fl_type == F_WRLCK);
+	op->info.wait		= IS_SETLKW(cmd);
+	op->info.fsid		= ls->ls_global_id;
+	op->info.number		= number;
+	op->info.start		= fl->fl_start;
+	op->info.end		= fl->fl_end;
+	if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+		/* fl_owner is lockd which doesn't distinguish
+		   processes on the nfs client */
+		op->info.owner	= (__u64) fl->fl_pid;
+		xop->callback	= fl->fl_lmops->fl_grant;
+		locks_init_lock(&xop->flc);
+		locks_copy_lock(&xop->flc, fl);
+		xop->fl		= fl;
+		xop->file	= file;
+	} else {
+		op->info.owner	= (__u64)(long) fl->fl_owner;
+		xop->callback	= NULL;
+	}
+
+	send_op(op);
+
+	if (xop->callback == NULL)
+		wait_event(recv_wq, (op->done != 0));
+	else {
+		rv = FILE_LOCK_DEFERRED;
+		goto out;
+	}
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		log_error(ls, "dlm_posix_lock: op on list %llx",
+			  (unsigned long long)number);
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	rv = op->info.rv;
+
+	if (!rv) {
+		if (posix_lock_file_wait(file, fl) < 0)
+			log_error(ls, "dlm_posix_lock: vfs lock error %llx",
+				  (unsigned long long)number);
+	}
+
+	kfree(xop);
+out:
+	dlm_put_lockspace(ls);
+	return rv;
+}
+EXPORT_SYMBOL_GPL(dlm_posix_lock);
+
+/* Returns failure iff a succesful lock operation should be canceled */
+static int dlm_plock_callback(struct plock_op *op)
+{
+	struct file *file;
+	struct file_lock *fl;
+	struct file_lock *flc;
+	int (*notify)(void *, void *, int) = NULL;
+	struct plock_xop *xop = (struct plock_xop *)op;
+	int rv = 0;
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		log_print("dlm_plock_callback: op on list %llx",
+			  (unsigned long long)op->info.number);
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	/* check if the following 2 are still valid or make a copy */
+	file = xop->file;
+	flc = &xop->flc;
+	fl = xop->fl;
+	notify = xop->callback;
+
+	if (op->info.rv) {
+		notify(fl, NULL, op->info.rv);
+		goto out;
+	}
+
+	/* got fs lock; bookkeep locally as well: */
+	flc->fl_flags &= ~FL_SLEEP;
+	if (posix_lock_file(file, flc, NULL)) {
+		/*
+		 * This can only happen in the case of kmalloc() failure.
+		 * The filesystem's own lock is the authoritative lock,
+		 * so a failure to get the lock locally is not a disaster.
+		 * As long as the fs cannot reliably cancel locks (especially
+		 * in a low-memory situation), we're better off ignoring
+		 * this failure than trying to recover.
+		 */
+		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
+			  (unsigned long long)op->info.number, file, fl);
+	}
+
+	rv = notify(fl, NULL, 0);
+	if (rv) {
+		/* XXX: We need to cancel the fs lock here: */
+		log_print("dlm_plock_callback: lock granted after lock request "
+			  "failed; dangling lock!\n");
+		goto out;
+	}
+
+out:
+	kfree(xop);
+	return rv;
+}
+
+int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+		     struct file_lock *fl)
+{
+	struct dlm_ls *ls;
+	struct plock_op *op;
+	int rv;
+
+	ls = dlm_find_lockspace_local(lockspace);
+	if (!ls)
+		return -EINVAL;
+
+	op = kzalloc(sizeof(*op), GFP_NOFS);
+	if (!op) {
+		rv = -ENOMEM;
+		goto out;
+	}
+
+	if (posix_lock_file_wait(file, fl) < 0)
+		log_error(ls, "dlm_posix_unlock: vfs unlock error %llx",
+			  (unsigned long long)number);
+
+	op->info.optype		= DLM_PLOCK_OP_UNLOCK;
+	op->info.pid		= fl->fl_pid;
+	op->info.fsid		= ls->ls_global_id;
+	op->info.number		= number;
+	op->info.start		= fl->fl_start;
+	op->info.end		= fl->fl_end;
+	if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+		op->info.owner	= (__u64) fl->fl_pid;
+	else
+		op->info.owner	= (__u64)(long) fl->fl_owner;
+
+	send_op(op);
+	wait_event(recv_wq, (op->done != 0));
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		log_error(ls, "dlm_posix_unlock: op on list %llx",
+			  (unsigned long long)number);
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	rv = op->info.rv;
+
+	if (rv == -ENOENT)
+		rv = 0;
+
+	kfree(op);
+out:
+	dlm_put_lockspace(ls);
+	return rv;
+}
+EXPORT_SYMBOL_GPL(dlm_posix_unlock);
+
+int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+		  struct file_lock *fl)
+{
+	struct dlm_ls *ls;
+	struct plock_op *op;
+	int rv;
+
+	ls = dlm_find_lockspace_local(lockspace);
+	if (!ls)
+		return -EINVAL;
+
+	op = kzalloc(sizeof(*op), GFP_NOFS);
+	if (!op) {
+		rv = -ENOMEM;
+		goto out;
+	}
+
+	op->info.optype		= DLM_PLOCK_OP_GET;
+	op->info.pid		= fl->fl_pid;
+	op->info.ex		= (fl->fl_type == F_WRLCK);
+	op->info.fsid		= ls->ls_global_id;
+	op->info.number		= number;
+	op->info.start		= fl->fl_start;
+	op->info.end		= fl->fl_end;
+	if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+		op->info.owner	= (__u64) fl->fl_pid;
+	else
+		op->info.owner	= (__u64)(long) fl->fl_owner;
+
+	send_op(op);
+	wait_event(recv_wq, (op->done != 0));
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		log_error(ls, "dlm_posix_get: op on list %llx",
+			  (unsigned long long)number);
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
+	   -ENOENT if there are no locks on the file */
+
+	rv = op->info.rv;
+
+	fl->fl_type = F_UNLCK;
+	if (rv == -ENOENT)
+		rv = 0;
+	else if (rv > 0) {
+		locks_init_lock(fl);
+		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
+		fl->fl_flags = FL_POSIX;
+		fl->fl_pid = op->info.pid;
+		fl->fl_start = op->info.start;
+		fl->fl_end = op->info.end;
+		rv = 0;
+	}
+
+	kfree(op);
+out:
+	dlm_put_lockspace(ls);
+	return rv;
+}
+EXPORT_SYMBOL_GPL(dlm_posix_get);
+
+/* a read copies out one plock request from the send list */
+static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+			loff_t *ppos)
+{
+	struct dlm_plock_info info;
+	struct plock_op *op = NULL;
+
+	if (count < sizeof(info))
+		return -EINVAL;
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&send_list)) {
+		op = list_entry(send_list.next, struct plock_op, list);
+		list_move(&op->list, &recv_list);
+		memcpy(&info, &op->info, sizeof(info));
+	}
+	spin_unlock(&ops_lock);
+
+	if (!op)
+		return -EAGAIN;
+
+	if (copy_to_user(u, &info, sizeof(info)))
+		return -EFAULT;
+	return sizeof(info);
+}
+
+/* a write copies in one plock result that should match a plock_op
+   on the recv list */
+static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
+			 loff_t *ppos)
+{
+	struct dlm_plock_info info;
+	struct plock_op *op;
+	int found = 0, do_callback = 0;
+
+	if (count != sizeof(info))
+		return -EINVAL;
+
+	if (copy_from_user(&info, u, sizeof(info)))
+		return -EFAULT;
+
+	if (check_version(&info))
+		return -EINVAL;
+
+	spin_lock(&ops_lock);
+	list_for_each_entry(op, &recv_list, list) {
+		if (op->info.fsid == info.fsid &&
+		    op->info.number == info.number &&
+		    op->info.owner == info.owner) {
+			struct plock_xop *xop = (struct plock_xop *)op;
+			list_del_init(&op->list);
+			memcpy(&op->info, &info, sizeof(info));
+			if (xop->callback)
+				do_callback = 1;
+			else
+				op->done = 1;
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock(&ops_lock);
+
+	if (found) {
+		if (do_callback)
+			dlm_plock_callback(op);
+		else
+			wake_up(&recv_wq);
+	} else
+		log_print("dev_write no op %x %llx", info.fsid,
+			  (unsigned long long)info.number);
+	return count;
+}
+
+static unsigned int dev_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = 0;
+
+	poll_wait(file, &send_wq, wait);
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&send_list))
+		mask = POLLIN | POLLRDNORM;
+	spin_unlock(&ops_lock);
+
+	return mask;
+}
+
+static const struct file_operations dev_fops = {
+	.read    = dev_read,
+	.write   = dev_write,
+	.poll    = dev_poll,
+	.owner   = THIS_MODULE
+};
+
+static struct miscdevice plock_dev_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = DLM_PLOCK_MISC_NAME,
+	.fops = &dev_fops
+};
+
+int dlm_plock_init(void)
+{
+	int rv;
+
+	spin_lock_init(&ops_lock);
+	INIT_LIST_HEAD(&send_list);
+	INIT_LIST_HEAD(&recv_list);
+	init_waitqueue_head(&send_wq);
+	init_waitqueue_head(&recv_wq);
+
+	rv = misc_register(&plock_dev_misc);
+	if (rv)
+		log_print("dlm_plock_init: misc_register failed %d", rv);
+	return rv;
+}
+
+void dlm_plock_exit(void)
+{
+	if (misc_deregister(&plock_dev_misc) < 0)
+		log_print("dlm_plock_exit: misc_deregister failed");
+}
+
--- a/kernel/fs/dlm/rcom.c
+++ b/kernel/fs/dlm/rcom.c
@@ -0,0 +1,511 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "member.h"
+#include "lowcomms.h"
+#include "midcomms.h"
+#include "rcom.h"
+#include "recover.h"
+#include "dir.h"
+#include "config.h"
+#include "memory.h"
+#include "lock.h"
+#include "util.h"
+
+
+static int rcom_response(struct dlm_ls *ls)
+{
+	return test_bit(LSFL_RCOM_READY, &ls->ls_flags);
+}
+
+static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
+		       struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	char *mb;
+	int mb_len = sizeof(struct dlm_rcom) + len;
+
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+	if (!mh) {
+		log_print("create_rcom to %d type %d len %d ENOBUFS",
+			  to_nodeid, type, len);
+		return -ENOBUFS;
+	}
+	memset(mb, 0, mb_len);
+
+	rc = (struct dlm_rcom *) mb;
+
+	rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+	rc->rc_header.h_lockspace = ls->ls_global_id;
+	rc->rc_header.h_nodeid = dlm_our_nodeid();
+	rc->rc_header.h_length = mb_len;
+	rc->rc_header.h_cmd = DLM_RCOM;
+
+	rc->rc_type = type;
+
+	spin_lock(&ls->ls_recover_lock);
+	rc->rc_seq = ls->ls_recover_seq;
+	spin_unlock(&ls->ls_recover_lock);
+
+	*mh_ret = mh;
+	*rc_ret = rc;
+	return 0;
+}
+
+static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
+		      struct dlm_rcom *rc)
+{
+	dlm_rcom_out(rc);
+	dlm_lowcomms_commit_buffer(mh);
+}
+
+/* When replying to a status request, a node also sends back its
+   configuration values.  The requesting node then checks that the remote
+   node is configured the same way as itself. */
+
+static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
+{
+	rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen);
+	rf->rf_lsflags = cpu_to_le32(ls->ls_exflags);
+}
+
+static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
+{
+	struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
+	size_t conf_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
+
+	if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
+		log_error(ls, "version mismatch: %x nodeid %d: %x",
+			  DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
+			  rc->rc_header.h_version);
+		return -EPROTO;
+	}
+
+	if (rc->rc_header.h_length < conf_size) {
+		log_error(ls, "config too short: %d nodeid %d",
+			  rc->rc_header.h_length, nodeid);
+		return -EPROTO;
+	}
+
+	if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen ||
+	    le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) {
+		log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
+			  ls->ls_lvblen, ls->ls_exflags, nodeid,
+			  le32_to_cpu(rf->rf_lvblen),
+			  le32_to_cpu(rf->rf_lsflags));
+		return -EPROTO;
+	}
+	return 0;
+}
+
+static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
+{
+	spin_lock(&ls->ls_rcom_spin);
+	*new_seq = ++ls->ls_rcom_seq;
+	set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+	spin_unlock(&ls->ls_rcom_spin);
+}
+
+static void disallow_sync_reply(struct dlm_ls *ls)
+{
+	spin_lock(&ls->ls_rcom_spin);
+	clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	spin_unlock(&ls->ls_rcom_spin);
+}
+
+int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	int error = 0;
+
+	ls->ls_recover_nodeid = nodeid;
+
+	if (nodeid == dlm_our_nodeid()) {
+		rc = ls->ls_recover_buf;
+		rc->rc_result = dlm_recover_status(ls);
+		goto out;
+	}
+
+	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
+	if (error)
+		goto out;
+
+	allow_sync_reply(ls, &rc->rc_id);
+	memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
+
+	send_rcom(ls, mh, rc);
+
+	error = dlm_wait_function(ls, &rcom_response);
+	disallow_sync_reply(ls);
+	if (error)
+		goto out;
+
+	rc = ls->ls_recover_buf;
+
+	if (rc->rc_result == -ESRCH) {
+		/* we pretend the remote lockspace exists with 0 status */
+		log_debug(ls, "remote node %d not ready", nodeid);
+		rc->rc_result = 0;
+	} else
+		error = check_config(ls, rc, nodeid);
+	/* the caller looks at rc_result for the remote recovery status */
+ out:
+	return error;
+}
+
+static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	int error, nodeid = rc_in->rc_header.h_nodeid;
+
+	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY,
+			    sizeof(struct rcom_config), &rc, &mh);
+	if (error)
+		return;
+	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
+	rc->rc_result = dlm_recover_status(ls);
+	make_config(ls, (struct rcom_config *) rc->rc_buf);
+
+	send_rcom(ls, mh, rc);
+}
+
+static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+{
+	spin_lock(&ls->ls_rcom_spin);
+	if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
+	    rc_in->rc_id != ls->ls_rcom_seq) {
+		log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
+			  rc_in->rc_type, rc_in->rc_header.h_nodeid,
+			  (unsigned long long)rc_in->rc_id,
+			  (unsigned long long)ls->ls_rcom_seq);
+		goto out;
+	}
+	memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
+	set_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+	wake_up(&ls->ls_wait_general);
+ out:
+	spin_unlock(&ls->ls_rcom_spin);
+}
+
+int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	int error = 0;
+	int max_size = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
+
+	ls->ls_recover_nodeid = nodeid;
+
+	if (nodeid == dlm_our_nodeid()) {
+		ls->ls_recover_buf->rc_header.h_length =
+			dlm_config.ci_buffer_size;
+		dlm_copy_master_names(ls, last_name, last_len,
+		                      ls->ls_recover_buf->rc_buf,
+		                      max_size, nodeid);
+		goto out;
+	}
+
+	error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
+	if (error)
+		goto out;
+	memcpy(rc->rc_buf, last_name, last_len);
+
+	allow_sync_reply(ls, &rc->rc_id);
+	memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
+
+	send_rcom(ls, mh, rc);
+
+	error = dlm_wait_function(ls, &rcom_response);
+	disallow_sync_reply(ls);
+ out:
+	return error;
+}
+
+static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	int error, inlen, outlen, nodeid;
+
+	nodeid = rc_in->rc_header.h_nodeid;
+	inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
+	outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
+
+	error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
+	if (error)
+		return;
+	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
+
+	dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
+			      nodeid);
+	send_rcom(ls, mh, rc);
+}
+
+int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	struct dlm_ls *ls = r->res_ls;
+	int error;
+
+	error = create_rcom(ls, dir_nodeid, DLM_RCOM_LOOKUP, r->res_length,
+			    &rc, &mh);
+	if (error)
+		goto out;
+	memcpy(rc->rc_buf, r->res_name, r->res_length);
+	rc->rc_id = (unsigned long) r;
+
+	send_rcom(ls, mh, rc);
+ out:
+	return error;
+}
+
+static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid;
+	int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
+
+	error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh);
+	if (error)
+		return;
+
+	error = dlm_dir_lookup(ls, nodeid, rc_in->rc_buf, len, &ret_nodeid);
+	if (error)
+		ret_nodeid = error;
+	rc->rc_result = ret_nodeid;
+	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
+
+	send_rcom(ls, mh, rc);
+}
+
+static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+{
+	dlm_recover_master_reply(ls, rc_in);
+}
+
+static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb,
+			   struct rcom_lock *rl)
+{
+	memset(rl, 0, sizeof(*rl));
+
+	rl->rl_ownpid = cpu_to_le32(lkb->lkb_ownpid);
+	rl->rl_lkid = cpu_to_le32(lkb->lkb_id);
+	rl->rl_exflags = cpu_to_le32(lkb->lkb_exflags);
+	rl->rl_flags = cpu_to_le32(lkb->lkb_flags);
+	rl->rl_lvbseq = cpu_to_le32(lkb->lkb_lvbseq);
+	rl->rl_rqmode = lkb->lkb_rqmode;
+	rl->rl_grmode = lkb->lkb_grmode;
+	rl->rl_status = lkb->lkb_status;
+	rl->rl_wait_type = cpu_to_le16(lkb->lkb_wait_type);
+
+	if (lkb->lkb_bastfn)
+		rl->rl_asts |= AST_BAST;
+	if (lkb->lkb_astfn)
+		rl->rl_asts |= AST_COMP;
+
+	rl->rl_namelen = cpu_to_le16(r->res_length);
+	memcpy(rl->rl_name, r->res_name, r->res_length);
+
+	/* FIXME: might we have an lvb without DLM_LKF_VALBLK set ?
+	   If so, receive_rcom_lock_args() won't take this copy. */
+
+	if (lkb->lkb_lvbptr)
+		memcpy(rl->rl_lvb, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
+}
+
+int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
+{
+	struct dlm_ls *ls = r->res_ls;
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	struct rcom_lock *rl;
+	int error, len = sizeof(struct rcom_lock);
+
+	if (lkb->lkb_lvbptr)
+		len += ls->ls_lvblen;
+
+	error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh);
+	if (error)
+		goto out;
+
+	rl = (struct rcom_lock *) rc->rc_buf;
+	pack_rcom_lock(r, lkb, rl);
+	rc->rc_id = (unsigned long) r;
+
+	send_rcom(ls, mh, rc);
+ out:
+	return error;
+}
+
+/* needs at least dlm_rcom + rcom_lock */
+static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+{
+	struct dlm_rcom *rc;
+	struct dlm_mhandle *mh;
+	int error, nodeid = rc_in->rc_header.h_nodeid;
+
+	dlm_recover_master_copy(ls, rc_in);
+
+	error = create_rcom(ls, nodeid, DLM_RCOM_LOCK_REPLY,
+			    sizeof(struct rcom_lock), &rc, &mh);
+	if (error)
+		return;
+
+	/* We send back the same rcom_lock struct we received, but
+	   dlm_recover_master_copy() has filled in rl_remid and rl_result */
+
+	memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
+	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
+
+	send_rcom(ls, mh, rc);
+}
+
+/* If the lockspace doesn't exist then still send a status message
+   back; it's possible that it just doesn't have its global_id yet. */
+
+int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+{
+	struct dlm_rcom *rc;
+	struct rcom_config *rf;
+	struct dlm_mhandle *mh;
+	char *mb;
+	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
+
+	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb);
+	if (!mh)
+		return -ENOBUFS;
+	memset(mb, 0, mb_len);
+
+	rc = (struct dlm_rcom *) mb;
+
+	rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+	rc->rc_header.h_lockspace = rc_in->rc_header.h_lockspace;
+	rc->rc_header.h_nodeid = dlm_our_nodeid();
+	rc->rc_header.h_length = mb_len;
+	rc->rc_header.h_cmd = DLM_RCOM;
+
+	rc->rc_type = DLM_RCOM_STATUS_REPLY;
+	rc->rc_id = rc_in->rc_id;
+	rc->rc_seq_reply = rc_in->rc_seq;
+	rc->rc_result = -ESRCH;
+
+	rf = (struct rcom_config *) rc->rc_buf;
+	rf->rf_lvblen = cpu_to_le32(~0U);
+
+	dlm_rcom_out(rc);
+	dlm_lowcomms_commit_buffer(mh);
+
+	return 0;
+}
+
+static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+	uint64_t seq;
+	int rv = 0;
+
+	switch (rc->rc_type) {
+	case DLM_RCOM_STATUS_REPLY:
+	case DLM_RCOM_NAMES_REPLY:
+	case DLM_RCOM_LOOKUP_REPLY:
+	case DLM_RCOM_LOCK_REPLY:
+		spin_lock(&ls->ls_recover_lock);
+		seq = ls->ls_recover_seq;
+		spin_unlock(&ls->ls_recover_lock);
+		if (rc->rc_seq_reply != seq) {
+			log_debug(ls, "ignoring old reply %x from %d "
+				      "seq_reply %llx expect %llx",
+				      rc->rc_type, rc->rc_header.h_nodeid,
+				      (unsigned long long)rc->rc_seq_reply,
+				      (unsigned long long)seq);
+			rv = 1;
+		}
+	}
+	return rv;
+}
+
+/* Called by dlm_recv; corresponds to dlm_receive_message() but special
+   recovery-only comms are sent through here. */
+
+void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
+{
+	int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock);
+
+	if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
+		log_debug(ls, "ignoring recovery message %x from %d",
+			  rc->rc_type, nodeid);
+		goto out;
+	}
+
+	if (is_old_reply(ls, rc))
+		goto out;
+
+	switch (rc->rc_type) {
+	case DLM_RCOM_STATUS:
+		receive_rcom_status(ls, rc);
+		break;
+
+	case DLM_RCOM_NAMES:
+		receive_rcom_names(ls, rc);
+		break;
+
+	case DLM_RCOM_LOOKUP:
+		receive_rcom_lookup(ls, rc);
+		break;
+
+	case DLM_RCOM_LOCK:
+		if (rc->rc_header.h_length < lock_size)
+			goto Eshort;
+		receive_rcom_lock(ls, rc);
+		break;
+
+	case DLM_RCOM_STATUS_REPLY:
+		receive_sync_reply(ls, rc);
+		break;
+
+	case DLM_RCOM_NAMES_REPLY:
+		receive_sync_reply(ls, rc);
+		break;
+
+	case DLM_RCOM_LOOKUP_REPLY:
+		receive_rcom_lookup_reply(ls, rc);
+		break;
+
+	case DLM_RCOM_LOCK_REPLY:
+		if (rc->rc_header.h_length < lock_size)
+			goto Eshort;
+		dlm_recover_process_copy(ls, rc);
+		break;
+
+	default:
+		log_error(ls, "receive_rcom bad type %d", rc->rc_type);
+	}
+out:
+	return;
+Eshort:
+	log_error(ls, "recovery message %x from %d is too short",
+			  rc->rc_type, nodeid);
+}
+
--- a/kernel/fs/dlm/rcom.h
+++ b/kernel/fs/dlm/rcom.h
@@ -0,0 +1,25 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __RCOM_DOT_H__
+#define __RCOM_DOT_H__
+
+int dlm_rcom_status(struct dlm_ls *ls, int nodeid);
+int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
+int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
+int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid);
+int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in);
+
+#endif
+
--- a/kernel/fs/dlm/recover.c
+++ b/kernel/fs/dlm/recover.c
@@ -0,0 +1,789 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "dir.h"
+#include "config.h"
+#include "ast.h"
+#include "memory.h"
+#include "rcom.h"
+#include "lock.h"
+#include "lowcomms.h"
+#include "member.h"
+#include "recover.h"
+
+
+/*
+ * Recovery waiting routines: these functions wait for a particular reply from
+ * a remote node, or for the remote node to report a certain status.  They need
+ * to abort if the lockspace is stopped indicating a node has failed (perhaps
+ * the one being waited for).
+ */
+
+/*
+ * Wait until given function returns non-zero or lockspace is stopped
+ * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes).  When another
+ * function thinks it could have completed the waited-on task, they should wake
+ * up ls_wait_general to get an immediate response rather than waiting for the
+ * timer to detect the result.  A timer wakes us up periodically while waiting
+ * to see if we should abort due to a node failure.  This should only be called
+ * by the dlm_recoverd thread.
+ */
+
+static void dlm_wait_timer_fn(unsigned long data)
+{
+	struct dlm_ls *ls = (struct dlm_ls *) data;
+	mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
+	wake_up(&ls->ls_wait_general);
+}
+
+int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
+{
+	int error = 0;
+
+	init_timer(&ls->ls_timer);
+	ls->ls_timer.function = dlm_wait_timer_fn;
+	ls->ls_timer.data = (long) ls;
+	ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
+	add_timer(&ls->ls_timer);
+
+	wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
+	del_timer_sync(&ls->ls_timer);
+
+	if (dlm_recovery_stopped(ls)) {
+		log_debug(ls, "dlm_wait_function aborted");
+		error = -EINTR;
+	}
+	return error;
+}
+
+/*
+ * An efficient way for all nodes to wait for all others to have a certain
+ * status.  The node with the lowest nodeid polls all the others for their
+ * status (wait_status_all) and all the others poll the node with the low id
+ * for its accumulated result (wait_status_low).  When all nodes have set
+ * status flag X, then status flag X_ALL will be set on the low nodeid.
+ */
+
+uint32_t dlm_recover_status(struct dlm_ls *ls)
+{
+	uint32_t status;
+	spin_lock(&ls->ls_recover_lock);
+	status = ls->ls_recover_status;
+	spin_unlock(&ls->ls_recover_lock);
+	return status;
+}
+
+void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
+{
+	spin_lock(&ls->ls_recover_lock);
+	ls->ls_recover_status |= status;
+	spin_unlock(&ls->ls_recover_lock);
+}
+
+static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
+{
+	struct dlm_rcom *rc = ls->ls_recover_buf;
+	struct dlm_member *memb;
+	int error = 0, delay;
+
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		delay = 0;
+		for (;;) {
+			if (dlm_recovery_stopped(ls)) {
+				error = -EINTR;
+				goto out;
+			}
+
+			error = dlm_rcom_status(ls, memb->nodeid);
+			if (error)
+				goto out;
+
+			if (rc->rc_result & wait_status)
+				break;
+			if (delay < 1000)
+				delay += 20;
+			msleep(delay);
+		}
+	}
+ out:
+	return error;
+}
+
+static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
+{
+	struct dlm_rcom *rc = ls->ls_recover_buf;
+	int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;
+
+	for (;;) {
+		if (dlm_recovery_stopped(ls)) {
+			error = -EINTR;
+			goto out;
+		}
+
+		error = dlm_rcom_status(ls, nodeid);
+		if (error)
+			break;
+
+		if (rc->rc_result & wait_status)
+			break;
+		if (delay < 1000)
+			delay += 20;
+		msleep(delay);
+	}
+ out:
+	return error;
+}
+
+static int wait_status(struct dlm_ls *ls, uint32_t status)
+{
+	uint32_t status_all = status << 1;
+	int error;
+
+	if (ls->ls_low_nodeid == dlm_our_nodeid()) {
+		error = wait_status_all(ls, status);
+		if (!error)
+			dlm_set_recover_status(ls, status_all);
+	} else
+		error = wait_status_low(ls, status_all);
+
+	return error;
+}
+
+int dlm_recover_members_wait(struct dlm_ls *ls)
+{
+	return wait_status(ls, DLM_RS_NODES);
+}
+
+int dlm_recover_directory_wait(struct dlm_ls *ls)
+{
+	return wait_status(ls, DLM_RS_DIR);
+}
+
+int dlm_recover_locks_wait(struct dlm_ls *ls)
+{
+	return wait_status(ls, DLM_RS_LOCKS);
+}
+
+int dlm_recover_done_wait(struct dlm_ls *ls)
+{
+	return wait_status(ls, DLM_RS_DONE);
+}
+
+/*
+ * The recover_list contains all the rsb's for which we've requested the new
+ * master nodeid.  As replies are returned from the resource directories the
+ * rsb's are removed from the list.  When the list is empty we're done.
+ *
+ * The recover_list is later similarly used for all rsb's for which we've sent
+ * new lkb's and need to receive new corresponding lkid's.
+ *
+ * We use the address of the rsb struct as a simple local identifier for the
+ * rsb so we can match an rcom reply with the rsb it was sent for.
+ */
+
+static int recover_list_empty(struct dlm_ls *ls)
+{
+	int empty;
+
+	spin_lock(&ls->ls_recover_list_lock);
+	empty = list_empty(&ls->ls_recover_list);
+	spin_unlock(&ls->ls_recover_list_lock);
+
+	return empty;
+}
+
+static void recover_list_add(struct dlm_rsb *r)
+{
+	struct dlm_ls *ls = r->res_ls;
+
+	spin_lock(&ls->ls_recover_list_lock);
+	if (list_empty(&r->res_recover_list)) {
+		list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
+		ls->ls_recover_list_count++;
+		dlm_hold_rsb(r);
+	}
+	spin_unlock(&ls->ls_recover_list_lock);
+}
+
+static void recover_list_del(struct dlm_rsb *r)
+{
+	struct dlm_ls *ls = r->res_ls;
+
+	spin_lock(&ls->ls_recover_list_lock);
+	list_del_init(&r->res_recover_list);
+	ls->ls_recover_list_count--;
+	spin_unlock(&ls->ls_recover_list_lock);
+
+	dlm_put_rsb(r);
+}
+
+static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
+{
+	struct dlm_rsb *r = NULL;
+
+	spin_lock(&ls->ls_recover_list_lock);
+
+	list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
+		if (id == (unsigned long) r)
+			goto out;
+	}
+	r = NULL;
+ out:
+	spin_unlock(&ls->ls_recover_list_lock);
+	return r;
+}
+
+static void recover_list_clear(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r, *s;
+
+	spin_lock(&ls->ls_recover_list_lock);
+	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
+		list_del_init(&r->res_recover_list);
+		r->res_recover_locks_count = 0;
+		dlm_put_rsb(r);
+		ls->ls_recover_list_count--;
+	}
+
+	if (ls->ls_recover_list_count != 0) {
+		log_error(ls, "warning: recover_list_count %d",
+			  ls->ls_recover_list_count);
+		ls->ls_recover_list_count = 0;
+	}
+	spin_unlock(&ls->ls_recover_list_lock);
+}
+
+
+/* Master recovery: find new master node for rsb's that were
+   mastered on nodes that have been removed.
+
+   dlm_recover_masters
+   recover_master
+   dlm_send_rcom_lookup            ->  receive_rcom_lookup
+                                       dlm_dir_lookup
+   receive_rcom_lookup_reply       <-
+   dlm_recover_master_reply
+   set_new_master
+   set_master_lkbs
+   set_lock_master
+*/
+
+/*
+ * Set the lock master for all LKBs in a lock queue
+ * If we are the new master of the rsb, we may have received new
+ * MSTCPY locks from other nodes already which we need to ignore
+ * when setting the new nodeid.
+ */
+
+static void set_lock_master(struct list_head *queue, int nodeid)
+{
+	struct dlm_lkb *lkb;
+
+	list_for_each_entry(lkb, queue, lkb_statequeue)
+		if (!(lkb->lkb_flags & DLM_IFL_MSTCPY))
+			lkb->lkb_nodeid = nodeid;
+}
+
+static void set_master_lkbs(struct dlm_rsb *r)
+{
+	set_lock_master(&r->res_grantqueue, r->res_nodeid);
+	set_lock_master(&r->res_convertqueue, r->res_nodeid);
+	set_lock_master(&r->res_waitqueue, r->res_nodeid);
+}
+
+/*
+ * Propogate the new master nodeid to locks
+ * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
+ * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
+ * rsb's to consider.
+ */
+
+static void set_new_master(struct dlm_rsb *r, int nodeid)
+{
+	lock_rsb(r);
+	r->res_nodeid = nodeid;
+	set_master_lkbs(r);
+	rsb_set_flag(r, RSB_NEW_MASTER);
+	rsb_set_flag(r, RSB_NEW_MASTER2);
+	unlock_rsb(r);
+}
+
+/*
+ * We do async lookups on rsb's that need new masters.  The rsb's
+ * waiting for a lookup reply are kept on the recover_list.
+ */
+
+static int recover_master(struct dlm_rsb *r)
+{
+	struct dlm_ls *ls = r->res_ls;
+	int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
+
+	dir_nodeid = dlm_dir_nodeid(r);
+
+	if (dir_nodeid == our_nodeid) {
+		error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
+				       r->res_length, &ret_nodeid);
+		if (error)
+			log_error(ls, "recover dir lookup error %d", error);
+
+		if (ret_nodeid == our_nodeid)
+			ret_nodeid = 0;
+		set_new_master(r, ret_nodeid);
+	} else {
+		recover_list_add(r);
+		error = dlm_send_rcom_lookup(r, dir_nodeid);
+	}
+
+	return error;
+}
+
+/*
+ * When not using a directory, most resource names will hash to a new static
+ * master nodeid and the resource will need to be remastered.
+ */
+
+static int recover_master_static(struct dlm_rsb *r)
+{
+	int master = dlm_dir_nodeid(r);
+
+	if (master == dlm_our_nodeid())
+		master = 0;
+
+	if (r->res_nodeid != master) {
+		if (is_master(r))
+			dlm_purge_mstcpy_locks(r);
+		set_new_master(r, master);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Go through local root resources and for each rsb which has a master which
+ * has departed, get the new master nodeid from the directory.  The dir will
+ * assign mastery to the first node to look up the new master.  That means
+ * we'll discover in this lookup if we're the new master of any rsb's.
+ *
+ * We fire off all the dir lookup requests individually and asynchronously to
+ * the correct dir node.
+ */
+
+int dlm_recover_masters(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r;
+	int error = 0, count = 0;
+
+	log_debug(ls, "dlm_recover_masters");
+
+	down_read(&ls->ls_root_sem);
+	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+		if (dlm_recovery_stopped(ls)) {
+			up_read(&ls->ls_root_sem);
+			error = -EINTR;
+			goto out;
+		}
+
+		if (dlm_no_directory(ls))
+			count += recover_master_static(r);
+		else if (!is_master(r) &&
+			 (dlm_is_removed(ls, r->res_nodeid) ||
+			  rsb_flag(r, RSB_NEW_MASTER))) {
+			recover_master(r);
+			count++;
+		}
+
+		schedule();
+	}
+	up_read(&ls->ls_root_sem);
+
+	log_debug(ls, "dlm_recover_masters %d resources", count);
+
+	error = dlm_wait_function(ls, &recover_list_empty);
+ out:
+	if (error)
+		recover_list_clear(ls);
+	return error;
+}
+
+int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+	struct dlm_rsb *r;
+	int nodeid;
+
+	r = recover_list_find(ls, rc->rc_id);
+	if (!r) {
+		log_error(ls, "dlm_recover_master_reply no id %llx",
+			  (unsigned long long)rc->rc_id);
+		goto out;
+	}
+
+	nodeid = rc->rc_result;
+	if (nodeid == dlm_our_nodeid())
+		nodeid = 0;
+
+	set_new_master(r, nodeid);
+	recover_list_del(r);
+
+	if (recover_list_empty(ls))
+		wake_up(&ls->ls_wait_general);
+ out:
+	return 0;
+}
+
+
+/* Lock recovery: rebuild the process-copy locks we hold on a
+   remastered rsb on the new rsb master.
+
+   dlm_recover_locks
+   recover_locks
+   recover_locks_queue
+   dlm_send_rcom_lock              ->  receive_rcom_lock
+                                       dlm_recover_master_copy
+   receive_rcom_lock_reply         <-
+   dlm_recover_process_copy
+*/
+
+
+/*
+ * keep a count of the number of lkb's we send to the new master; when we get
+ * an equal number of replies then recovery for the rsb is done
+ */
+
+static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
+{
+	struct dlm_lkb *lkb;
+	int error = 0;
+
+	list_for_each_entry(lkb, head, lkb_statequeue) {
+	   	error = dlm_send_rcom_lock(r, lkb);
+		if (error)
+			break;
+		r->res_recover_locks_count++;
+	}
+
+	return error;
+}
+
+static int recover_locks(struct dlm_rsb *r)
+{
+	int error = 0;
+
+	lock_rsb(r);
+
+	DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r););
+
+	error = recover_locks_queue(r, &r->res_grantqueue);
+	if (error)
+		goto out;
+	error = recover_locks_queue(r, &r->res_convertqueue);
+	if (error)
+		goto out;
+	error = recover_locks_queue(r, &r->res_waitqueue);
+	if (error)
+		goto out;
+
+	if (r->res_recover_locks_count)
+		recover_list_add(r);
+	else
+		rsb_clear_flag(r, RSB_NEW_MASTER);
+ out:
+	unlock_rsb(r);
+	return error;
+}
+
+int dlm_recover_locks(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r;
+	int error, count = 0;
+
+	log_debug(ls, "dlm_recover_locks");
+
+	down_read(&ls->ls_root_sem);
+	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+		if (is_master(r)) {
+			rsb_clear_flag(r, RSB_NEW_MASTER);
+			continue;
+		}
+
+		if (!rsb_flag(r, RSB_NEW_MASTER))
+			continue;
+
+		if (dlm_recovery_stopped(ls)) {
+			error = -EINTR;
+			up_read(&ls->ls_root_sem);
+			goto out;
+		}
+
+		error = recover_locks(r);
+		if (error) {
+			up_read(&ls->ls_root_sem);
+			goto out;
+		}
+
+		count += r->res_recover_locks_count;
+	}
+	up_read(&ls->ls_root_sem);
+
+	log_debug(ls, "dlm_recover_locks %d locks", count);
+
+	error = dlm_wait_function(ls, &recover_list_empty);
+ out:
+	if (error)
+		recover_list_clear(ls);
+	else
+		dlm_set_recover_status(ls, DLM_RS_LOCKS);
+	return error;
+}
+
+void dlm_recovered_lock(struct dlm_rsb *r)
+{
+	DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););
+
+	r->res_recover_locks_count--;
+	if (!r->res_recover_locks_count) {
+		rsb_clear_flag(r, RSB_NEW_MASTER);
+		recover_list_del(r);
+	}
+
+	if (recover_list_empty(r->res_ls))
+		wake_up(&r->res_ls->ls_wait_general);
+}
+
+/*
+ * The lvb needs to be recovered on all master rsb's.  This includes setting
+ * the VALNOTVALID flag if necessary, and determining the correct lvb contents
+ * based on the lvb's of the locks held on the rsb.
+ *
+ * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb.  If it
+ * was already set prior to recovery, it's not cleared, regardless of locks.
+ *
+ * The LVB contents are only considered for changing when this is a new master
+ * of the rsb (NEW_MASTER2).  Then, the rsb's lvb is taken from any lkb with
+ * mode > CR.  If no lkb's exist with mode above CR, the lvb contents are taken
+ * from the lkb with the largest lvb sequence number.
+ */
+
+static void recover_lvb(struct dlm_rsb *r)
+{
+	struct dlm_lkb *lkb, *high_lkb = NULL;
+	uint32_t high_seq = 0;
+	int lock_lvb_exists = 0;
+	int big_lock_exists = 0;
+	int lvblen = r->res_ls->ls_lvblen;
+
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
+		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+			continue;
+
+		lock_lvb_exists = 1;
+
+		if (lkb->lkb_grmode > DLM_LOCK_CR) {
+			big_lock_exists = 1;
+			goto setflag;
+		}
+
+		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
+			high_lkb = lkb;
+			high_seq = lkb->lkb_lvbseq;
+		}
+	}
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
+		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+			continue;
+
+		lock_lvb_exists = 1;
+
+		if (lkb->lkb_grmode > DLM_LOCK_CR) {
+			big_lock_exists = 1;
+			goto setflag;
+		}
+
+		if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
+			high_lkb = lkb;
+			high_seq = lkb->lkb_lvbseq;
+		}
+	}
+
+ setflag:
+	if (!lock_lvb_exists)
+		goto out;
+
+	if (!big_lock_exists)
+		rsb_set_flag(r, RSB_VALNOTVALID);
+
+	/* don't mess with the lvb unless we're the new master */
+	if (!rsb_flag(r, RSB_NEW_MASTER2))
+		goto out;
+
+	if (!r->res_lvbptr) {
+		r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
+		if (!r->res_lvbptr)
+			goto out;
+	}
+
+	if (big_lock_exists) {
+		r->res_lvbseq = lkb->lkb_lvbseq;
+		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
+	} else if (high_lkb) {
+		r->res_lvbseq = high_lkb->lkb_lvbseq;
+		memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
+	} else {
+		r->res_lvbseq = 0;
+		memset(r->res_lvbptr, 0, lvblen);
+	}
+ out:
+	return;
+}
+
+/* All master rsb's flagged RECOVER_CONVERT need to be looked at.  The locks
+   converting PR->CW or CW->PR need to have their lkb_grmode set. */
+
+static void recover_conversion(struct dlm_rsb *r)
+{
+	struct dlm_lkb *lkb;
+	int grmode = -1;
+
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
+		if (lkb->lkb_grmode == DLM_LOCK_PR ||
+		    lkb->lkb_grmode == DLM_LOCK_CW) {
+			grmode = lkb->lkb_grmode;
+			break;
+		}
+	}
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
+		if (lkb->lkb_grmode != DLM_LOCK_IV)
+			continue;
+		if (grmode == -1)
+			lkb->lkb_grmode = lkb->lkb_rqmode;
+		else
+			lkb->lkb_grmode = grmode;
+	}
+}
+
+/* We've become the new master for this rsb and waiting/converting locks may
+   need to be granted in dlm_grant_after_purge() due to locks that may have
+   existed from a removed node. */
+
+static void set_locks_purged(struct dlm_rsb *r)
+{
+	if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
+		rsb_set_flag(r, RSB_LOCKS_PURGED);
+}
+
+void dlm_recover_rsbs(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r;
+	int count = 0;
+
+	log_debug(ls, "dlm_recover_rsbs");
+
+	down_read(&ls->ls_root_sem);
+	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
+		lock_rsb(r);
+		if (is_master(r)) {
+			if (rsb_flag(r, RSB_RECOVER_CONVERT))
+				recover_conversion(r);
+			if (rsb_flag(r, RSB_NEW_MASTER2))
+				set_locks_purged(r);
+			recover_lvb(r);
+			count++;
+		}
+		rsb_clear_flag(r, RSB_RECOVER_CONVERT);
+		rsb_clear_flag(r, RSB_NEW_MASTER2);
+		unlock_rsb(r);
+	}
+	up_read(&ls->ls_root_sem);
+
+	log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
+}
+
+/* Create a single list of all root rsb's to be used during recovery */
+
+int dlm_create_root_list(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r;
+	int i, error = 0;
+
+	down_write(&ls->ls_root_sem);
+	if (!list_empty(&ls->ls_root_list)) {
+		log_error(ls, "root list not empty");
+		error = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
+		spin_lock(&ls->ls_rsbtbl[i].lock);
+		list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) {
+			list_add(&r->res_root_list, &ls->ls_root_list);
+			dlm_hold_rsb(r);
+		}
+
+		/* If we're using a directory, add tossed rsbs to the root
+		   list; they'll have entries created in the new directory,
+		   but no other recovery steps should do anything with them. */
+
+		if (dlm_no_directory(ls)) {
+			spin_unlock(&ls->ls_rsbtbl[i].lock);
+			continue;
+		}
+
+		list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) {
+			list_add(&r->res_root_list, &ls->ls_root_list);
+			dlm_hold_rsb(r);
+		}
+		spin_unlock(&ls->ls_rsbtbl[i].lock);
+	}
+ out:
+	up_write(&ls->ls_root_sem);
+	return error;
+}
+
+void dlm_release_root_list(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r, *safe;
+
+	down_write(&ls->ls_root_sem);
+	list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
+		list_del_init(&r->res_root_list);
+		dlm_put_rsb(r);
+	}
+	up_write(&ls->ls_root_sem);
+}
+
+/* If not using a directory, clear the entire toss list, there's no benefit to
+   caching the master value since it's fixed.  If we are using a dir, keep the
+   rsb's we're the master of.  Recovery will add them to the root list and from
+   there they'll be entered in the rebuilt directory. */
+
+void dlm_clear_toss_list(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r, *safe;
+	int i;
+
+	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
+		spin_lock(&ls->ls_rsbtbl[i].lock);
+		list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss,
+					 res_hashchain) {
+			if (dlm_no_directory(ls) || !is_master(r)) {
+				list_del(&r->res_hashchain);
+				dlm_free_rsb(r);
+			}
+		}
+		spin_unlock(&ls->ls_rsbtbl[i].lock);
+	}
+}
+
--- a/kernel/fs/dlm/recover.h
+++ b/kernel/fs/dlm/recover.h
@@ -0,0 +1,34 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __RECOVER_DOT_H__
+#define __RECOVER_DOT_H__
+
+int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls));
+uint32_t dlm_recover_status(struct dlm_ls *ls);
+void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status);
+int dlm_recover_members_wait(struct dlm_ls *ls);
+int dlm_recover_directory_wait(struct dlm_ls *ls);
+int dlm_recover_locks_wait(struct dlm_ls *ls);
+int dlm_recover_done_wait(struct dlm_ls *ls);
+int dlm_recover_masters(struct dlm_ls *ls);
+int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc);
+int dlm_recover_locks(struct dlm_ls *ls);
+void dlm_recovered_lock(struct dlm_rsb *r);
+int dlm_create_root_list(struct dlm_ls *ls);
+void dlm_release_root_list(struct dlm_ls *ls);
+void dlm_clear_toss_list(struct dlm_ls *ls);
+void dlm_recover_rsbs(struct dlm_ls *ls);
+
+#endif				/* __RECOVER_DOT_H__ */
+
--- a/kernel/fs/dlm/recoverd.c
+++ b/kernel/fs/dlm/recoverd.c
@@ -0,0 +1,323 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "lockspace.h"
+#include "member.h"
+#include "dir.h"
+#include "ast.h"
+#include "recover.h"
+#include "lowcomms.h"
+#include "lock.h"
+#include "requestqueue.h"
+#include "recoverd.h"
+
+
+/* If the start for which we're re-enabling locking (seq) has been superseded
+   by a newer stop (ls_recover_seq), we need to leave locking disabled.
+
+   We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
+   locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
+   enables locking and clears the requestqueue between a and b. */
+
+static int enable_locking(struct dlm_ls *ls, uint64_t seq)
+{
+	int error = -EINTR;
+
+	down_write(&ls->ls_recv_active);
+
+	spin_lock(&ls->ls_recover_lock);
+	if (ls->ls_recover_seq == seq) {
+		set_bit(LSFL_RUNNING, &ls->ls_flags);
+		/* unblocks processes waiting to enter the dlm */
+		up_write(&ls->ls_in_recovery);
+		error = 0;
+	}
+	spin_unlock(&ls->ls_recover_lock);
+
+	up_write(&ls->ls_recv_active);
+	return error;
+}
+
+static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
+{
+	unsigned long start;
+	int error, neg = 0;
+
+	log_debug(ls, "recover %llx", (unsigned long long)rv->seq);
+
+	mutex_lock(&ls->ls_recoverd_active);
+
+	/*
+	 * Suspending and resuming dlm_astd ensures that no lkb's from this ls
+	 * will be processed by dlm_astd during recovery.
+	 */
+
+	dlm_astd_suspend();
+	dlm_astd_resume();
+
+	/*
+	 * Free non-master tossed rsb's.  Master rsb's are kept on toss
+	 * list and put on root list to be included in resdir recovery.
+	 */
+
+	dlm_clear_toss_list(ls);
+
+	/*
+	 * This list of root rsb's will be the basis of most of the recovery
+	 * routines.
+	 */
+
+	dlm_create_root_list(ls);
+
+	/*
+	 * Add or remove nodes from the lockspace's ls_nodes list.
+	 * Also waits for all nodes to complete dlm_recover_members.
+	 */
+
+	error = dlm_recover_members(ls, rv, &neg);
+	if (error) {
+		log_debug(ls, "recover_members failed %d", error);
+		goto fail;
+	}
+	start = jiffies;
+
+	/*
+	 * Rebuild our own share of the directory by collecting from all other
+	 * nodes their master rsb names that hash to us.
+	 */
+
+	error = dlm_recover_directory(ls);
+	if (error) {
+		log_debug(ls, "recover_directory failed %d", error);
+		goto fail;
+	}
+
+	/*
+	 * Wait for all nodes to complete directory rebuild.
+	 */
+
+	error = dlm_recover_directory_wait(ls);
+	if (error) {
+		log_debug(ls, "recover_directory_wait failed %d", error);
+		goto fail;
+	}
+
+	/*
+	 * We may have outstanding operations that are waiting for a reply from
+	 * a failed node.  Mark these to be resent after recovery.  Unlock and
+	 * cancel ops can just be completed.
+	 */
+
+	dlm_recover_waiters_pre(ls);
+
+	error = dlm_recovery_stopped(ls);
+	if (error)
+		goto fail;
+
+	if (neg || dlm_no_directory(ls)) {
+		/*
+		 * Clear lkb's for departed nodes.
+		 */
+
+		dlm_purge_locks(ls);
+
+		/*
+		 * Get new master nodeid's for rsb's that were mastered on
+		 * departed nodes.
+		 */
+
+		error = dlm_recover_masters(ls);
+		if (error) {
+			log_debug(ls, "recover_masters failed %d", error);
+			goto fail;
+		}
+
+		/*
+		 * Send our locks on remastered rsb's to the new masters.
+		 */
+
+		error = dlm_recover_locks(ls);
+		if (error) {
+			log_debug(ls, "recover_locks failed %d", error);
+			goto fail;
+		}
+
+		error = dlm_recover_locks_wait(ls);
+		if (error) {
+			log_debug(ls, "recover_locks_wait failed %d", error);
+			goto fail;
+		}
+
+		/*
+		 * Finalize state in master rsb's now that all locks can be
+		 * checked.  This includes conversion resolution and lvb
+		 * settings.
+		 */
+
+		dlm_recover_rsbs(ls);
+	} else {
+		/*
+		 * Other lockspace members may be going through the "neg" steps
+		 * while also adding us to the lockspace, in which case they'll
+		 * be doing the recover_locks (RS_LOCKS) barrier.
+		 */
+		dlm_set_recover_status(ls, DLM_RS_LOCKS);
+
+		error = dlm_recover_locks_wait(ls);
+		if (error) {
+			log_debug(ls, "recover_locks_wait failed %d", error);
+			goto fail;
+		}
+	}
+
+	dlm_release_root_list(ls);
+
+	/*
+	 * Purge directory-related requests that are saved in requestqueue.
+	 * All dir requests from before recovery are invalid now due to the dir
+	 * rebuild and will be resent by the requesting nodes.
+	 */
+
+	dlm_purge_requestqueue(ls);
+
+	dlm_set_recover_status(ls, DLM_RS_DONE);
+	error = dlm_recover_done_wait(ls);
+	if (error) {
+		log_debug(ls, "recover_done_wait failed %d", error);
+		goto fail;
+	}
+
+	dlm_clear_members_gone(ls);
+
+	dlm_adjust_timeouts(ls);
+
+	error = enable_locking(ls, rv->seq);
+	if (error) {
+		log_debug(ls, "enable_locking failed %d", error);
+		goto fail;
+	}
+
+	error = dlm_process_requestqueue(ls);
+	if (error) {
+		log_debug(ls, "process_requestqueue failed %d", error);
+		goto fail;
+	}
+
+	error = dlm_recover_waiters_post(ls);
+	if (error) {
+		log_debug(ls, "recover_waiters_post failed %d", error);
+		goto fail;
+	}
+
+	dlm_grant_after_purge(ls);
+
+	dlm_astd_wake();
+
+	log_debug(ls, "recover %llx done: %u ms",
+		  (unsigned long long)rv->seq,
+		  jiffies_to_msecs(jiffies - start));
+	mutex_unlock(&ls->ls_recoverd_active);
+
+	return 0;
+
+ fail:
+	dlm_release_root_list(ls);
+	log_debug(ls, "recover %llx error %d",
+		  (unsigned long long)rv->seq, error);
+	mutex_unlock(&ls->ls_recoverd_active);
+	return error;
+}
+
+/* The dlm_ls_start() that created the rv we take here may already have been
+   stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
+   flag set. */
+
+static void do_ls_recovery(struct dlm_ls *ls)
+{
+	struct dlm_recover *rv = NULL;
+
+	spin_lock(&ls->ls_recover_lock);
+	rv = ls->ls_recover_args;
+	ls->ls_recover_args = NULL;
+	if (rv && ls->ls_recover_seq == rv->seq)
+		clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	spin_unlock(&ls->ls_recover_lock);
+
+	if (rv) {
+		ls_recover(ls, rv);
+		kfree(rv->nodeids);
+		kfree(rv->new);
+		kfree(rv);
+	}
+}
+
+static int dlm_recoverd(void *arg)
+{
+	struct dlm_ls *ls;
+
+	ls = dlm_find_lockspace_local(arg);
+	if (!ls) {
+		log_print("dlm_recoverd: no lockspace %p", arg);
+		return -1;
+	}
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!test_bit(LSFL_WORK, &ls->ls_flags))
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
+			do_ls_recovery(ls);
+	}
+
+	dlm_put_lockspace(ls);
+	return 0;
+}
+
+void dlm_recoverd_kick(struct dlm_ls *ls)
+{
+	set_bit(LSFL_WORK, &ls->ls_flags);
+	wake_up_process(ls->ls_recoverd_task);
+}
+
+int dlm_recoverd_start(struct dlm_ls *ls)
+{
+	struct task_struct *p;
+	int error = 0;
+
+	p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
+	if (IS_ERR(p))
+		error = PTR_ERR(p);
+	else
+                ls->ls_recoverd_task = p;
+	return error;
+}
+
+void dlm_recoverd_stop(struct dlm_ls *ls)
+{
+	kthread_stop(ls->ls_recoverd_task);
+}
+
+void dlm_recoverd_suspend(struct dlm_ls *ls)
+{
+	wake_up(&ls->ls_wait_general);
+	mutex_lock(&ls->ls_recoverd_active);
+}
+
+void dlm_recoverd_resume(struct dlm_ls *ls)
+{
+	mutex_unlock(&ls->ls_recoverd_active);
+}
+
--- a/kernel/fs/dlm/recoverd.h
+++ b/kernel/fs/dlm/recoverd.h
@@ -0,0 +1,24 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __RECOVERD_DOT_H__
+#define __RECOVERD_DOT_H__
+
+void dlm_recoverd_kick(struct dlm_ls *ls);
+void dlm_recoverd_stop(struct dlm_ls *ls);
+int dlm_recoverd_start(struct dlm_ls *ls);
+void dlm_recoverd_suspend(struct dlm_ls *ls);
+void dlm_recoverd_resume(struct dlm_ls *ls);
+
+#endif				/* __RECOVERD_DOT_H__ */
+
--- a/kernel/fs/dlm/requestqueue.c
+++ b/kernel/fs/dlm/requestqueue.c
@@ -0,0 +1,188 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "member.h"
+#include "lock.h"
+#include "dir.h"
+#include "config.h"
+#include "requestqueue.h"
+
+struct rq_entry {
+	struct list_head list;
+	int nodeid;
+	struct dlm_message request;
+};
+
+/*
+ * Requests received while the lockspace is in recovery get added to the
+ * request queue and processed when recovery is complete.  This happens when
+ * the lockspace is suspended on some nodes before it is on others, or the
+ * lockspace is enabled on some while still suspended on others.
+ */
+
+void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
+{
+	struct rq_entry *e;
+	int length = ms->m_header.h_length - sizeof(struct dlm_message);
+
+	e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS);
+	if (!e) {
+		log_print("dlm_add_requestqueue: out of memory len %d", length);
+		return;
+	}
+
+	e->nodeid = nodeid;
+	memcpy(&e->request, ms, ms->m_header.h_length);
+
+	mutex_lock(&ls->ls_requestqueue_mutex);
+	list_add_tail(&e->list, &ls->ls_requestqueue);
+	mutex_unlock(&ls->ls_requestqueue_mutex);
+}
+
+/*
+ * Called by dlm_recoverd to process normal messages saved while recovery was
+ * happening.  Normal locking has been enabled before this is called.  dlm_recv
+ * upon receiving a message, will wait for all saved messages to be drained
+ * here before processing the message it got.  If a new dlm_ls_stop() arrives
+ * while we're processing these saved messages, it may block trying to suspend
+ * dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue.  In that
+ * case, we don't abort since locking_stopped is still 0.  If dlm_recv is not
+ * waiting for us, then this processing may be aborted due to locking_stopped.
+ */
+
+int dlm_process_requestqueue(struct dlm_ls *ls)
+{
+	struct rq_entry *e;
+	int error = 0;
+
+	mutex_lock(&ls->ls_requestqueue_mutex);
+
+	for (;;) {
+		if (list_empty(&ls->ls_requestqueue)) {
+			mutex_unlock(&ls->ls_requestqueue_mutex);
+			error = 0;
+			break;
+		}
+		e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
+		mutex_unlock(&ls->ls_requestqueue_mutex);
+
+		dlm_receive_message_saved(ls, &e->request);
+
+		mutex_lock(&ls->ls_requestqueue_mutex);
+		list_del(&e->list);
+		kfree(e);
+
+		if (dlm_locking_stopped(ls)) {
+			log_debug(ls, "process_requestqueue abort running");
+			mutex_unlock(&ls->ls_requestqueue_mutex);
+			error = -EINTR;
+			break;
+		}
+		schedule();
+	}
+
+	return error;
+}
+
+/*
+ * After recovery is done, locking is resumed and dlm_recoverd takes all the
+ * saved requests and processes them as they would have been by dlm_recv.  At
+ * the same time, dlm_recv will start receiving new requests from remote nodes.
+ * We want to delay dlm_recv processing new requests until dlm_recoverd has
+ * finished processing the old saved requests.  We don't check for locking
+ * stopped here because dlm_ls_stop won't stop locking until it's suspended us
+ * (dlm_recv).
+ */
+
+void dlm_wait_requestqueue(struct dlm_ls *ls)
+{
+	for (;;) {
+		mutex_lock(&ls->ls_requestqueue_mutex);
+		if (list_empty(&ls->ls_requestqueue))
+			break;
+		mutex_unlock(&ls->ls_requestqueue_mutex);
+		schedule();
+	}
+	mutex_unlock(&ls->ls_requestqueue_mutex);
+}
+
+static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
+{
+	uint32_t type = ms->m_type;
+
+	/* the ls is being cleaned up and freed by release_lockspace */
+	if (!ls->ls_count)
+		return 1;
+
+	if (dlm_is_removed(ls, nodeid))
+		return 1;
+
+	/* directory operations are always purged because the directory is
+	   always rebuilt during recovery and the lookups resent */
+
+	if (type == DLM_MSG_REMOVE ||
+	    type == DLM_MSG_LOOKUP ||
+	    type == DLM_MSG_LOOKUP_REPLY)
+		return 1;
+
+	if (!dlm_no_directory(ls))
+		return 0;
+
+	/* with no directory, the master is likely to change as a part of
+	   recovery; requests to/from the defunct master need to be purged */
+
+	switch (type) {
+	case DLM_MSG_REQUEST:
+	case DLM_MSG_CONVERT:
+	case DLM_MSG_UNLOCK:
+	case DLM_MSG_CANCEL:
+		/* we're no longer the master of this resource, the sender
+		   will resend to the new master (see waiter_needs_recovery) */
+
+		if (dlm_hash2nodeid(ls, ms->m_hash) != dlm_our_nodeid())
+			return 1;
+		break;
+
+	case DLM_MSG_REQUEST_REPLY:
+	case DLM_MSG_CONVERT_REPLY:
+	case DLM_MSG_UNLOCK_REPLY:
+	case DLM_MSG_CANCEL_REPLY:
+	case DLM_MSG_GRANT:
+		/* this reply is from the former master of the resource,
+		   we'll resend to the new master if needed */
+
+		if (dlm_hash2nodeid(ls, ms->m_hash) != nodeid)
+			return 1;
+		break;
+	}
+
+	return 0;
+}
+
+void dlm_purge_requestqueue(struct dlm_ls *ls)
+{
+	struct dlm_message *ms;
+	struct rq_entry *e, *safe;
+
+	mutex_lock(&ls->ls_requestqueue_mutex);
+	list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) {
+		ms =  &e->request;
+
+		if (purge_request(ls, ms, e->nodeid)) {
+			list_del(&e->list);
+			kfree(e);
+		}
+	}
+	mutex_unlock(&ls->ls_requestqueue_mutex);
+}
+
--- a/kernel/fs/dlm/requestqueue.h
+++ b/kernel/fs/dlm/requestqueue.h
@@ -0,0 +1,22 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __REQUESTQUEUE_DOT_H__
+#define __REQUESTQUEUE_DOT_H__
+
+void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms);
+int dlm_process_requestqueue(struct dlm_ls *ls);
+void dlm_wait_requestqueue(struct dlm_ls *ls);
+void dlm_purge_requestqueue(struct dlm_ls *ls);
+
+#endif
+
--- a/kernel/fs/dlm/user.c
+++ b/kernel/fs/dlm/user.c
--- a/kernel/fs/dlm/user.h
+++ b/kernel/fs/dlm/user.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2006-2010 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __USER_DOT_H__
+#define __USER_DOT_H__
+
+void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode);
+int dlm_user_init(void);
+void dlm_user_exit(void);
+int dlm_device_deregister(struct dlm_ls *ls);
+int dlm_user_daemon_available(void);
+
+#endif
--- a/kernel/fs/dlm/util.c
+++ b/kernel/fs/dlm/util.c
@@ -0,0 +1,154 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "dlm_internal.h"
+#include "rcom.h"
+#include "util.h"
+
+#define DLM_ERRNO_EDEADLK		35
+#define DLM_ERRNO_EBADR			53
+#define DLM_ERRNO_EBADSLT		57
+#define DLM_ERRNO_EPROTO		71
+#define DLM_ERRNO_EOPNOTSUPP		95
+#define DLM_ERRNO_ETIMEDOUT	       110
+#define DLM_ERRNO_EINPROGRESS	       115
+
+static void header_out(struct dlm_header *hd)
+{
+	hd->h_version		= cpu_to_le32(hd->h_version);
+	hd->h_lockspace		= cpu_to_le32(hd->h_lockspace);
+	hd->h_nodeid		= cpu_to_le32(hd->h_nodeid);
+	hd->h_length		= cpu_to_le16(hd->h_length);
+}
+
+static void header_in(struct dlm_header *hd)
+{
+	hd->h_version		= le32_to_cpu(hd->h_version);
+	hd->h_lockspace		= le32_to_cpu(hd->h_lockspace);
+	hd->h_nodeid		= le32_to_cpu(hd->h_nodeid);
+	hd->h_length		= le16_to_cpu(hd->h_length);
+}
+
+/* higher errno values are inconsistent across architectures, so select
+   one set of values for on the wire */
+
+static int to_dlm_errno(int err)
+{
+	switch (err) {
+	case -EDEADLK:
+		return -DLM_ERRNO_EDEADLK;
+	case -EBADR:
+		return -DLM_ERRNO_EBADR;
+	case -EBADSLT:
+		return -DLM_ERRNO_EBADSLT;
+	case -EPROTO:
+		return -DLM_ERRNO_EPROTO;
+	case -EOPNOTSUPP:
+		return -DLM_ERRNO_EOPNOTSUPP;
+	case -ETIMEDOUT:
+		return -DLM_ERRNO_ETIMEDOUT;
+	case -EINPROGRESS:
+		return -DLM_ERRNO_EINPROGRESS;
+	}
+	return err;
+}
+
+static int from_dlm_errno(int err)
+{
+	switch (err) {
+	case -DLM_ERRNO_EDEADLK:
+		return -EDEADLK;
+	case -DLM_ERRNO_EBADR:
+		return -EBADR;
+	case -DLM_ERRNO_EBADSLT:
+		return -EBADSLT;
+	case -DLM_ERRNO_EPROTO:
+		return -EPROTO;
+	case -DLM_ERRNO_EOPNOTSUPP:
+		return -EOPNOTSUPP;
+	case -DLM_ERRNO_ETIMEDOUT:
+		return -ETIMEDOUT;
+	case -DLM_ERRNO_EINPROGRESS:
+		return -EINPROGRESS;
+	}
+	return err;
+}
+
+void dlm_message_out(struct dlm_message *ms)
+{
+	header_out(&ms->m_header);
+
+	ms->m_type		= cpu_to_le32(ms->m_type);
+	ms->m_nodeid		= cpu_to_le32(ms->m_nodeid);
+	ms->m_pid		= cpu_to_le32(ms->m_pid);
+	ms->m_lkid		= cpu_to_le32(ms->m_lkid);
+	ms->m_remid		= cpu_to_le32(ms->m_remid);
+	ms->m_parent_lkid	= cpu_to_le32(ms->m_parent_lkid);
+	ms->m_parent_remid	= cpu_to_le32(ms->m_parent_remid);
+	ms->m_exflags		= cpu_to_le32(ms->m_exflags);
+	ms->m_sbflags		= cpu_to_le32(ms->m_sbflags);
+	ms->m_flags		= cpu_to_le32(ms->m_flags);
+	ms->m_lvbseq		= cpu_to_le32(ms->m_lvbseq);
+	ms->m_hash		= cpu_to_le32(ms->m_hash);
+	ms->m_status		= cpu_to_le32(ms->m_status);
+	ms->m_grmode		= cpu_to_le32(ms->m_grmode);
+	ms->m_rqmode		= cpu_to_le32(ms->m_rqmode);
+	ms->m_bastmode		= cpu_to_le32(ms->m_bastmode);
+	ms->m_asts		= cpu_to_le32(ms->m_asts);
+	ms->m_result		= cpu_to_le32(to_dlm_errno(ms->m_result));
+}
+
+void dlm_message_in(struct dlm_message *ms)
+{
+	header_in(&ms->m_header);
+
+	ms->m_type		= le32_to_cpu(ms->m_type);
+	ms->m_nodeid		= le32_to_cpu(ms->m_nodeid);
+	ms->m_pid		= le32_to_cpu(ms->m_pid);
+	ms->m_lkid		= le32_to_cpu(ms->m_lkid);
+	ms->m_remid		= le32_to_cpu(ms->m_remid);
+	ms->m_parent_lkid	= le32_to_cpu(ms->m_parent_lkid);
+	ms->m_parent_remid	= le32_to_cpu(ms->m_parent_remid);
+	ms->m_exflags		= le32_to_cpu(ms->m_exflags);
+	ms->m_sbflags		= le32_to_cpu(ms->m_sbflags);
+	ms->m_flags		= le32_to_cpu(ms->m_flags);
+	ms->m_lvbseq		= le32_to_cpu(ms->m_lvbseq);
+	ms->m_hash		= le32_to_cpu(ms->m_hash);
+	ms->m_status		= le32_to_cpu(ms->m_status);
+	ms->m_grmode		= le32_to_cpu(ms->m_grmode);
+	ms->m_rqmode		= le32_to_cpu(ms->m_rqmode);
+	ms->m_bastmode		= le32_to_cpu(ms->m_bastmode);
+	ms->m_asts		= le32_to_cpu(ms->m_asts);
+	ms->m_result		= from_dlm_errno(le32_to_cpu(ms->m_result));
+}
+
+void dlm_rcom_out(struct dlm_rcom *rc)
+{
+	header_out(&rc->rc_header);
+
+	rc->rc_type		= cpu_to_le32(rc->rc_type);
+	rc->rc_result		= cpu_to_le32(rc->rc_result);
+	rc->rc_id		= cpu_to_le64(rc->rc_id);
+	rc->rc_seq		= cpu_to_le64(rc->rc_seq);
+	rc->rc_seq_reply	= cpu_to_le64(rc->rc_seq_reply);
+}
+
+void dlm_rcom_in(struct dlm_rcom *rc)
+{
+	header_in(&rc->rc_header);
+
+	rc->rc_type		= le32_to_cpu(rc->rc_type);
+	rc->rc_result		= le32_to_cpu(rc->rc_result);
+	rc->rc_id		= le64_to_cpu(rc->rc_id);
+	rc->rc_seq		= le64_to_cpu(rc->rc_seq);
+	rc->rc_seq_reply	= le64_to_cpu(rc->rc_seq_reply);
+}
--- a/kernel/fs/dlm/util.h
+++ b/kernel/fs/dlm/util.h
@@ -0,0 +1,22 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef __UTIL_DOT_H__
+#define __UTIL_DOT_H__
+
+void dlm_message_out(struct dlm_message *ms);
+void dlm_message_in(struct dlm_message *ms);
+void dlm_rcom_out(struct dlm_rcom *rc);
+void dlm_rcom_in(struct dlm_rcom *rc);
+
+#endif
+