add idl4k kernel firmware version 1.13.0.105

Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

6
kernel/kernel/.gitignore vendored Normal file

@@ -0,0 +1,6 @@
#
# Generated files
#
config_data.h
config_data.gz
timeconst.h

2
kernel/kernel/Kconfig.freezer Normal file

@@ -0,0 +1,2 @@
config FREEZER
def_bool PM_SLEEP || CGROUP_FREEZER

58
kernel/kernel/Kconfig.hz Normal file

@@ -0,0 +1,58 @@
#
# Timer Interrupt Frequency Configuration
#
choice
prompt "Timer frequency"
default HZ_250
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
beneficial for servers and NUMA systems that do not need to have
a fast response for user interaction and that may experience bus
contention and cacheline bounces as a result of timer interrupts.
Note that the timer interrupt occurs on each processor in an SMP
environment, leading to NR_CPUS * HZ timer interrupts
per second.
config HZ_100
bool "100 HZ"
help
100 Hz is a typical choice for servers, SMP and NUMA systems
with lots of processors that may show reduced performance if
too many timer interrupts are occurring.
config HZ_250
bool "250 HZ"
help
250 Hz is a good compromise choice allowing server performance
while also showing good interactive responsiveness even
on SMP and NUMA systems. If you are going to be using NTSC video
or multimedia, select 300Hz instead.
config HZ_300
bool "300 HZ"
help
300 Hz is a good compromise choice allowing server performance
while also showing good interactive responsiveness even
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
config HZ_1000
bool "1000 HZ"
help
1000 Hz is the preferred choice for desktop systems and other
systems requiring fast interactive responses to events.
endchoice
config HZ
int
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
default 1000 if HZ_1000
config SCHED_HRTICK
def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS)

54
kernel/kernel/Kconfig.preempt Normal file

@@ -0,0 +1,54 @@
choice
prompt "Preemption Model"
default PREEMPT_NONE
config PREEMPT_NONE
bool "No Forced Preemption (Server)"
help
This is the traditional Linux preemption model, geared towards
throughput. It will still provide good latencies most of the
time, but there are no guarantees and occasional longer delays
are possible.
Select this option if you are building a kernel for a server or
scientific/computation system, or if you want to maximize the
raw processing power of the kernel, irrespective of scheduling
latencies.
config PREEMPT_VOLUNTARY
bool "Voluntary Kernel Preemption (Desktop)"
help
This option reduces the latency of the kernel by adding more
"explicit preemption points" to the kernel code. These new
preemption points have been selected to reduce the maximum
latency of rescheduling, providing faster application reactions,
at the cost of slightly lower throughput.
This allows reaction to interactive events by allowing a
low priority process to voluntarily preempt itself even if it
is in kernel mode executing a system call. This allows
applications to run more 'smoothly' even when the system is
under load.
Select this if you are building a kernel for a desktop system.
config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
help
This option reduces the latency of the kernel by making
all kernel code (that is not executing in a critical section)
preemptible. This allows reaction to interactive events by
permitting a low priority process to be preempted involuntarily
even if it is in kernel mode executing a system call and would
otherwise not be about to reach a natural preemption point.
This allows applications to run more 'smoothly' even when the
system is under load, at the cost of slightly lower throughput
and a slight runtime overhead to kernel code.
Select this if you are building a kernel for a desktop or
embedded system with latency requirements in the milliseconds
range.
endchoice

129
kernel/kernel/Makefile Normal file

@@ -0,0 +1,129 @@
#
# Makefile for the linux kernel.
#
obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
async.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
CFLAGS_REMOVE_lockdep_proc.o = -pg
CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
endif
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
obj-$(CONFIG_LOCKDEP) += lockdep.o
ifeq ($(CONFIG_PROC_FS),y)
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_FUTEX) += futex.o
ifeq ($(CONFIG_COMPAT),y)
obj-$(CONFIG_FUTEX) += futex_compat.o
endif
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
ifneq ($(CONFIG_SMP),y)
obj-y += up.o
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_FREEZER) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_TREE_RCU) += rcutree.o
obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
# me. I suspect most platforms don't need this, but until we know that for sure
# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
# to get a correct value for the wait-channel (WCHAN in ps). --davidm
CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
endif
$(obj)/configs.o: $(obj)/config_data.h
# config_data.h contains the same information as ikconfig.h but gzipped.
# Info from config_data can be extracted from /proc/config*
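# (e.g. "zcat /proc/config.gz" on a kernel built with CONFIG_IKCONFIG_PROC,
# or scripts/extract-ikconfig run against a built kernel image)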
targets += config_data.gz
$(obj)/config_data.gz: .config FORCE
$(call if_changed,gzip)
quiet_cmd_ikconfiggz = IKCFG $@
cmd_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
$(call if_changed,ikconfiggz)
$(obj)/time.o: $(obj)/timeconst.h
quiet_cmd_timeconst = TIMEC $@
cmd_timeconst = $(PERL) $< $(CONFIG_HZ) > $@
targets += timeconst.h
$(obj)/timeconst.h: $(src)/timeconst.pl FORCE
$(call if_changed,timeconst)

684
kernel/kernel/acct.c Normal file

@@ -0,0 +1,684 @@
/*
* linux/kernel/acct.c
*
* BSD Process Accounting for Linux
*
* Author: Marco van Wieringen <mvw@planets.elm.net>
*
* Some code based on ideas and code from:
* Thomas K. Dyas <tdyas@eden.rutgers.edu>
*
* This file implements BSD-style process accounting. Whenever any
* process exits, an accounting record of type "struct acct" is
* written to the file specified with the acct() system call. It is
* up to user-level programs to do useful things with the accounting
* log. The kernel just provides the raw accounting information.
*
* (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
*
* Plugged two leaks. 1) It didn't return acct_file into the free_filps if
* the file happened to be read-only. 2) If the accounting was suspended
* due to the lack of space it happily allowed to reopen it and completely
* lost the old acct_file. 3/10/98, Al Viro.
*
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
*
* Fixed a nasty interaction with sys_umount(). If the accounting
* was suspended we failed to stop it on umount(). Messy.
* Another one: remount to readonly didn't stop accounting.
* Question: what should we do if we have CAP_SYS_ADMIN but not
* CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
* unless we are messing with the root. In that case we are getting a
* real mess with do_remount_sb(). 9/11/98, AV.
*
* Fixed a bunch of races (and pair of leaks). Probably not the best way,
* but this one obviously doesn't introduce deadlocks. Later. BTW, found
* one race (and leak) in BSD implementation.
* OK, that's better. ANOTHER race and leak in BSD variant. There always
* is one more bug... 10/11/98, AV.
*
* Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
* ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
* a struct file opened for write. Fixed. 2/6/2000, AV.
*/
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/tty.h>
#include <linux/security.h>
#include <linux/vfs.h>
#include <linux/jiffies.h>
#include <linux/times.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
#include <linux/blkdev.h> /* sector_div */
#include <linux/pid_namespace.h>
/*
* These constants control the free space thresholds that suspend and
* resume the process accounting system, and the time delay between
* each check.
* Turned into sysctl-controllable parameters. AV, 12/11/98
*/
int acct_parm[3] = {4, 2, 30};
#define RESUME (acct_parm[0]) /* >foo% free space - resume */
#define SUSPEND (acct_parm[1]) /* <foo% free space - suspend */
#define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */
/*
* External references and all of the globals.
*/
static void do_acct_process(struct bsd_acct_struct *acct,
struct pid_namespace *ns, struct file *);
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock. This primes
* the cache line to have the data after getting the lock.
*/
struct bsd_acct_struct {
volatile int active;
volatile int needcheck;
struct file *file;
struct pid_namespace *ns;
struct timer_list timer;
struct list_head list;
};
static DEFINE_SPINLOCK(acct_lock);
static LIST_HEAD(acct_list);
/*
* Called whenever the timer says to check the free space.
*/
static void acct_timeout(unsigned long x)
{
struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
acct->needcheck = 1;
}
/*
* Check the amount of free space and suspend/resume accordingly.
*/
static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
{
struct kstatfs sbuf;
int res;
int act;
sector_t resume;
sector_t suspend;
spin_lock(&acct_lock);
res = acct->active;
if (!file || !acct->needcheck)
goto out;
spin_unlock(&acct_lock);
/* May block */
if (vfs_statfs(file->f_path.dentry, &sbuf))
return res;
suspend = sbuf.f_blocks * SUSPEND;
resume = sbuf.f_blocks * RESUME;
sector_div(suspend, 100);
sector_div(resume, 100);
if (sbuf.f_bavail <= suspend)
act = -1;
else if (sbuf.f_bavail >= resume)
act = 1;
else
act = 0;
/*
* If some joker switched acct->file under us we'd better be
* silent and _not_ touch anything.
*/
spin_lock(&acct_lock);
if (file != acct->file) {
if (act)
res = act>0;
goto out;
}
if (acct->active) {
if (act < 0) {
acct->active = 0;
printk(KERN_INFO "Process accounting paused\n");
}
} else {
if (act > 0) {
acct->active = 1;
printk(KERN_INFO "Process accounting resumed\n");
}
}
del_timer(&acct->timer);
acct->needcheck = 0;
acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
add_timer(&acct->timer);
res = acct->active;
out:
spin_unlock(&acct_lock);
return res;
}
/*
* Close the old accounting file (if currently open) and then replace
* it with file (if non-NULL).
*
* NOTE: acct_lock MUST be held on entry and exit.
*/
static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
struct pid_namespace *ns)
{
struct file *old_acct = NULL;
struct pid_namespace *old_ns = NULL;
if (acct->file) {
old_acct = acct->file;
old_ns = acct->ns;
del_timer(&acct->timer);
acct->active = 0;
acct->needcheck = 0;
acct->file = NULL;
acct->ns = NULL;
list_del(&acct->list);
}
if (file) {
acct->file = file;
acct->ns = ns;
acct->needcheck = 0;
acct->active = 1;
list_add(&acct->list, &acct_list);
/* It's been deleted if it was used before so this is safe */
setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
add_timer(&acct->timer);
}
if (old_acct) {
mnt_unpin(old_acct->f_path.mnt);
spin_unlock(&acct_lock);
do_acct_process(acct, old_ns, old_acct);
filp_close(old_acct, NULL);
spin_lock(&acct_lock);
}
}
static int acct_on(char *name)
{
struct file *file;
struct vfsmount *mnt;
int error;
struct pid_namespace *ns;
struct bsd_acct_struct *acct = NULL;
/* Difference from BSD - they don't do O_APPEND */
file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
if (IS_ERR(file))
return PTR_ERR(file);
if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
filp_close(file, NULL);
return -EACCES;
}
if (!file->f_op->write) {
filp_close(file, NULL);
return -EIO;
}
ns = task_active_pid_ns(current);
if (ns->bacct == NULL) {
acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
if (acct == NULL) {
filp_close(file, NULL);
return -ENOMEM;
}
}
error = security_acct(file);
if (error) {
kfree(acct);
filp_close(file, NULL);
return error;
}
spin_lock(&acct_lock);
if (ns->bacct == NULL) {
ns->bacct = acct;
acct = NULL;
}
mnt = file->f_path.mnt;
mnt_pin(mnt);
acct_file_reopen(ns->bacct, file, ns);
spin_unlock(&acct_lock);
mntput(mnt); /* it's pinned, now give up active reference */
kfree(acct);
return 0;
}
/**
* sys_acct - enable/disable process accounting
* @name: file name for accounting records or NULL to shutdown accounting
*
* Returns 0 for success or negative errno values for failure.
*
* sys_acct() is the only system call needed to implement process
* accounting. It takes the name of the file where accounting records
* should be written. If the filename is NULL, accounting will be
* shutdown.
*/
SYSCALL_DEFINE1(acct, const char __user *, name)
{
int error;
if (!capable(CAP_SYS_PACCT))
return -EPERM;
if (name) {
char *tmp = getname(name);
if (IS_ERR(tmp))
return (PTR_ERR(tmp));
error = acct_on(tmp);
putname(tmp);
} else {
struct bsd_acct_struct *acct;
acct = task_active_pid_ns(current)->bacct;
if (acct == NULL)
return 0;
error = security_acct(NULL);
if (!error) {
spin_lock(&acct_lock);
acct_file_reopen(acct, NULL, NULL);
spin_unlock(&acct_lock);
}
}
return error;
}
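/*
 * A minimal userspace sketch of the acct(2) interface implemented above
 * (illustrative only; the log path is an arbitrary example). Note that the
 * file must already exist, since acct_on() opens it without O_CREAT, and
 * the caller needs CAP_SYS_PACCT.
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		if (acct("/var/log/pacct") != 0) {	// enable accounting
 *			perror("acct");
 *			return 1;
 *		}
 *		// ... run workloads; every exiting process appends one
 *		// fixed-size struct acct record to the log ...
 *		acct(NULL);				// NULL name disables it
 *		return 0;
 *	}
 */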
/**
* acct_auto_close_mnt - turn off a filesystem's accounting if it is on
* @m: vfsmount being shut down
*
* If the accounting is turned on for a file in the subtree pointed
* to by m, turn accounting off. Done when m is about to die.
*/
void acct_auto_close_mnt(struct vfsmount *m)
{
struct bsd_acct_struct *acct;
spin_lock(&acct_lock);
restart:
list_for_each_entry(acct, &acct_list, list)
if (acct->file && acct->file->f_path.mnt == m) {
acct_file_reopen(acct, NULL, NULL);
goto restart;
}
spin_unlock(&acct_lock);
}
/**
* acct_auto_close - turn off a filesystem's accounting if it is on
* @sb: super block for the filesystem
*
* If the accounting is turned on for a file in the filesystem pointed
* to by sb, turn accounting off.
*/
void acct_auto_close(struct super_block *sb)
{
struct bsd_acct_struct *acct;
spin_lock(&acct_lock);
restart:
list_for_each_entry(acct, &acct_list, list)
if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
acct_file_reopen(acct, NULL, NULL);
goto restart;
}
spin_unlock(&acct_lock);
}
void acct_exit_ns(struct pid_namespace *ns)
{
struct bsd_acct_struct *acct;
spin_lock(&acct_lock);
acct = ns->bacct;
if (acct != NULL) {
if (acct->file != NULL)
acct_file_reopen(acct, NULL, NULL);
kfree(acct);
}
spin_unlock(&acct_lock);
}
/*
* encode an unsigned long into a comp_t
*
* This routine has been adapted from the encode_comp_t() function in
* the kern_acct.c file of the FreeBSD operating system. The encoding
* is a 13-bit fraction with a 3-bit (base 8) exponent.
*/
#define MANTSIZE 13 /* 13 bit mantissa. */
#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
static comp_t encode_comp_t(unsigned long value)
{
int exp, rnd;
exp = rnd = 0;
while (value > MAXFRACT) {
rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
exp++;
}
/*
* If we need to round up, do it (and handle overflow correctly).
*/
if (rnd && (++value > MAXFRACT)) {
value >>= EXPSIZE;
exp++;
}
/*
* Clean it up and polish it off.
*/
exp <<= MANTSIZE; /* Shift the exponent into place */
exp += value; /* and add on the mantissa. */
return exp;
}
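/*
 * Worked example: with MANTSIZE = 13, EXPSIZE = 3 and MAXFRACT = 8191,
 * encoding the value 10000 takes one pass through the loop (bit 2 of the
 * value is clear, so no rounding):
 *
 *	value = 10000 >> 3 = 1250, exp = 1
 *	comp_t = (1 << 13) + 1250 = 9442 = 0x24e2
 *
 * Decoding yields 1250 * 8^1 = 10000, so this value happens to survive
 * exactly; in general each extra shift costs up to 3 bits of precision.
 */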
#if ACCT_VERSION==1 || ACCT_VERSION==2
/*
* encode an u64 into a comp2_t (24 bits)
*
* Format: 5 bit base 2 exponent, 20 bits mantissa.
* The leading bit of the mantissa is not stored, but implied for
* non-zero exponents.
* Largest encodable value is 50 bits.
*/
#define MANTSIZE2 20 /* 20 bit mantissa. */
#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */
static comp2_t encode_comp2_t(u64 value)
{
int exp, rnd;
exp = (value > (MAXFRACT2>>1));
rnd = 0;
while (value > MAXFRACT2) {
rnd = value & 1;
value >>= 1;
exp++;
}
/*
* If we need to round up, do it (and handle overflow correctly).
*/
if (rnd && (++value > MAXFRACT2)) {
value >>= 1;
exp++;
}
if (exp > MAXEXP2) {
/* Overflow. Return largest representable number instead. */
return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1;
} else {
return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1));
}
}
#endif
#if ACCT_VERSION==3
/*
* encode an u64 into a 32 bit IEEE float
*/
static u32 encode_float(u64 value)
{
unsigned exp = 190;
unsigned u;
if (value==0) return 0;
while ((s64)value > 0){
value <<= 1;
exp--;
}
u = (u32)(value >> 40) & 0x7fffffu;
return u | (exp << 23);
}
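/*
 * Worked example: encode_float(1) shifts the value left 63 times before
 * the sign bit becomes set, so exp ends at 190 - 63 = 127 (the IEEE 754
 * single-precision bias) and the stored mantissa bits are all zero,
 * giving 127 << 23 = 0x3f800000, i.e. 1.0f. The starting exponent of 190
 * is simply 127 + 63, one step per possible shift.
 */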
#endif
/*
* Write an accounting entry for an exiting process
*
* The acct_process() call is the workhorse of the process
* accounting system. The struct acct is built here and then written
* into the accounting file. This function should only be called from
* do_exit() or when switching to a different output file.
*/
/*
* do_acct_process does all actual work. Caller holds the reference to file.
*/
static void do_acct_process(struct bsd_acct_struct *acct,
struct pid_namespace *ns, struct file *file)
{
struct pacct_struct *pacct = &current->signal->pacct;
acct_t ac;
mm_segment_t fs;
unsigned long flim;
u64 elapsed;
u64 run_time;
struct timespec uptime;
struct tty_struct *tty;
const struct cred *orig_cred;
/* Perform file operations on behalf of whoever enabled accounting */
orig_cred = override_creds(file->f_cred);
/*
* First check to see if there is enough free space to continue
* the process accounting system.
*/
if (!check_free_space(acct, file))
goto out;
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions.
*/
memset((caddr_t)&ac, 0, sizeof(acct_t));
ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER;
strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm));
/* calculate run_time in nsec*/
do_posix_clock_monotonic_gettime(&uptime);
run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC
+ current->group_leader->start_time.tv_nsec;
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ(run_time);
#if ACCT_VERSION==3
ac.ac_etime = encode_float(elapsed);
#else
ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
(unsigned long) elapsed : (unsigned long) -1l);
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
{
/* new enlarged etime field */
comp2_t etime = encode_comp2_t(elapsed);
ac.ac_etime_hi = etime >> 16;
ac.ac_etime_lo = (u16) etime;
}
#endif
do_div(elapsed, AHZ);
ac.ac_btime = get_seconds() - elapsed;
/* we really need to bite the bullet and change layout */
ac.ac_uid = orig_cred->uid;
ac.ac_gid = orig_cred->gid;
#if ACCT_VERSION==2
ac.ac_ahz = AHZ;
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
/* backward-compatible 16 bit fields */
ac.ac_uid16 = ac.ac_uid;
ac.ac_gid16 = ac.ac_gid;
#endif
#if ACCT_VERSION==3
ac.ac_pid = task_tgid_nr_ns(current, ns);
rcu_read_lock();
ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
rcu_read_unlock();
#endif
spin_lock_irq(&current->sighand->siglock);
tty = current->signal->tty; /* Safe as we hold the siglock */
ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
ac.ac_flag = pacct->ac_flag;
ac.ac_mem = encode_comp_t(pacct->ac_mem);
ac.ac_minflt = encode_comp_t(pacct->ac_minflt);
ac.ac_majflt = encode_comp_t(pacct->ac_majflt);
ac.ac_exitcode = pacct->ac_exitcode;
spin_unlock_irq(&current->sighand->siglock);
ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
ac.ac_swaps = encode_comp_t(0);
/*
* Kernel segment override to datasegment and write it
* to the accounting file.
*/
fs = get_fs();
set_fs(KERNEL_DS);
/*
* Accounting records are not subject to resource limits.
*/
flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
file->f_op->write(file, (char *)&ac,
sizeof(acct_t), &file->f_pos);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
out:
revert_creds(orig_cred);
}
/**
* acct_init_pacct - initialize a new pacct_struct
* @pacct: per-process accounting info struct to initialize
*/
void acct_init_pacct(struct pacct_struct *pacct)
{
memset(pacct, 0, sizeof(struct pacct_struct));
pacct->ac_utime = pacct->ac_stime = cputime_zero;
}
/**
* acct_collect - collect accounting information into pacct_struct
* @exitcode: task exit code
* @group_dead: not 0, if this thread is the last one in the process.
*/
void acct_collect(long exitcode, int group_dead)
{
struct pacct_struct *pacct = &current->signal->pacct;
unsigned long vsize = 0;
if (group_dead && current->mm) {
struct vm_area_struct *vma;
down_read(&current->mm->mmap_sem);
vma = current->mm->mmap;
while (vma) {
vsize += vma->vm_end - vma->vm_start;
vma = vma->vm_next;
}
up_read(&current->mm->mmap_sem);
}
spin_lock_irq(&current->sighand->siglock);
if (group_dead)
pacct->ac_mem = vsize / 1024;
if (thread_group_leader(current)) {
pacct->ac_exitcode = exitcode;
if (current->flags & PF_FORKNOEXEC)
pacct->ac_flag |= AFORK;
}
if (current->flags & PF_SUPERPRIV)
pacct->ac_flag |= ASU;
if (current->flags & PF_DUMPCORE)
pacct->ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
pacct->ac_flag |= AXSIG;
pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
pacct->ac_minflt += current->min_flt;
pacct->ac_majflt += current->maj_flt;
spin_unlock_irq(&current->sighand->siglock);
}
static void acct_process_in_ns(struct pid_namespace *ns)
{
struct file *file = NULL;
struct bsd_acct_struct *acct;
acct = ns->bacct;
/*
* accelerate the common fastpath:
*/
if (!acct || !acct->file)
return;
spin_lock(&acct_lock);
file = acct->file;
if (unlikely(!file)) {
spin_unlock(&acct_lock);
return;
}
get_file(file);
spin_unlock(&acct_lock);
do_acct_process(acct, ns, file);
fput(file);
}
/**
* acct_process - now just a wrapper around acct_process_in_ns,
* which in turn is a wrapper around do_acct_process.
*
* handles process accounting for an exiting task
*/
void acct_process(void)
{
struct pid_namespace *ns;
/*
* This loop is safe lockless, since current is still
* alive and holds its namespace, which in turn holds
* its parent.
*/
for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
acct_process_in_ns(ns);
}

397
kernel/kernel/async.c Normal file

@@ -0,0 +1,397 @@
/*
* async.c: Asynchronous function calls for boot performance
*
* (C) Copyright 2009 Intel Corporation
* Author: Arjan van de Ven <arjan@linux.intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
/*
Goals and Theory of Operation
The primary goal of this feature is to reduce the kernel boot time,
by performing various independent hardware delays and discovery operations
in a decoupled fashion rather than strictly serialized.
More specifically, the asynchronous function call concept allows
certain operations (primarily during system boot) to happen
asynchronously, out of order, while these operations still
have their externally visible parts happen sequentially and in-order.
(not unlike how out-of-order CPUs retire their instructions in order)
Key to the asynchronous function call implementation is the concept of
a "sequence cookie" (which, although it has an abstracted type, can be
thought of as a monotonically incrementing number).
The async core will assign each scheduled event such a sequence cookie and
pass this to the called functions.
The asynchronously called function should, before doing a globally visible
operation such as registering device numbers, call the
async_synchronize_cookie() function and pass in its own cookie. The
async_synchronize_cookie() function will make sure that all asynchronous
operations that were scheduled prior to the operation corresponding with the
cookie have completed.
Subsystem/driver initialization code that scheduled asynchronous probe
functions, but which shares global resources with other drivers/subsystems
that do not use the asynchronous call feature, needs to do a full
synchronization with the async_synchronize_full() function before returning
from its init function. This is to maintain strict ordering between the
asynchronous and synchronous parts of the kernel.
*/
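/*
 * A minimal usage sketch (illustrative only): my_probe(), my_slow_init(),
 * my_register_device() and struct my_device are hypothetical placeholders,
 * not functions defined in this kernel.
 *
 *	static void my_probe(void *data, async_cookie_t cookie)
 *	{
 *		struct my_device *dev = data;
 *
 *		my_slow_init(dev);	// independent work, may run out of order
 *
 *		// wait for everything scheduled before us, so the globally
 *		// visible registration still happens in submission order
 *		async_synchronize_cookie(cookie);
 *		my_register_device(dev);
 *	}
 *
 *	// caller, e.g. a bus scan loop:
 *	async_schedule(my_probe, dev);
 */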
#include <linux/async.h>
#include <linux/bug.h>
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>
static async_cookie_t next_cookie = 1;
#define MAX_THREADS 256
#define MAX_WORK 32768
static LIST_HEAD(async_pending);
static LIST_HEAD(async_running);
static DEFINE_SPINLOCK(async_lock);
static int async_enabled = 0;
struct async_entry {
struct list_head list;
async_cookie_t cookie;
async_func_ptr *func;
void *data;
struct list_head *running;
};
static DECLARE_WAIT_QUEUE_HEAD(async_done);
static DECLARE_WAIT_QUEUE_HEAD(async_new);
static atomic_t entry_count;
static atomic_t thread_count;
extern int initcall_debug;
/*
* MUST be called with the lock held!
*/
static async_cookie_t __lowest_in_progress(struct list_head *running)
{
struct async_entry *entry;
if (!list_empty(running)) {
entry = list_first_entry(running,
struct async_entry, list);
return entry->cookie;
}
list_for_each_entry(entry, &async_pending, list)
if (entry->running == running)
return entry->cookie;
return next_cookie; /* "infinity" value */
}
static async_cookie_t lowest_in_progress(struct list_head *running)
{
unsigned long flags;
async_cookie_t ret;
spin_lock_irqsave(&async_lock, flags);
ret = __lowest_in_progress(running);
spin_unlock_irqrestore(&async_lock, flags);
return ret;
}
/*
* pick the first pending entry and run it
*/
static void run_one_entry(void)
{
unsigned long flags;
struct async_entry *entry;
ktime_t calltime, delta, rettime;
/* 1) pick one task from the pending queue */
spin_lock_irqsave(&async_lock, flags);
if (list_empty(&async_pending))
goto out;
entry = list_first_entry(&async_pending, struct async_entry, list);
/* 2) move it to the running queue */
list_move_tail(&entry->list, entry->running);
spin_unlock_irqrestore(&async_lock, flags);
/* 3) run it (and print duration)*/
if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
entry->func, task_pid_nr(current));
calltime = ktime_get();
}
entry->func(entry->data, entry->cookie);
if (initcall_debug && system_state == SYSTEM_BOOTING) {
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
printk("initcall %lli_%pF returned 0 after %lld usecs\n",
(long long)entry->cookie,
entry->func,
(long long)ktime_to_ns(delta) >> 10);
}
/* 4) remove it from the running queue */
spin_lock_irqsave(&async_lock, flags);
list_del(&entry->list);
/* 5) free the entry */
kfree(entry);
atomic_dec(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);
/* 6) wake up any waiters. */
wake_up(&async_done);
return;
out:
spin_unlock_irqrestore(&async_lock, flags);
}
static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running)
{
struct async_entry *entry;
unsigned long flags;
async_cookie_t newcookie;
/* allow irq-off callers */
entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
/*
* If we're out of memory or if there's too much work
* pending already, we execute synchronously.
*/
if (!async_enabled || !entry || atomic_read(&entry_count) > MAX_WORK) {
kfree(entry);
spin_lock_irqsave(&async_lock, flags);
newcookie = next_cookie++;
spin_unlock_irqrestore(&async_lock, flags);
/* low on memory.. run synchronously */
ptr(data, newcookie);
return newcookie;
}
entry->func = ptr;
entry->data = data;
entry->running = running;
spin_lock_irqsave(&async_lock, flags);
newcookie = entry->cookie = next_cookie++;
list_add_tail(&entry->list, &async_pending);
atomic_inc(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);
wake_up(&async_new);
return newcookie;
}
/**
* async_schedule - schedule a function for asynchronous execution
* @ptr: function to execute asynchronously
* @data: data pointer to pass to the function
*
* Returns an async_cookie_t that may be used for checkpointing later.
* Note: This function may be called from atomic or non-atomic contexts.
*/
async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
{
return __async_schedule(ptr, data, &async_running);
}
EXPORT_SYMBOL_GPL(async_schedule);
/**
* async_schedule_domain - schedule a function for asynchronous execution within a certain domain
* @ptr: function to execute asynchronously
* @data: data pointer to pass to the function
* @running: running list for the domain
*
* Returns an async_cookie_t that may be used for checkpointing later.
* @running may be used in the async_synchronize_*_domain() functions
* to wait within a certain synchronization domain rather than globally.
* A synchronization domain is specified via the running queue @running to use.
* Note: This function may be called from atomic or non-atomic contexts.
*/
async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
struct list_head *running)
{
return __async_schedule(ptr, data, running);
}
EXPORT_SYMBOL_GPL(async_schedule_domain);
/**
* async_synchronize_full - synchronize all asynchronous function calls
*
* This function waits until all asynchronous function calls have been done.
*/
void async_synchronize_full(void)
{
do {
async_synchronize_cookie(next_cookie);
} while (!list_empty(&async_running) || !list_empty(&async_pending));
}
EXPORT_SYMBOL_GPL(async_synchronize_full);
/**
* async_synchronize_full_domain - synchronize all asynchronous function calls within a certain domain
* @list: running list to synchronize on
*
* This function waits until all asynchronous function calls for the
* synchronization domain specified by the running list @list have been done.
*/
void async_synchronize_full_domain(struct list_head *list)
{
async_synchronize_cookie_domain(next_cookie, list);
}
EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
/**
* async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
* @cookie: async_cookie_t to use as checkpoint
* @running: running list to synchronize on
*
* This function waits until all asynchronous function calls for the
* synchronization domain specified by the running list @running submitted
* prior to @cookie have been done.
*/
void async_synchronize_cookie_domain(async_cookie_t cookie,
struct list_head *running)
{
ktime_t starttime, delta, endtime;
if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk("async_waiting @ %i\n", task_pid_nr(current));
starttime = ktime_get();
}
wait_event(async_done, lowest_in_progress(running) >= cookie);
if (initcall_debug && system_state == SYSTEM_BOOTING) {
endtime = ktime_get();
delta = ktime_sub(endtime, starttime);
printk("async_continuing @ %i after %lli usec\n",
task_pid_nr(current),
(long long)ktime_to_ns(delta) >> 10);
}
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
/**
* async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
* @cookie: async_cookie_t to use as checkpoint
*
* This function waits until all asynchronous function calls prior to @cookie
* have been done.
*/
void async_synchronize_cookie(async_cookie_t cookie)
{
async_synchronize_cookie_domain(cookie, &async_running);
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie);
static int async_thread(void *unused)
{
DECLARE_WAITQUEUE(wq, current);
add_wait_queue(&async_new, &wq);
while (!kthread_should_stop()) {
int ret = HZ;
set_current_state(TASK_INTERRUPTIBLE);
/*
* check the list head without lock.. false positives
* are dealt with inside run_one_entry() while holding
* the lock.
*/
rmb();
if (!list_empty(&async_pending))
run_one_entry();
else
ret = schedule_timeout(HZ);
if (ret == 0) {
/*
* we timed out, which means we as a thread are redundant.
* we sign off and die, but to avoid any races there
* is a last-straw check to see if work snuck in.
*/
atomic_dec(&thread_count);
wmb(); /* manager must see our departure first */
if (list_empty(&async_pending))
break;
/*
* whoops, work came in between us timing out and us
* signing off; we need to stay alive and keep working.
*/
atomic_inc(&thread_count);
}
}
remove_wait_queue(&async_new, &wq);
return 0;
}
static int async_manager_thread(void *unused)
{
DECLARE_WAITQUEUE(wq, current);
add_wait_queue(&async_new, &wq);
while (!kthread_should_stop()) {
int tc, ec;
set_current_state(TASK_INTERRUPTIBLE);
tc = atomic_read(&thread_count);
rmb();
ec = atomic_read(&entry_count);
while (tc < ec && tc < MAX_THREADS) {
if (IS_ERR(kthread_run(async_thread, NULL, "async/%i",
tc))) {
msleep(100);
continue;
}
atomic_inc(&thread_count);
tc++;
}
schedule();
}
remove_wait_queue(&async_new, &wq);
return 0;
}
static int __init async_init(void)
{
async_enabled =
!IS_ERR(kthread_run(async_manager_thread, NULL, "async/mgr"));
WARN_ON(!async_enabled);
return 0;
}
core_initcall(async_init);

1513
kernel/kernel/audit.c Normal file

File diff suppressed because it is too large

167
kernel/kernel/audit.h Normal file

@@ -0,0 +1,167 @@
/* audit -- definition of audit_context structure and supporting types
*
* Copyright 2003-2004 Red Hat, Inc.
* Copyright 2005 Hewlett-Packard Development Company, L.P.
* Copyright 2005 IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/audit.h>
#include <linux/skbuff.h>
/* 0 = no checking
1 = put_count checking
2 = verbose put_count checking
*/
#define AUDIT_DEBUG 0
/* At task start time, the audit_state is set in the audit_context using
a per-task filter. At syscall entry, the audit_state is augmented by
the syscall filter. */
enum audit_state {
AUDIT_DISABLED, /* Do not create per-task audit_context.
* No syscall-specific audit records can
* be generated. */
AUDIT_SETUP_CONTEXT, /* Create the per-task audit_context,
* but don't necessarily fill it in at
* syscall entry time (i.e., filter
* instead). */
AUDIT_BUILD_CONTEXT, /* Create the per-task audit_context,
* and always fill it in at syscall
* entry time. This makes a full
* syscall record available if some
* other part of the kernel decides it
* should be recorded. */
AUDIT_RECORD_CONTEXT /* Create the per-task audit_context,
* always fill it in at syscall entry
* time, and always write out the audit
* record at syscall exit time. */
};
/* Rule lists */
struct audit_watch;
struct audit_tree;
struct audit_chunk;
struct audit_entry {
struct list_head list;
struct rcu_head rcu;
struct audit_krule rule;
};
#ifdef CONFIG_AUDIT
extern int audit_enabled;
extern int audit_ever_enabled;
#endif
extern int audit_pid;
#define AUDIT_INODE_BUCKETS 32
extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
static inline int audit_hash_ino(u32 ino)
{
return (ino & (AUDIT_INODE_BUCKETS-1));
}
extern int audit_match_class(int class, unsigned syscall);
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
extern int audit_compare_dname_path(const char *dname, const char *path,
int *dirlen);
extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
int done, int multi,
void *payload, int size);
extern void audit_send_reply(int pid, int seq, int type,
int done, int multi,
void *payload, int size);
extern void audit_panic(const char *message);
struct audit_netlink_list {
int pid;
struct sk_buff_head q;
};
int audit_send_list(void *);
extern int selinux_audit_rule_update(void);
extern struct mutex audit_filter_mutex;
extern void audit_free_rule_rcu(struct rcu_head *);
extern struct list_head audit_filter_list[];
/* audit watch functions */
extern unsigned long audit_watch_inode(struct audit_watch *watch);
extern dev_t audit_watch_dev(struct audit_watch *watch);
extern void audit_put_watch(struct audit_watch *watch);
extern void audit_get_watch(struct audit_watch *watch);
extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op);
extern int audit_add_watch(struct audit_krule *krule);
extern void audit_remove_watch(struct audit_watch *watch);
extern void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list);
extern void audit_inotify_unregister(struct list_head *in_list);
extern char *audit_watch_path(struct audit_watch *watch);
extern struct list_head *audit_watch_rules(struct audit_watch *watch);
extern struct audit_entry *audit_dupe_rule(struct audit_krule *old,
struct audit_watch *watch);
#ifdef CONFIG_AUDIT_TREE
extern struct audit_chunk *audit_tree_lookup(const struct inode *);
extern void audit_put_chunk(struct audit_chunk *);
extern int audit_tree_match(struct audit_chunk *, struct audit_tree *);
extern int audit_make_tree(struct audit_krule *, char *, u32);
extern int audit_add_tree_rule(struct audit_krule *);
extern int audit_remove_tree_rule(struct audit_krule *);
extern void audit_trim_trees(void);
extern int audit_tag_tree(char *old, char *new);
extern const char *audit_tree_path(struct audit_tree *);
extern void audit_put_tree(struct audit_tree *);
extern void audit_kill_trees(struct list_head *);
#else
#define audit_remove_tree_rule(rule) BUG()
#define audit_add_tree_rule(rule) -EINVAL
#define audit_make_tree(rule, str, op) -EINVAL
#define audit_trim_trees() (void)0
#define audit_put_tree(tree) (void)0
#define audit_tag_tree(old, new) -EINVAL
#define audit_tree_path(rule) "" /* never called */
#define audit_kill_trees(list) BUG()
#endif
extern char *audit_unpack_string(void **, size_t *, size_t);
extern pid_t audit_sig_pid;
extern uid_t audit_sig_uid;
extern u32 audit_sig_sid;
#ifdef CONFIG_AUDITSYSCALL
extern int __audit_signal_info(int sig, struct task_struct *t);
static inline int audit_signal_info(int sig, struct task_struct *t)
{
if (unlikely((audit_pid && t->tgid == audit_pid) ||
(audit_signals && !audit_dummy_context())))
return __audit_signal_info(sig, t);
return 0;
}
extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
extern struct list_head *audit_killed_trees(void);
#else
#define audit_signal_info(s,t) AUDIT_DISABLED
#define audit_filter_inodes(t,c) AUDIT_DISABLED
#endif
extern struct mutex audit_cmd_mutex;

966
kernel/kernel/audit_tree.c Normal file

@@ -0,0 +1,966 @@
#include "audit.h"
#include <linux/inotify.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/kthread.h>
struct audit_tree;
struct audit_chunk;
struct audit_tree {
atomic_t count;
int goner;
struct audit_chunk *root;
struct list_head chunks;
struct list_head rules;
struct list_head list;
struct list_head same_root;
struct rcu_head head;
char pathname[];
};
struct audit_chunk {
struct list_head hash;
struct inotify_watch watch;
struct list_head trees; /* with root here */
int dead;
int count;
atomic_long_t refs;
struct rcu_head head;
struct node {
struct list_head list;
struct audit_tree *owner;
unsigned index; /* index; upper bit indicates 'will prune' */
} owners[];
};
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
/*
* One struct chunk is attached to each inode of interest.
* We replace struct chunk on tagging/untagging.
* Rules have pointer to struct audit_tree.
* Rules have struct list_head rlist forming a list of rules over
* the same tree.
* References to struct chunk are collected at audit_inode{,_child}()
* time and used in AUDIT_TREE rule matching.
* These references are dropped at the same time we are calling
* audit_free_names(), etc.
*
* Cyclic lists galore:
* tree.chunks anchors chunk.owners[].list hash_lock
* tree.rules anchors rule.rlist audit_filter_mutex
* chunk.trees anchors tree.same_root hash_lock
* chunk.hash is a hash with middle bits of watch.inode as
* a hash function. RCU, hash_lock
*
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
* chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
* of watch contributes 1 to .refs).
*
* node.index allows to get from node.list to containing chunk.
* MSB of that sucker is stolen to mark taggings that we might have to
* revert - several operations have very unpleasant cleanup logics and
* that makes a difference. Some.
*/
static struct inotify_handle *rtree_ih;
static struct audit_tree *alloc_tree(const char *s)
{
struct audit_tree *tree;
tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
if (tree) {
atomic_set(&tree->count, 1);
tree->goner = 0;
INIT_LIST_HEAD(&tree->chunks);
INIT_LIST_HEAD(&tree->rules);
INIT_LIST_HEAD(&tree->list);
INIT_LIST_HEAD(&tree->same_root);
tree->root = NULL;
strcpy(tree->pathname, s);
}
return tree;
}
static inline void get_tree(struct audit_tree *tree)
{
atomic_inc(&tree->count);
}
static void __put_tree(struct rcu_head *rcu)
{
struct audit_tree *tree = container_of(rcu, struct audit_tree, head);
kfree(tree);
}
static inline void put_tree(struct audit_tree *tree)
{
if (atomic_dec_and_test(&tree->count))
call_rcu(&tree->head, __put_tree);
}
/* to avoid bringing the entire thing in audit.h */
const char *audit_tree_path(struct audit_tree *tree)
{
return tree->pathname;
}
static struct audit_chunk *alloc_chunk(int count)
{
struct audit_chunk *chunk;
size_t size;
int i;
size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
chunk = kzalloc(size, GFP_KERNEL);
if (!chunk)
return NULL;
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
atomic_long_set(&chunk->refs, 1);
for (i = 0; i < count; i++) {
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
}
inotify_init_watch(&chunk->watch);
return chunk;
}
static void free_chunk(struct audit_chunk *chunk)
{
int i;
for (i = 0; i < chunk->count; i++) {
if (chunk->owners[i].owner)
put_tree(chunk->owners[i].owner);
}
kfree(chunk);
}
void audit_put_chunk(struct audit_chunk *chunk)
{
if (atomic_long_dec_and_test(&chunk->refs))
free_chunk(chunk);
}
static void __put_chunk(struct rcu_head *rcu)
{
struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
audit_put_chunk(chunk);
}
enum {HASH_SIZE = 128};
static struct list_head chunk_hash_heads[HASH_SIZE];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
static inline struct list_head *chunk_hash(const struct inode *inode)
{
unsigned long n = (unsigned long)inode / L1_CACHE_BYTES;
return chunk_hash_heads + n % HASH_SIZE;
}
/* hash_lock is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
struct list_head *list = chunk_hash(chunk->watch.inode);
list_add_rcu(&chunk->hash, list);
}
/* called under rcu_read_lock */
struct audit_chunk *audit_tree_lookup(const struct inode *inode)
{
struct list_head *list = chunk_hash(inode);
struct audit_chunk *p;
list_for_each_entry_rcu(p, list, hash) {
if (p->watch.inode == inode) {
atomic_long_inc(&p->refs);
return p;
}
}
return NULL;
}
int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
{
int n;
for (n = 0; n < chunk->count; n++)
if (chunk->owners[n].owner == tree)
return 1;
return 0;
}
/* tagging and untagging inodes with trees */
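/*
 * owners[] lives inline at the end of struct audit_chunk, so given a node
 * we can subtract its (masked) index from the node pointer to get back to
 * owners[0], and from there to the containing chunk. The MSB of ->index
 * is masked off because it is the "will prune" marker described above.
 */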
static struct audit_chunk *find_chunk(struct node *p)
{
int index = p->index & ~(1U<<31);
p -= index;
return container_of(p, struct audit_chunk, owners[0]);
}
static void untag_chunk(struct node *p)
{
struct audit_chunk *chunk = find_chunk(p);
struct audit_chunk *new;
struct audit_tree *owner;
int size = chunk->count - 1;
int i, j;
if (!pin_inotify_watch(&chunk->watch)) {
/*
* Filesystem is shutting down; all watches are getting
* evicted, just take it off the node list for this
* tree and let the eviction logics take care of the
* rest.
*/
owner = p->owner;
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
list_del_init(&p->list);
p->owner = NULL;
put_tree(owner);
return;
}
spin_unlock(&hash_lock);
/*
* pin_inotify_watch() succeeded, so the watch won't go away
* from under us.
*/
mutex_lock(&chunk->watch.inode->inotify_mutex);
if (chunk->dead) {
mutex_unlock(&chunk->watch.inode->inotify_mutex);
goto out;
}
owner = p->owner;
if (!size) {
chunk->dead = 1;
spin_lock(&hash_lock);
list_del_init(&chunk->trees);
if (owner->root == chunk)
owner->root = NULL;
list_del_init(&p->list);
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
goto out;
}
new = alloc_chunk(size);
if (!new)
goto Fallback;
if (inotify_clone_watch(&chunk->watch, &new->watch) < 0) {
free_chunk(new);
goto Fallback;
}
chunk->dead = 1;
spin_lock(&hash_lock);
list_replace_init(&chunk->trees, &new->trees);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
for (i = j = 0; j <= size; i++, j++) {
struct audit_tree *s;
if (&chunk->owners[j] == p) {
list_del_init(&p->list);
i--;
continue;
}
s = chunk->owners[j].owner;
new->owners[i].owner = s;
new->owners[i].index = chunk->owners[j].index - j + i;
if (!s) /* result of earlier fallback */
continue;
get_tree(s);
list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}
list_replace_rcu(&chunk->hash, &new->hash);
list_for_each_entry(owner, &new->trees, same_root)
owner->root = new;
spin_unlock(&hash_lock);
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
goto out;
Fallback:
// do the best we can
spin_lock(&hash_lock);
if (owner->root == chunk) {
list_del_init(&owner->same_root);
owner->root = NULL;
}
list_del_init(&p->list);
p->owner = NULL;
put_tree(owner);
spin_unlock(&hash_lock);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
out:
unpin_inotify_watch(&chunk->watch);
spin_lock(&hash_lock);
}
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
struct audit_chunk *chunk = alloc_chunk(1);
if (!chunk)
return -ENOMEM;
if (inotify_add_watch(rtree_ih, &chunk->watch, inode, IN_IGNORED | IN_DELETE_SELF) < 0) {
free_chunk(chunk);
return -ENOSPC;
}
mutex_lock(&inode->inotify_mutex);
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
inotify_evict_watch(&chunk->watch);
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
return 0;
}
chunk->owners[0].index = (1U << 31);
chunk->owners[0].owner = tree;
get_tree(tree);
list_add(&chunk->owners[0].list, &tree->chunks);
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
insert_hash(chunk);
spin_unlock(&hash_lock);
mutex_unlock(&inode->inotify_mutex);
return 0;
}
/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
struct inotify_watch *watch;
struct audit_tree *owner;
struct audit_chunk *chunk, *old;
struct node *p;
int n;
if (inotify_find_watch(rtree_ih, inode, &watch) < 0)
return create_chunk(inode, tree);
old = container_of(watch, struct audit_chunk, watch);
/* are we already there? */
spin_lock(&hash_lock);
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
put_inotify_watch(&old->watch);
return 0;
}
}
spin_unlock(&hash_lock);
chunk = alloc_chunk(old->count + 1);
if (!chunk) {
put_inotify_watch(&old->watch);
return -ENOMEM;
}
mutex_lock(&inode->inotify_mutex);
if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&old->watch);
free_chunk(chunk);
return -ENOSPC;
}
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
chunk->dead = 1;
inotify_evict_watch(&chunk->watch);
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&old->watch);
put_inotify_watch(&chunk->watch);
return 0;
}
list_replace_init(&old->trees, &chunk->trees);
for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
struct audit_tree *s = old->owners[n].owner;
p->owner = s;
p->index = old->owners[n].index;
if (!s) /* result of fallback in untag */
continue;
get_tree(s);
list_replace_init(&old->owners[n].list, &p->list);
}
p->index = (chunk->count - 1) | (1U<<31);
p->owner = tree;
get_tree(tree);
list_add(&p->list, &tree->chunks);
list_replace_rcu(&old->hash, &chunk->hash);
list_for_each_entry(owner, &chunk->trees, same_root)
owner->root = chunk;
old->dead = 1;
if (!tree->root) {
tree->root = chunk;
list_add(&tree->same_root, &chunk->trees);
}
spin_unlock(&hash_lock);
inotify_evict_watch(&old->watch);
mutex_unlock(&inode->inotify_mutex);
put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
put_inotify_watch(&old->watch); /* and kill it */
return 0;
}
static void kill_rules(struct audit_tree *tree)
{
struct audit_krule *rule, *next;
struct audit_entry *entry;
struct audit_buffer *ab;
list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
entry = container_of(rule, struct audit_entry, rule);
list_del_init(&rule->rlist);
if (rule->tree) {
/* not a half-baked one */
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, "op=");
audit_log_string(ab, "remove rule");
audit_log_format(ab, " dir=");
audit_log_untrustedstring(ab, rule->tree->pathname);
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
rule->tree = NULL;
list_del_rcu(&entry->list);
list_del(&entry->rule.list);
call_rcu(&entry->rcu, audit_free_rule_rcu);
}
}
}
/*
* finish killing struct audit_tree
*/
static void prune_one(struct audit_tree *victim)
{
spin_lock(&hash_lock);
while (!list_empty(&victim->chunks)) {
struct node *p;
p = list_entry(victim->chunks.next, struct node, list);
untag_chunk(p);
}
spin_unlock(&hash_lock);
put_tree(victim);
}
/* trim the uncommitted chunks from tree */
static void trim_marked(struct audit_tree *tree)
{
struct list_head *p, *q;
spin_lock(&hash_lock);
if (tree->goner) {
spin_unlock(&hash_lock);
return;
}
/* reorder */
for (p = tree->chunks.next; p != &tree->chunks; p = q) {
struct node *node = list_entry(p, struct node, list);
q = p->next;
if (node->index & (1U<<31)) {
list_del_init(p);
list_add(p, &tree->chunks);
}
}
while (!list_empty(&tree->chunks)) {
struct node *node;
node = list_entry(tree->chunks.next, struct node, list);
/* have we run out of marked? */
if (!(node->index & (1U<<31)))
break;
untag_chunk(node);
}
if (!tree->root && !tree->goner) {
tree->goner = 1;
spin_unlock(&hash_lock);
mutex_lock(&audit_filter_mutex);
kill_rules(tree);
list_del_init(&tree->list);
mutex_unlock(&audit_filter_mutex);
prune_one(tree);
} else {
spin_unlock(&hash_lock);
}
}
static void audit_schedule_prune(void);
/* called with audit_filter_mutex */
int audit_remove_tree_rule(struct audit_krule *rule)
{
struct audit_tree *tree;
tree = rule->tree;
if (tree) {
spin_lock(&hash_lock);
list_del_init(&rule->rlist);
if (list_empty(&tree->rules) && !tree->goner) {
tree->root = NULL;
list_del_init(&tree->same_root);
tree->goner = 1;
list_move(&tree->list, &prune_list);
rule->tree = NULL;
spin_unlock(&hash_lock);
audit_schedule_prune();
return 1;
}
rule->tree = NULL;
spin_unlock(&hash_lock);
return 1;
}
return 0;
}
void audit_trim_trees(void)
{
struct list_head cursor;
mutex_lock(&audit_filter_mutex);
list_add(&cursor, &tree_list);
while (cursor.next != &tree_list) {
struct audit_tree *tree;
struct path path;
struct vfsmount *root_mnt;
struct node *node;
struct list_head list;
int err;
tree = container_of(cursor.next, struct audit_tree, list);
get_tree(tree);
list_del(&cursor);
list_add(&cursor, &tree->list);
mutex_unlock(&audit_filter_mutex);
err = kern_path(tree->pathname, 0, &path);
if (err)
goto skip_it;
root_mnt = collect_mounts(&path);
path_put(&path);
if (!root_mnt)
goto skip_it;
list_add_tail(&list, &root_mnt->mnt_list);
spin_lock(&hash_lock);
list_for_each_entry(node, &tree->chunks, list) {
struct audit_chunk *chunk = find_chunk(node);
struct inode *inode = chunk->watch.inode;
struct vfsmount *mnt;
node->index |= 1U<<31;
list_for_each_entry(mnt, &list, mnt_list) {
if (mnt->mnt_root->d_inode == inode) {
node->index &= ~(1U<<31);
break;
}
}
}
spin_unlock(&hash_lock);
trim_marked(tree);
put_tree(tree);
list_del_init(&list);
drop_collected_mounts(root_mnt);
skip_it:
mutex_lock(&audit_filter_mutex);
}
list_del(&cursor);
mutex_unlock(&audit_filter_mutex);
}
static int is_under(struct vfsmount *mnt, struct dentry *dentry,
struct path *path)
{
if (mnt != path->mnt) {
for (;;) {
if (mnt->mnt_parent == mnt)
return 0;
if (mnt->mnt_parent == path->mnt)
break;
mnt = mnt->mnt_parent;
}
dentry = mnt->mnt_mountpoint;
}
return is_subdir(dentry, path->dentry);
}
int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
{
if (pathname[0] != '/' ||
rule->listnr != AUDIT_FILTER_EXIT ||
op != Audit_equal ||
rule->inode_f || rule->watch || rule->tree)
return -EINVAL;
rule->tree = alloc_tree(pathname);
if (!rule->tree)
return -ENOMEM;
return 0;
}
void audit_put_tree(struct audit_tree *tree)
{
put_tree(tree);
}
/* called with audit_filter_mutex */
int audit_add_tree_rule(struct audit_krule *rule)
{
struct audit_tree *seed = rule->tree, *tree;
struct path path;
struct vfsmount *mnt, *p;
struct list_head list;
int err;
list_for_each_entry(tree, &tree_list, list) {
if (!strcmp(seed->pathname, tree->pathname)) {
put_tree(seed);
rule->tree = tree;
list_add(&rule->rlist, &tree->rules);
return 0;
}
}
tree = seed;
list_add(&tree->list, &tree_list);
list_add(&rule->rlist, &tree->rules);
/* do not set rule->tree yet */
mutex_unlock(&audit_filter_mutex);
err = kern_path(tree->pathname, 0, &path);
if (err)
goto Err;
mnt = collect_mounts(&path);
path_put(&path);
if (!mnt) {
err = -ENOMEM;
goto Err;
}
list_add_tail(&list, &mnt->mnt_list);
get_tree(tree);
list_for_each_entry(p, &list, mnt_list) {
err = tag_chunk(p->mnt_root->d_inode, tree);
if (err)
break;
}
list_del(&list);
drop_collected_mounts(mnt);
if (!err) {
struct node *node;
spin_lock(&hash_lock);
list_for_each_entry(node, &tree->chunks, list)
node->index &= ~(1U<<31);
spin_unlock(&hash_lock);
} else {
trim_marked(tree);
goto Err;
}
mutex_lock(&audit_filter_mutex);
if (list_empty(&rule->rlist)) {
put_tree(tree);
return -ENOENT;
}
rule->tree = tree;
put_tree(tree);
return 0;
Err:
mutex_lock(&audit_filter_mutex);
list_del_init(&tree->list);
list_del_init(&tree->rules);
put_tree(tree);
return err;
}
int audit_tag_tree(char *old, char *new)
{
struct list_head cursor, barrier;
int failed = 0;
struct path path;
struct vfsmount *tagged;
struct list_head list;
struct vfsmount *mnt;
struct dentry *dentry;
int err;
err = kern_path(new, 0, &path);
if (err)
return err;
tagged = collect_mounts(&path);
path_put(&path);
if (!tagged)
return -ENOMEM;
err = kern_path(old, 0, &path);
if (err) {
drop_collected_mounts(tagged);
return err;
}
mnt = mntget(path.mnt);
dentry = dget(path.dentry);
path_put(&path);
list_add_tail(&list, &tagged->mnt_list);
mutex_lock(&audit_filter_mutex);
list_add(&barrier, &tree_list);
list_add(&cursor, &barrier);
while (cursor.next != &tree_list) {
struct audit_tree *tree;
struct vfsmount *p;
tree = container_of(cursor.next, struct audit_tree, list);
get_tree(tree);
list_del(&cursor);
list_add(&cursor, &tree->list);
mutex_unlock(&audit_filter_mutex);
err = kern_path(tree->pathname, 0, &path);
if (err) {
put_tree(tree);
mutex_lock(&audit_filter_mutex);
continue;
}
spin_lock(&vfsmount_lock);
if (!is_under(mnt, dentry, &path)) {
spin_unlock(&vfsmount_lock);
path_put(&path);
put_tree(tree);
mutex_lock(&audit_filter_mutex);
continue;
}
spin_unlock(&vfsmount_lock);
path_put(&path);
list_for_each_entry(p, &list, mnt_list) {
failed = tag_chunk(p->mnt_root->d_inode, tree);
if (failed)
break;
}
if (failed) {
put_tree(tree);
mutex_lock(&audit_filter_mutex);
break;
}
mutex_lock(&audit_filter_mutex);
spin_lock(&hash_lock);
if (!tree->goner) {
list_del(&tree->list);
list_add(&tree->list, &tree_list);
}
spin_unlock(&hash_lock);
put_tree(tree);
}
while (barrier.prev != &tree_list) {
struct audit_tree *tree;
tree = container_of(barrier.prev, struct audit_tree, list);
get_tree(tree);
list_del(&tree->list);
list_add(&tree->list, &barrier);
mutex_unlock(&audit_filter_mutex);
if (!failed) {
struct node *node;
spin_lock(&hash_lock);
list_for_each_entry(node, &tree->chunks, list)
node->index &= ~(1U<<31);
spin_unlock(&hash_lock);
} else {
trim_marked(tree);
}
put_tree(tree);
mutex_lock(&audit_filter_mutex);
}
list_del(&barrier);
list_del(&cursor);
list_del(&list);
mutex_unlock(&audit_filter_mutex);
dput(dentry);
mntput(mnt);
drop_collected_mounts(tagged);
return failed;
}
/*
* That gets run when evict_chunk() ends up needing to kill audit_tree.
* Runs from a separate thread.
*/
static int prune_tree_thread(void *unused)
{
mutex_lock(&audit_cmd_mutex);
mutex_lock(&audit_filter_mutex);
while (!list_empty(&prune_list)) {
struct audit_tree *victim;
victim = list_entry(prune_list.next, struct audit_tree, list);
list_del_init(&victim->list);
mutex_unlock(&audit_filter_mutex);
prune_one(victim);
mutex_lock(&audit_filter_mutex);
}
mutex_unlock(&audit_filter_mutex);
mutex_unlock(&audit_cmd_mutex);
return 0;
}
static void audit_schedule_prune(void)
{
kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
}
/*
* ... and that one is done if evict_chunk() decides to delay until the end
* of syscall. Runs synchronously.
*/
void audit_kill_trees(struct list_head *list)
{
mutex_lock(&audit_cmd_mutex);
mutex_lock(&audit_filter_mutex);
while (!list_empty(list)) {
struct audit_tree *victim;
victim = list_entry(list->next, struct audit_tree, list);
kill_rules(victim);
list_del_init(&victim->list);
mutex_unlock(&audit_filter_mutex);
prune_one(victim);
mutex_lock(&audit_filter_mutex);
}
mutex_unlock(&audit_filter_mutex);
mutex_unlock(&audit_cmd_mutex);
}
/*
* Here comes the stuff asynchronous to auditctl operations
*/
/* inode->inotify_mutex is locked */
static void evict_chunk(struct audit_chunk *chunk)
{
struct audit_tree *owner;
struct list_head *postponed = audit_killed_trees();
int need_prune = 0;
int n;
if (chunk->dead)
return;
chunk->dead = 1;
mutex_lock(&audit_filter_mutex);
spin_lock(&hash_lock);
while (!list_empty(&chunk->trees)) {
owner = list_entry(chunk->trees.next,
struct audit_tree, same_root);
owner->goner = 1;
owner->root = NULL;
list_del_init(&owner->same_root);
spin_unlock(&hash_lock);
if (!postponed) {
kill_rules(owner);
list_move(&owner->list, &prune_list);
need_prune = 1;
} else {
list_move(&owner->list, postponed);
}
spin_lock(&hash_lock);
}
list_del_rcu(&chunk->hash);
for (n = 0; n < chunk->count; n++)
list_del_init(&chunk->owners[n].list);
spin_unlock(&hash_lock);
if (need_prune)
audit_schedule_prune();
mutex_unlock(&audit_filter_mutex);
}
static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
u32 cookie, const char *dname, struct inode *inode)
{
struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
if (mask & IN_IGNORED) {
evict_chunk(chunk);
put_inotify_watch(watch);
}
}
static void destroy_watch(struct inotify_watch *watch)
{
struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
call_rcu(&chunk->head, __put_chunk);
}
static const struct inotify_operations rtree_inotify_ops = {
.handle_event = handle_event,
.destroy_watch = destroy_watch,
};
static int __init audit_tree_init(void)
{
int i;
rtree_ih = inotify_init(&rtree_inotify_ops);
if (IS_ERR(rtree_ih))
audit_panic("cannot initialize inotify handle for rectree watches");
for (i = 0; i < HASH_SIZE; i++)
INIT_LIST_HEAD(&chunk_hash_heads[i]);
return 0;
}
__initcall(audit_tree_init);

543
kernel/kernel/audit_watch.c Normal file
View File

@@ -0,0 +1,543 @@
/* audit_watch.c -- watching inodes
*
* Copyright 2003-2009 Red Hat, Inc.
* Copyright 2005 Hewlett-Packard Development Company, L.P.
* Copyright 2005 IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/audit.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/netlink.h>
#include <linux/sched.h>
#include <linux/inotify.h>
#include <linux/security.h>
#include "audit.h"
/*
* Reference counting:
*
* audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
* event. Each audit_watch holds a reference to its associated parent.
*
* audit_watch: if added to lists, lifetime is from audit_init_watch() to
* audit_remove_watch(). Additionally, an audit_watch may exist
* temporarily to assist in searching existing filter data. Each
* audit_krule holds a reference to its associated watch.
*/
struct audit_watch {
atomic_t count; /* reference count */
dev_t dev; /* associated superblock device */
char *path; /* insertion path */
unsigned long ino; /* associated inode number */
struct audit_parent *parent; /* associated parent */
struct list_head wlist; /* entry in parent->watches list */
struct list_head rules; /* associated rules */
};
struct audit_parent {
struct list_head ilist; /* entry in inotify registration list */
struct list_head watches; /* associated watches */
struct inotify_watch wdata; /* inotify watch data */
unsigned flags; /* status flags */
};
/* Inotify handle. */
struct inotify_handle *audit_ih;
/*
* audit_parent status flags:
*
* AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
* a filesystem event to ensure we're adding audit watches to a valid parent.
* Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
* receive them while we have nameidata, but must be used for IN_MOVE_SELF which
* we can receive while holding nameidata.
*/
#define AUDIT_PARENT_INVALID 0x001
/* Inotify events we care about. */
#define AUDIT_IN_WATCH (IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF)
static void audit_free_parent(struct inotify_watch *i_watch)
{
struct audit_parent *parent;
parent = container_of(i_watch, struct audit_parent, wdata);
WARN_ON(!list_empty(&parent->watches));
kfree(parent);
}
void audit_get_watch(struct audit_watch *watch)
{
atomic_inc(&watch->count);
}
void audit_put_watch(struct audit_watch *watch)
{
if (atomic_dec_and_test(&watch->count)) {
WARN_ON(watch->parent);
WARN_ON(!list_empty(&watch->rules));
kfree(watch->path);
kfree(watch);
}
}
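/*
 * Illustrative userspace sketch (not from the kernel sources): the
 * get/put pair above is a plain reference count -- whoever drops the
 * last reference frees the object.  A self-contained C11 analogue;
 * the type and function names are invented for the example.
 */
#if 0   /* example only, never built as part of this file */
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct obj {
        atomic_int count;
        char *path;
};

static struct obj *obj_new(const char *path)
{
        struct obj *o = malloc(sizeof(*o));

        if (!o)
                return NULL;
        atomic_init(&o->count, 1);      /* initial reference */
        o->path = strdup(path);
        return o;
}

static void obj_get(struct obj *o)
{
        atomic_fetch_add(&o->count, 1);
}

static void obj_put(struct obj *o)
{
        /* free only when the last reference is dropped */
        if (atomic_fetch_sub(&o->count, 1) == 1) {
                free(o->path);
                free(o);
        }
}

int main(void)
{
        struct obj *o = obj_new("/etc");

        if (!o)
                return 1;
        obj_get(o);     /* second user, e.g. a rule holding the watch */
        obj_put(o);     /* rule goes away */
        obj_put(o);     /* last reference: object is freed here */
        return 0;
}
#endif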
void audit_remove_watch(struct audit_watch *watch)
{
list_del(&watch->wlist);
put_inotify_watch(&watch->parent->wdata);
watch->parent = NULL;
audit_put_watch(watch); /* match initial get */
}
char *audit_watch_path(struct audit_watch *watch)
{
return watch->path;
}
struct list_head *audit_watch_rules(struct audit_watch *watch)
{
return &watch->rules;
}
unsigned long audit_watch_inode(struct audit_watch *watch)
{
return watch->ino;
}
dev_t audit_watch_dev(struct audit_watch *watch)
{
return watch->dev;
}
/* Initialize a parent watch entry. */
static struct audit_parent *audit_init_parent(struct nameidata *ndp)
{
struct audit_parent *parent;
s32 wd;
parent = kzalloc(sizeof(*parent), GFP_KERNEL);
if (unlikely(!parent))
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&parent->watches);
parent->flags = 0;
inotify_init_watch(&parent->wdata);
/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
get_inotify_watch(&parent->wdata);
wd = inotify_add_watch(audit_ih, &parent->wdata,
ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
if (wd < 0) {
audit_free_parent(&parent->wdata);
return ERR_PTR(wd);
}
return parent;
}
/* Initialize a watch entry. */
static struct audit_watch *audit_init_watch(char *path)
{
struct audit_watch *watch;
watch = kzalloc(sizeof(*watch), GFP_KERNEL);
if (unlikely(!watch))
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&watch->rules);
atomic_set(&watch->count, 1);
watch->path = path;
watch->dev = (dev_t)-1;
watch->ino = (unsigned long)-1;
return watch;
}
/* Translate a watch string to kernel representation. */
int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op)
{
struct audit_watch *watch;
if (!audit_ih)
return -EOPNOTSUPP;
if (path[0] != '/' || path[len-1] == '/' ||
krule->listnr != AUDIT_FILTER_EXIT ||
op != Audit_equal ||
krule->inode_f || krule->watch || krule->tree)
return -EINVAL;
watch = audit_init_watch(path);
if (IS_ERR(watch))
return PTR_ERR(watch);
audit_get_watch(watch);
krule->watch = watch;
return 0;
}
/* Duplicate the given audit watch. The new watch's rules list is initialized
* to an empty list and wlist is undefined. */
static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
{
char *path;
struct audit_watch *new;
path = kstrdup(old->path, GFP_KERNEL);
if (unlikely(!path))
return ERR_PTR(-ENOMEM);
new = audit_init_watch(path);
if (IS_ERR(new)) {
kfree(path);
goto out;
}
new->dev = old->dev;
new->ino = old->ino;
get_inotify_watch(&old->parent->wdata);
new->parent = old->parent;
out:
return new;
}
static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
{
if (audit_enabled) {
struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, "auid=%u ses=%u op=",
audit_get_loginuid(current),
audit_get_sessionid(current));
audit_log_string(ab, op);
audit_log_format(ab, " path=");
audit_log_untrustedstring(ab, w->path);
audit_log_key(ab, r->filterkey);
audit_log_format(ab, " list=%d res=1", r->listnr);
audit_log_end(ab);
}
}
/* Update inode info in audit rules based on filesystem event. */
static void audit_update_watch(struct audit_parent *parent,
const char *dname, dev_t dev,
unsigned long ino, unsigned invalidating)
{
struct audit_watch *owatch, *nwatch, *nextw;
struct audit_krule *r, *nextr;
struct audit_entry *oentry, *nentry;
mutex_lock(&audit_filter_mutex);
list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
if (audit_compare_dname_path(dname, owatch->path, NULL))
continue;
/* If the update involves invalidating rules, do the inode-based
* filtering now, so we don't omit records. */
if (invalidating && current->audit_context)
audit_filter_inodes(current, current->audit_context);
nwatch = audit_dupe_watch(owatch);
if (IS_ERR(nwatch)) {
mutex_unlock(&audit_filter_mutex);
audit_panic("error updating watch, skipping");
return;
}
nwatch->dev = dev;
nwatch->ino = ino;
list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
oentry = container_of(r, struct audit_entry, rule);
list_del(&oentry->rule.rlist);
list_del_rcu(&oentry->list);
nentry = audit_dupe_rule(&oentry->rule, nwatch);
if (IS_ERR(nentry)) {
list_del(&oentry->rule.list);
audit_panic("error updating watch, removing");
} else {
int h = audit_hash_ino((u32)ino);
list_add(&nentry->rule.rlist, &nwatch->rules);
list_add_rcu(&nentry->list, &audit_inode_hash[h]);
list_replace(&oentry->rule.list,
&nentry->rule.list);
}
audit_watch_log_rule_change(r, owatch, "updated rules");
call_rcu(&oentry->rcu, audit_free_rule_rcu);
}
audit_remove_watch(owatch);
goto add_watch_to_parent; /* event applies to a single watch */
}
mutex_unlock(&audit_filter_mutex);
return;
add_watch_to_parent:
list_add(&nwatch->wlist, &parent->watches);
mutex_unlock(&audit_filter_mutex);
return;
}
/* Remove all watches & rules associated with a parent that is going away. */
static void audit_remove_parent_watches(struct audit_parent *parent)
{
struct audit_watch *w, *nextw;
struct audit_krule *r, *nextr;
struct audit_entry *e;
mutex_lock(&audit_filter_mutex);
parent->flags |= AUDIT_PARENT_INVALID;
list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
e = container_of(r, struct audit_entry, rule);
audit_watch_log_rule_change(r, w, "remove rule");
list_del(&r->rlist);
list_del(&r->list);
list_del_rcu(&e->list);
call_rcu(&e->rcu, audit_free_rule_rcu);
}
audit_remove_watch(w);
}
mutex_unlock(&audit_filter_mutex);
}
/* Unregister inotify watches for parents on in_list.
* Generates an IN_IGNORED event. */
void audit_inotify_unregister(struct list_head *in_list)
{
struct audit_parent *p, *n;
list_for_each_entry_safe(p, n, in_list, ilist) {
list_del(&p->ilist);
inotify_rm_watch(audit_ih, &p->wdata);
/* the unpin matching the pin in audit_do_del_rule() */
unpin_inotify_watch(&p->wdata);
}
}
/* Get path information necessary for adding watches. */
static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw)
{
struct nameidata *ndparent, *ndwatch;
int err;
ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
if (unlikely(!ndparent))
return -ENOMEM;
ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
if (unlikely(!ndwatch)) {
kfree(ndparent);
return -ENOMEM;
}
err = path_lookup(path, LOOKUP_PARENT, ndparent);
if (err) {
kfree(ndparent);
kfree(ndwatch);
return err;
}
err = path_lookup(path, 0, ndwatch);
if (err) {
kfree(ndwatch);
ndwatch = NULL;
}
*ndp = ndparent;
*ndw = ndwatch;
return 0;
}
/* Release resources used for watch path information. */
static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
{
if (ndp) {
path_put(&ndp->path);
kfree(ndp);
}
if (ndw) {
path_put(&ndw->path);
kfree(ndw);
}
}
/* Associate the given rule with an existing parent inotify_watch.
* Caller must hold audit_filter_mutex. */
static void audit_add_to_parent(struct audit_krule *krule,
struct audit_parent *parent)
{
struct audit_watch *w, *watch = krule->watch;
int watch_found = 0;
list_for_each_entry(w, &parent->watches, wlist) {
if (strcmp(watch->path, w->path))
continue;
watch_found = 1;
/* put krule's and initial refs to temporary watch */
audit_put_watch(watch);
audit_put_watch(watch);
audit_get_watch(w);
krule->watch = watch = w;
break;
}
if (!watch_found) {
get_inotify_watch(&parent->wdata);
watch->parent = parent;
list_add(&watch->wlist, &parent->watches);
}
list_add(&krule->rlist, &watch->rules);
}
/* Find a matching watch entry, or add this one.
* Caller must hold audit_filter_mutex. */
int audit_add_watch(struct audit_krule *krule)
{
struct audit_watch *watch = krule->watch;
struct inotify_watch *i_watch;
struct audit_parent *parent;
struct nameidata *ndp = NULL, *ndw = NULL;
int ret = 0;
mutex_unlock(&audit_filter_mutex);
/* Avoid calling path_lookup under audit_filter_mutex. */
ret = audit_get_nd(watch->path, &ndp, &ndw);
if (ret) {
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
goto error;
}
/* update watch filter fields */
if (ndw) {
watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
watch->ino = ndw->path.dentry->d_inode->i_ino;
}
/* The audit_filter_mutex must not be held during inotify calls because
* we hold it during inotify event callback processing. If an existing
* inotify watch is found, inotify_find_watch() grabs a reference before
* returning.
*/
if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
&i_watch) < 0) {
parent = audit_init_parent(ndp);
if (IS_ERR(parent)) {
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
ret = PTR_ERR(parent);
goto error;
}
} else
parent = container_of(i_watch, struct audit_parent, wdata);
mutex_lock(&audit_filter_mutex);
/* parent was moved before we took audit_filter_mutex */
if (parent->flags & AUDIT_PARENT_INVALID)
ret = -ENOENT;
else
audit_add_to_parent(krule, parent);
/* match get in audit_init_parent or inotify_find_watch */
put_inotify_watch(&parent->wdata);
error:
audit_put_nd(ndp, ndw); /* NULL args OK */
return ret;
}
void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list)
{
struct audit_watch *watch = krule->watch;
struct audit_parent *parent = watch->parent;
list_del(&krule->rlist);
if (list_empty(&watch->rules)) {
audit_remove_watch(watch);
if (list_empty(&parent->watches)) {
/* Put parent on the inotify un-registration
* list. Grab a reference before releasing
* audit_filter_mutex, to be released in
* audit_inotify_unregister().
* If filesystem is going away, just leave
* the sucker alone, eviction will take
* care of it. */
if (pin_inotify_watch(&parent->wdata))
list_add(&parent->ilist, list);
}
}
}
/* Update watch data in audit rules based on inotify events. */
static void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
u32 cookie, const char *dname, struct inode *inode)
{
struct audit_parent *parent;
parent = container_of(i_watch, struct audit_parent, wdata);
if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
audit_update_watch(parent, dname, inode->i_sb->s_dev,
inode->i_ino, 0);
else if (mask & (IN_DELETE|IN_MOVED_FROM))
audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
/* inotify automatically removes the watch and sends IN_IGNORED */
else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
audit_remove_parent_watches(parent);
/* inotify does not remove the watch, so remove it manually */
else if (mask & IN_MOVE_SELF) {
audit_remove_parent_watches(parent);
inotify_remove_watch_locked(audit_ih, i_watch);
} else if (mask & IN_IGNORED)
put_inotify_watch(i_watch);
}
static const struct inotify_operations audit_inotify_ops = {
.handle_event = audit_handle_ievent,
.destroy_watch = audit_free_parent,
};
static int __init audit_watch_init(void)
{
audit_ih = inotify_init(&audit_inotify_ops);
if (IS_ERR(audit_ih))
audit_panic("cannot initialize inotify handle");
return 0;
}
subsys_initcall(audit_watch_init);
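/*
 * Illustrative userspace sketch (not from the kernel sources): the
 * parent watch above subscribes to the same event mask a userspace
 * program would hand to inotify_add_watch(2).  Minimal monitor for a
 * directory given on the command line; error handling is trimmed.
 */
#if 0   /* example only, never built as part of this file */
#include <stdio.h>
#include <unistd.h>
#include <sys/inotify.h>

int main(int argc, char **argv)
{
        char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
        ssize_t len;
        int fd = inotify_init1(IN_CLOEXEC);

        if (fd < 0 || argc < 2)
                return 1;
        /* the same events AUDIT_IN_WATCH requests on the parent directory */
        if (inotify_add_watch(fd, argv[1],
                              IN_MOVE | IN_CREATE | IN_DELETE |
                              IN_DELETE_SELF | IN_MOVE_SELF) < 0)
                return 1;
        while ((len = read(fd, buf, sizeof(buf))) > 0) {
                char *p = buf;

                while (p < buf + len) {
                        struct inotify_event *ev = (struct inotify_event *)p;

                        printf("mask=0x%x name=%s\n", ev->mask,
                               ev->len ? ev->name : "(watched dir itself)");
                        p += sizeof(*ev) + ev->len;
                }
        }
        return 0;
}
#endif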

1381
kernel/kernel/auditfilter.c Normal file

File diff suppressed because it is too large Load Diff

2538
kernel/kernel/auditsc.c Normal file

File diff suppressed because it is too large Load Diff

91
kernel/kernel/backtracetest.c Normal file
View File

@@ -0,0 +1,91 @@
/*
* Simple stack backtrace regression test module
*
* (C) Copyright 2008 Intel Corporation
* Author: Arjan van de Ven <arjan@linux.intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/stacktrace.h>
static void backtrace_test_normal(void)
{
printk("Testing a backtrace from process context.\n");
printk("The following trace is a kernel self test and not a bug!\n");
dump_stack();
}
static DECLARE_COMPLETION(backtrace_work);
static void backtrace_test_irq_callback(unsigned long data)
{
dump_stack();
complete(&backtrace_work);
}
static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0);
static void backtrace_test_irq(void)
{
printk("Testing a backtrace from irq context.\n");
printk("The following trace is a kernel self test and not a bug!\n");
init_completion(&backtrace_work);
tasklet_schedule(&backtrace_tasklet);
wait_for_completion(&backtrace_work);
}
#ifdef CONFIG_STACKTRACE
static void backtrace_test_saved(void)
{
struct stack_trace trace;
unsigned long entries[8];
printk("Testing a saved backtrace.\n");
printk("The following trace is a kernel self test and not a bug!\n");
trace.nr_entries = 0;
trace.max_entries = ARRAY_SIZE(entries);
trace.entries = entries;
trace.skip = 0;
save_stack_trace(&trace);
print_stack_trace(&trace, 0);
}
#else
static void backtrace_test_saved(void)
{
printk("Saved backtrace test skipped.\n");
}
#endif
static int backtrace_regression_test(void)
{
printk("====[ backtrace testing ]===========\n");
backtrace_test_normal();
backtrace_test_irq();
backtrace_test_saved();
printk("====[ end of backtrace testing ]====\n");
return 0;
}
static void exitf(void)
{
}
module_init(backtrace_regression_test);
module_exit(exitf);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");

19
kernel/kernel/bounds.c Normal file
View File

@@ -0,0 +1,19 @@
/*
* Generate definitions needed by the preprocessor.
* This code generates raw asm output which is post-processed
* to extract and format the required data.
*/
#define __GENERATING_BOUNDS_H
/* Include headers that define the enum constants of interest */
#include <linux/page-flags.h>
#include <linux/mmzone.h>
#include <linux/kbuild.h>
void foo(void)
{
/* The enum constants to put into include/linux/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
/* End of constants */
}

314
kernel/kernel/capability.c Normal file
View File

@@ -0,0 +1,314 @@
/*
* linux/kernel/capability.c
*
* Copyright (C) 1997 Andrew Main <zefram@fysh.org>
*
* Integrated into 2.1.97+, Andrew G. Morgan <morgan@kernel.org>
* 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net>
*/
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/pid_namespace.h>
#include <asm/uaccess.h>
/*
* Leveraged for setting/resetting capabilities
*/
const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
const kernel_cap_t __cap_full_set = CAP_FULL_SET;
const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET;
EXPORT_SYMBOL(__cap_empty_set);
EXPORT_SYMBOL(__cap_full_set);
EXPORT_SYMBOL(__cap_init_eff_set);
#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
int file_caps_enabled = 1;
static int __init file_caps_disable(char *str)
{
file_caps_enabled = 0;
return 1;
}
__setup("no_file_caps", file_caps_disable);
#endif
/*
* More recent versions of libcap are available from:
*
* http://www.kernel.org/pub/linux/libs/security/linux-privs/
*/
static void warn_legacy_capability_use(void)
{
static int warned;
if (!warned) {
char name[sizeof(current->comm)];
printk(KERN_INFO "warning: `%s' uses 32-bit capabilities"
" (legacy support in use)\n",
get_task_comm(name, current));
warned = 1;
}
}
/*
* Version 2 capabilities worked fine, but the linux/capability.h file
* that accompanied their introduction encouraged their use without
* the necessary user-space source code changes. As such, we have
* created a version 3 with equivalent functionality to version 2, but
* with a header change to protect legacy source code from using
* version 2 when it wanted to use version 1. If your system has code
* that trips the following warning, it is using version 2 specific
* capabilities and may be doing so insecurely.
*
* The remedy is to either upgrade your version of libcap (to 2.10+,
* if the application is linked against it), or recompile your
* application with modern kernel headers and this warning will go
* away.
*/
static void warn_deprecated_v2(void)
{
static int warned;
if (!warned) {
char name[sizeof(current->comm)];
printk(KERN_INFO "warning: `%s' uses deprecated v2"
" capabilities in a way that may be insecure.\n",
get_task_comm(name, current));
warned = 1;
}
}
/*
* Version check. Return the number of u32s in each capability flag
* array, or a negative value on error.
*/
static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
{
__u32 version;
if (get_user(version, &header->version))
return -EFAULT;
switch (version) {
case _LINUX_CAPABILITY_VERSION_1:
warn_legacy_capability_use();
*tocopy = _LINUX_CAPABILITY_U32S_1;
break;
case _LINUX_CAPABILITY_VERSION_2:
warn_deprecated_v2();
/*
* fall through - v3 is otherwise equivalent to v2.
*/
case _LINUX_CAPABILITY_VERSION_3:
*tocopy = _LINUX_CAPABILITY_U32S_3;
break;
default:
if (put_user((u32)_KERNEL_CAPABILITY_VERSION, &header->version))
return -EFAULT;
return -EINVAL;
}
return 0;
}
/*
* The only thing that can change the capabilities of the current
* process is the current process. As such, we can't be in this code
* at the same time as we are in the process of setting capabilities
* in this process. The net result is that we can limit our use of
* locks to when we are reading the caps of another process.
*/
static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
kernel_cap_t *pIp, kernel_cap_t *pPp)
{
int ret;
if (pid && (pid != task_pid_vnr(current))) {
struct task_struct *target;
read_lock(&tasklist_lock);
target = find_task_by_vpid(pid);
if (!target)
ret = -ESRCH;
else
ret = security_capget(target, pEp, pIp, pPp);
read_unlock(&tasklist_lock);
} else
ret = security_capget(current, pEp, pIp, pPp);
return ret;
}
/**
* sys_capget - get the capabilities of a given process.
* @header: pointer to struct that contains capability version and
* target pid data
* @dataptr: pointer to struct that contains the effective, permitted,
* and inheritable capabilities that are returned
*
* Returns 0 on success and < 0 on error.
*/
SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
{
int ret = 0;
pid_t pid;
unsigned tocopy;
kernel_cap_t pE, pI, pP;
ret = cap_validate_magic(header, &tocopy);
if (ret != 0)
return ret;
if (get_user(pid, &header->pid))
return -EFAULT;
if (pid < 0)
return -EINVAL;
ret = cap_get_target_pid(pid, &pE, &pI, &pP);
if (!ret) {
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i;
for (i = 0; i < tocopy; i++) {
kdata[i].effective = pE.cap[i];
kdata[i].permitted = pP.cap[i];
kdata[i].inheritable = pI.cap[i];
}
/*
* Note: in the case tocopy < _KERNEL_CAPABILITY_U32S,
* we silently drop the upper capabilities here. This
* has the effect of making older libcap
* implementations implicitly drop upper capability
* bits when they perform a: capget/modify/capset
* sequence.
*
* This behavior is considered fail-safe
* behavior. Upgrading the application to a newer
* version of libcap will enable access to the newer
* capabilities.
*
* An alternative would be to return an error here
* (-ERANGE), but that causes legacy applications to
* unexpectedly fail; the capget/modify/capset aborts
* before modification is attempted and the application
* fails.
*/
if (copy_to_user(dataptr, kdata, tocopy
* sizeof(struct __user_cap_data_struct))) {
return -EFAULT;
}
}
return ret;
}
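/*
 * Illustrative userspace sketch (not from the kernel sources): the
 * default: branch of cap_validate_magic() writes the kernel's
 * preferred version back into the header, so userspace can probe it
 * by first calling capget(2) with an invalid version.  Raw-syscall
 * probe without libcap; error handling is trimmed.
 */
#if 0   /* example only, never built as part of this file */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/capability.h>

int main(void)
{
        struct __user_cap_header_struct hdr = { .version = 0, .pid = 0 };
        struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];

        /* invalid version: the call fails with EINVAL but fills in hdr.version */
        syscall(SYS_capget, &hdr, NULL);
        printf("kernel prefers capability version 0x%x\n", hdr.version);

        /* fetch this process' capability sets using the advertised version */
        hdr.pid = 0;    /* 0 means "the current process" */
        if (syscall(SYS_capget, &hdr, data) == 0)
                printf("effective[0]=0x%x permitted[0]=0x%x\n",
                       data[0].effective, data[0].permitted);
        return 0;
}
#endif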
/**
* sys_capset - set capabilities for a process or (*) a group of processes
* @header: pointer to struct that contains capability version and
* target pid data
* @data: pointer to struct that contains the effective, permitted,
* and inheritable capabilities
*
* Set capabilities for the current process only. The ability to set
* capabilities of any other process(es) has been deprecated and removed.
*
* The restrictions on setting capabilities are specified as:
*
* I: any raised capabilities must be a subset of the old permitted
* P: any raised capabilities must be a subset of the old permitted
* E: must be set to a subset of new permitted
*
* Returns 0 on success and < 0 on error.
*/
SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
{
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i, tocopy;
kernel_cap_t inheritable, permitted, effective;
struct cred *new;
int ret;
pid_t pid;
ret = cap_validate_magic(header, &tocopy);
if (ret != 0)
return ret;
if (get_user(pid, &header->pid))
return -EFAULT;
/* may only affect current now */
if (pid != 0 && pid != task_pid_vnr(current))
return -EPERM;
if (copy_from_user(&kdata, data,
tocopy * sizeof(struct __user_cap_data_struct)))
return -EFAULT;
for (i = 0; i < tocopy; i++) {
effective.cap[i] = kdata[i].effective;
permitted.cap[i] = kdata[i].permitted;
inheritable.cap[i] = kdata[i].inheritable;
}
while (i < _KERNEL_CAPABILITY_U32S) {
effective.cap[i] = 0;
permitted.cap[i] = 0;
inheritable.cap[i] = 0;
i++;
}
new = prepare_creds();
if (!new)
return -ENOMEM;
ret = security_capset(new, current_cred(),
&effective, &inheritable, &permitted);
if (ret < 0)
goto error;
audit_log_capset(pid, new, current_cred());
return commit_creds(new);
error:
abort_creds(new);
return ret;
}
/**
* capable - Determine if the current task has a superior capability in effect
* @cap: The capability to be tested for
*
* Return true if the current task has the given superior capability currently
* available for use, false if not.
*
* This sets PF_SUPERPRIV on the task if the capability is available on the
* assumption that it's about to be used.
*/
int capable(int cap)
{
if (unlikely(!cap_valid(cap))) {
printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
BUG();
}
if (security_capable(cap) == 0) {
current->flags |= PF_SUPERPRIV;
return 1;
}
return 0;
}
EXPORT_SYMBOL(capable);

4196
kernel/kernel/cgroup.c Normal file

File diff suppressed because it is too large Load Diff

395
kernel/kernel/cgroup_freezer.c Normal file
View File

@@ -0,0 +1,395 @@
/*
* cgroup_freezer.c - control group freezer subsystem
*
* Copyright IBM Corporation, 2007
*
* Author : Cedric Le Goater <clg@fr.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/module.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
enum freezer_state {
CGROUP_THAWED = 0,
CGROUP_FREEZING,
CGROUP_FROZEN,
};
struct freezer {
struct cgroup_subsys_state css;
enum freezer_state state;
spinlock_t lock; /* protects _writes_ to state */
};
static inline struct freezer *cgroup_freezer(
struct cgroup *cgroup)
{
return container_of(
cgroup_subsys_state(cgroup, freezer_subsys_id),
struct freezer, css);
}
static inline struct freezer *task_freezer(struct task_struct *task)
{
return container_of(task_subsys_state(task, freezer_subsys_id),
struct freezer, css);
}
int cgroup_freezing_or_frozen(struct task_struct *task)
{
struct freezer *freezer;
enum freezer_state state;
task_lock(task);
freezer = task_freezer(task);
if (!freezer->css.cgroup->parent)
state = CGROUP_THAWED; /* root cgroup can't be frozen */
else
state = freezer->state;
task_unlock(task);
return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
}
/*
* cgroups_write_string() limits the size of freezer state strings to
* CGROUP_LOCAL_BUFFER_SIZE
*/
static const char *freezer_state_strs[] = {
"THAWED",
"FREEZING",
"FROZEN",
};
/*
* State diagram
* Transitions are caused by userspace writes to the freezer.state file.
* The values in parentheses are state labels. The rest are edge labels.
*
* (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
*    ^ ^                    |                     |
*    | \_______THAWED_______/                     |
*    \__________________________THAWED____________/
*/
struct cgroup_subsys freezer_subsys;
/* Locks taken and their ordering
* ------------------------------
* css_set_lock
* cgroup_mutex (AKA cgroup_lock)
* task->alloc_lock (AKA task_lock)
* freezer->lock
* task->sighand->siglock
*
* cgroup code forces css_set_lock to be taken before task->alloc_lock
*
* freezer_create(), freezer_destroy():
* cgroup_mutex [ by cgroup core ]
*
* can_attach():
* cgroup_mutex
*
* cgroup_frozen():
* task->alloc_lock (to get task's cgroup)
*
* freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
* task->alloc_lock (to get task's cgroup)
* freezer->lock
* sighand->siglock (if the cgroup is freezing)
*
* freezer_read():
* cgroup_mutex
* freezer->lock
* read_lock css_set_lock (cgroup iterator start)
*
* freezer_write() (freeze):
* cgroup_mutex
* freezer->lock
* read_lock css_set_lock (cgroup iterator start)
* sighand->siglock
*
* freezer_write() (unfreeze):
* cgroup_mutex
* freezer->lock
* read_lock css_set_lock (cgroup iterator start)
* task->alloc_lock (to prevent races with freeze_task())
* sighand->siglock
*/
static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
struct cgroup *cgroup)
{
struct freezer *freezer;
freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
if (!freezer)
return ERR_PTR(-ENOMEM);
spin_lock_init(&freezer->lock);
freezer->state = CGROUP_THAWED;
return &freezer->css;
}
static void freezer_destroy(struct cgroup_subsys *ss,
struct cgroup *cgroup)
{
kfree(cgroup_freezer(cgroup));
}
/* Task is frozen or will freeze immediately when next it gets woken */
static bool is_task_frozen_enough(struct task_struct *task)
{
return frozen(task) ||
(task_is_stopped_or_traced(task) && freezing(task));
}
/*
* The call to cgroup_lock() in the freezer.state write method prevents
* a write to that file racing against an attach, and hence the
* can_attach() result will remain valid until the attach completes.
*/
static int freezer_can_attach(struct cgroup_subsys *ss,
struct cgroup *new_cgroup,
struct task_struct *task, bool threadgroup)
{
struct freezer *freezer;
/*
* Anything frozen can't move or be moved to/from.
*
* Since orig_freezer->state == FROZEN means that @task has already
* been frozen, it is sufficient to check the latter condition.
*/
if (is_task_frozen_enough(task))
return -EBUSY;
freezer = cgroup_freezer(new_cgroup);
if (freezer->state == CGROUP_FROZEN)
return -EBUSY;
if (threadgroup) {
struct task_struct *c;
rcu_read_lock();
list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
if (is_task_frozen_enough(c)) {
rcu_read_unlock();
return -EBUSY;
}
}
rcu_read_unlock();
}
return 0;
}
static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
{
struct freezer *freezer;
/*
* No lock is needed, since the task isn't on tasklist yet,
* so it can't be moved to another cgroup, which means the
* freezer won't be removed and will be valid during this
* function call.
*/
freezer = task_freezer(task);
/*
* The root cgroup is non-freezable, so we can skip the
* following check.
*/
if (!freezer->css.cgroup->parent)
return;
spin_lock_irq(&freezer->lock);
BUG_ON(freezer->state == CGROUP_FROZEN);
/* Locking avoids race with FREEZING -> THAWED transitions. */
if (freezer->state == CGROUP_FREEZING)
freeze_task(task, true);
spin_unlock_irq(&freezer->lock);
}
/*
* caller must hold freezer->lock
*/
static void update_freezer_state(struct cgroup *cgroup,
struct freezer *freezer)
{
struct cgroup_iter it;
struct task_struct *task;
unsigned int nfrozen = 0, ntotal = 0;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
ntotal++;
if (is_task_frozen_enough(task))
nfrozen++;
}
/*
* Transition to FROZEN when no new tasks can be added ensures
* that we never exist in the FROZEN state while there are unfrozen
* tasks.
*/
if (nfrozen == ntotal)
freezer->state = CGROUP_FROZEN;
else if (nfrozen > 0)
freezer->state = CGROUP_FREEZING;
else
freezer->state = CGROUP_THAWED;
cgroup_iter_end(cgroup, &it);
}
static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
struct seq_file *m)
{
struct freezer *freezer;
enum freezer_state state;
if (!cgroup_lock_live_group(cgroup))
return -ENODEV;
freezer = cgroup_freezer(cgroup);
spin_lock_irq(&freezer->lock);
state = freezer->state;
if (state == CGROUP_FREEZING) {
/* We change from FREEZING to FROZEN lazily if the cgroup was
* only partially frozen when we exited write. */
update_freezer_state(cgroup, freezer);
state = freezer->state;
}
spin_unlock_irq(&freezer->lock);
cgroup_unlock();
seq_puts(m, freezer_state_strs[state]);
seq_putc(m, '\n');
return 0;
}
static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
{
struct cgroup_iter it;
struct task_struct *task;
unsigned int num_cant_freeze_now = 0;
freezer->state = CGROUP_FREEZING;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
if (!freeze_task(task, true))
continue;
if (is_task_frozen_enough(task))
continue;
if (!freezing(task) && !freezer_should_skip(task))
num_cant_freeze_now++;
}
cgroup_iter_end(cgroup, &it);
return num_cant_freeze_now ? -EBUSY : 0;
}
static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
{
struct cgroup_iter it;
struct task_struct *task;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
thaw_process(task);
}
cgroup_iter_end(cgroup, &it);
freezer->state = CGROUP_THAWED;
}
static int freezer_change_state(struct cgroup *cgroup,
enum freezer_state goal_state)
{
struct freezer *freezer;
int retval = 0;
freezer = cgroup_freezer(cgroup);
spin_lock_irq(&freezer->lock);
update_freezer_state(cgroup, freezer);
if (goal_state == freezer->state)
goto out;
switch (goal_state) {
case CGROUP_THAWED:
unfreeze_cgroup(cgroup, freezer);
break;
case CGROUP_FROZEN:
retval = try_to_freeze_cgroup(cgroup, freezer);
break;
default:
BUG();
}
out:
spin_unlock_irq(&freezer->lock);
return retval;
}
static int freezer_write(struct cgroup *cgroup,
struct cftype *cft,
const char *buffer)
{
int retval;
enum freezer_state goal_state;
if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
goal_state = CGROUP_THAWED;
else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
goal_state = CGROUP_FROZEN;
else
return -EINVAL;
if (!cgroup_lock_live_group(cgroup))
return -ENODEV;
retval = freezer_change_state(cgroup, goal_state);
cgroup_unlock();
return retval;
}
static struct cftype files[] = {
{
.name = "state",
.read_seq_string = freezer_read,
.write_string = freezer_write,
},
};
static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
{
if (!cgroup->parent)
return 0;
return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
}
struct cgroup_subsys freezer_subsys = {
.name = "freezer",
.create = freezer_create,
.destroy = freezer_destroy,
.populate = freezer_populate,
.subsys_id = freezer_subsys_id,
.can_attach = freezer_can_attach,
.attach = NULL,
.fork = freezer_fork,
.exit = NULL,
};
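/*
 * Illustrative userspace sketch (not from the kernel sources): with
 * the freezer subsystem mounted, the state machine above is driven by
 * writing "FROZEN" or "THAWED" into a group's freezer.state file.
 * The mount point used below is an assumption about a common layout,
 * not something this file defines, and the "demo" group must already
 * exist.
 */
#if 0   /* example only, never built as part of this file */
#include <stdio.h>

static int set_freezer_state(const char *group, const char *state)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/fs/cgroup/freezer/%s/freezer.state", group);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fputs(state, f);
        return fclose(f);
}

int main(void)
{
        if (set_freezer_state("demo", "FROZEN"))        /* freeze every task in the group */
                perror("freeze");
        if (set_freezer_state("demo", "THAWED"))        /* and thaw them again */
                perror("thaw");
        return 0;
}
#endif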

1160
kernel/kernel/compat.c Normal file

File diff suppressed because it is too large Load Diff

98
kernel/kernel/configs.c Normal file
View File

@@ -0,0 +1,98 @@
/*
* kernel/configs.c
* Echo the kernel .config file used to build the kernel
*
* Copyright (C) 2002 Khalid Aziz <khalid_aziz@hp.com>
* Copyright (C) 2002 Randy Dunlap <rdunlap@xenotime.net>
* Copyright (C) 2002 Al Stone <ahs3@fc.hp.com>
* Copyright (C) 2002 Hewlett-Packard Company
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <asm/uaccess.h>
/**************************************************/
/* the actual current config file */
/*
* Define kernel_config_data and kernel_config_data_size, which contains the
* wrapped and compressed configuration file. The file is first compressed
* with gzip and then bounded by two eight byte magic numbers to allow
* extraction from a binary kernel image:
*
* IKCFG_ST
* <image>
* IKCFG_ED
*/
#define MAGIC_START "IKCFG_ST"
#define MAGIC_END "IKCFG_ED"
#include "config_data.h"
#define MAGIC_SIZE (sizeof(MAGIC_START) - 1)
#define kernel_config_data_size \
(sizeof(kernel_config_data) - 1 - MAGIC_SIZE * 2)
#ifdef CONFIG_IKCONFIG_PROC
static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
size_t len, loff_t * offset)
{
return simple_read_from_buffer(buf, len, offset,
kernel_config_data + MAGIC_SIZE,
kernel_config_data_size);
}
static const struct file_operations ikconfig_file_ops = {
.owner = THIS_MODULE,
.read = ikconfig_read_current,
};
static int __init ikconfig_init(void)
{
struct proc_dir_entry *entry;
/* create the current config file */
entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL,
&ikconfig_file_ops);
if (!entry)
return -ENOMEM;
entry->size = kernel_config_data_size;
return 0;
}
static void __exit ikconfig_cleanup(void)
{
remove_proc_entry("config.gz", NULL);
}
module_init(ikconfig_init);
module_exit(ikconfig_cleanup);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Randy Dunlap");
MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel");
#endif /* CONFIG_IKCONFIG_PROC */
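/*
 * Illustrative userspace sketch (not from the kernel sources): when
 * CONFIG_IKCONFIG_PROC is enabled, the gzip'd .config exported above
 * can be read back with zlib (link with -lz); error handling trimmed.
 */
#if 0   /* example only, never built as part of this file */
#include <stdio.h>
#include <zlib.h>

int main(void)
{
        char buf[4096];
        int n;
        gzFile f = gzopen("/proc/config.gz", "rb");

        if (!f)
                return 1;
        while ((n = gzread(f, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);      /* prints the running kernel's .config */
        gzclose(f);
        return 0;
}
#endif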

562
kernel/kernel/cpu.c Normal file
View File

@@ -0,0 +1,562 @@
/* CPU control.
* (C) 2001, 2002, 2003, 2004 Rusty Russell
*
* This code is licenced under the GPL.
*/
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
static struct {
struct task_struct *active_writer;
struct mutex lock; /* Synchronizes accesses to refcount, */
/*
* Also blocks the new readers during
* an ongoing cpu hotplug operation.
*/
int refcount;
} cpu_hotplug = {
.active_writer = NULL,
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
.refcount = 0,
};
#ifdef CONFIG_HOTPLUG_CPU
void get_online_cpus(void)
{
might_sleep();
if (cpu_hotplug.active_writer == current)
return;
mutex_lock(&cpu_hotplug.lock);
cpu_hotplug.refcount++;
mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);
void put_online_cpus(void)
{
if (cpu_hotplug.active_writer == current)
return;
mutex_lock(&cpu_hotplug.lock);
if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
wake_up_process(cpu_hotplug.active_writer);
mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(put_online_cpus);
#endif /* CONFIG_HOTPLUG_CPU */
/*
* The following two APIs must be used when attempting
* to serialize the updates to cpu_online_mask, cpu_present_mask.
*/
void cpu_maps_update_begin(void)
{
mutex_lock(&cpu_add_remove_lock);
}
void cpu_maps_update_done(void)
{
mutex_unlock(&cpu_add_remove_lock);
}
/*
* This ensures that the hotplug operation can begin only when the
* refcount goes to zero.
*
* Note that during a cpu-hotplug operation, the new readers, if any,
* will be blocked by the cpu_hotplug.lock
*
* Since cpu_hotplug_begin() is always called after invoking
* cpu_maps_update_begin(), we can be sure that only one writer is active.
*
* Note that theoretically, there is a possibility of a livelock:
* - Refcount goes to zero, last reader wakes up the sleeping
* writer.
* - Last reader unlocks the cpu_hotplug.lock.
* - A new reader arrives at this moment, bumps up the refcount.
* - The writer acquires the cpu_hotplug.lock finds the refcount
* non zero and goes to sleep again.
*
* However, this is very difficult to achieve in practice since
* get_online_cpus() is not an API that is called all that often.
*
*/
static void cpu_hotplug_begin(void)
{
cpu_hotplug.active_writer = current;
for (;;) {
mutex_lock(&cpu_hotplug.lock);
if (likely(!cpu_hotplug.refcount))
break;
__set_current_state(TASK_UNINTERRUPTIBLE);
mutex_unlock(&cpu_hotplug.lock);
schedule();
}
}
static void cpu_hotplug_done(void)
{
cpu_hotplug.active_writer = NULL;
mutex_unlock(&cpu_hotplug.lock);
}
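/*
 * Illustrative userspace sketch (not from the kernel sources): the
 * get_online_cpus()/cpu_hotplug_begin() pair above is a
 * reference-counted gate -- many readers bump a count and the single
 * writer waits for it to drain to zero.  A pthread analogue that uses
 * a condition variable where the kernel simply sleeps and retries
 * (which is why the comment above can describe a theoretical
 * livelock); the names are invented for the example.
 */
#if 0   /* example only, never built as part of this file */
#include <pthread.h>

static pthread_mutex_t gate_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gate_idle = PTHREAD_COND_INITIALIZER;
static int gate_readers;

static void reader_enter(void)          /* ~ get_online_cpus() */
{
        pthread_mutex_lock(&gate_lock);
        gate_readers++;
        pthread_mutex_unlock(&gate_lock);
}

static void reader_exit(void)           /* ~ put_online_cpus() */
{
        pthread_mutex_lock(&gate_lock);
        if (--gate_readers == 0)
                pthread_cond_signal(&gate_idle);        /* wake a waiting writer */
        pthread_mutex_unlock(&gate_lock);
}

static void writer_begin(void)          /* ~ cpu_hotplug_begin() */
{
        pthread_mutex_lock(&gate_lock);
        while (gate_readers)
                pthread_cond_wait(&gate_idle, &gate_lock);
        /* returns with gate_lock held and no readers active */
}

static void writer_end(void)            /* ~ cpu_hotplug_done() */
{
        pthread_mutex_unlock(&gate_lock);
}

int main(void)
{
        reader_enter();
        reader_exit();
        writer_begin();         /* exclusive section, e.g. _cpu_down() */
        writer_end();
        return 0;
}
#endif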
/* Need to know about CPUs going up/down? */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
int ret;
cpu_maps_update_begin();
ret = raw_notifier_chain_register(&cpu_chain, nb);
cpu_maps_update_done();
return ret;
}
#ifdef CONFIG_HOTPLUG_CPU
EXPORT_SYMBOL(register_cpu_notifier);
void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
cpu_maps_update_begin();
raw_notifier_chain_unregister(&cpu_chain, nb);
cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
static inline void check_for_tasks(int cpu)
{
struct task_struct *p;
write_lock_irq(&tasklist_lock);
for_each_process(p) {
if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
(!cputime_eq(p->utime, cputime_zero) ||
!cputime_eq(p->stime, cputime_zero)))
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
(state = %ld, flags = %x) \n",
p->comm, task_pid_nr(p), cpu,
p->state, p->flags);
}
write_unlock_irq(&tasklist_lock);
}
struct take_cpu_down_param {
struct task_struct *caller;
unsigned long mod;
void *hcpu;
};
/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
struct take_cpu_down_param *param = _param;
unsigned int cpu = (unsigned long)param->hcpu;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
return err;
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
param->hcpu);
if (task_cpu(param->caller) == cpu)
move_task_off_dead_cpu(cpu, param->caller);
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
return 0;
}
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
int err, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
.caller = current,
.mod = mod,
.hcpu = hcpu,
};
if (num_online_cpus() == 1)
return -EBUSY;
if (!cpu_online(cpu))
return -EINVAL;
cpu_hotplug_begin();
set_cpu_active(cpu, false);
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
hcpu, -1, &nr_calls);
if (err == NOTIFY_BAD) {
set_cpu_active(cpu, true);
nr_calls--;
__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu, nr_calls, NULL);
printk("%s: attempt to take down CPU %u failed\n",
__func__, cpu);
err = -EINVAL;
goto out_release;
}
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
set_cpu_active(cpu, true);
/* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu) == NOTIFY_BAD)
BUG();
goto out_release;
}
BUG_ON(cpu_online(cpu));
/* Wait for it to sleep (leaving idle task). */
while (!idle_cpu(cpu))
yield();
/* This actually kills the CPU. */
__cpu_die(cpu);
/* CPU is completely dead: tell everyone. Too late to complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
hcpu) == NOTIFY_BAD)
BUG();
check_for_tasks(cpu);
out_release:
cpu_hotplug_done();
if (!err) {
if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
hcpu) == NOTIFY_BAD)
BUG();
}
return err;
}
int __ref cpu_down(unsigned int cpu)
{
int err;
err = stop_machine_create();
if (err)
return err;
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
err = -EBUSY;
goto out;
}
err = _cpu_down(cpu, 0);
out:
cpu_maps_update_done();
stop_machine_destroy();
return err;
}
EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/
/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
int ret, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
if (cpu_online(cpu) || !cpu_present(cpu))
return -EINVAL;
cpu_hotplug_begin();
ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
-1, &nr_calls);
if (ret == NOTIFY_BAD) {
nr_calls--;
printk("%s: attempt to bring up CPU %u failed\n",
__func__, cpu);
ret = -EINVAL;
goto out_notify;
}
/* Arch-specific enabling code. */
ret = __cpu_up(cpu);
if (ret != 0)
goto out_notify;
BUG_ON(!cpu_online(cpu));
set_cpu_active(cpu, true);
/* Now call notifier in preparation. */
raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
out_notify:
if (ret != 0)
__raw_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
cpu_hotplug_done();
return ret;
}
int __cpuinit cpu_up(unsigned int cpu)
{
int err = 0;
if (!cpu_possible(cpu)) {
printk(KERN_ERR "can't online cpu %d because it is not "
"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
printk(KERN_ERR "please check additional_cpus= boot "
"parameter\n");
#endif
return -EINVAL;
}
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
err = -EBUSY;
goto out;
}
err = _cpu_up(cpu, 0);
out:
cpu_maps_update_done();
return err;
}
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
int disable_nonboot_cpus(void)
{
int cpu, first_cpu, error;
error = stop_machine_create();
if (error)
return error;
cpu_maps_update_begin();
first_cpu = cpumask_first(cpu_online_mask);
/*
* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
*/
cpumask_clear(frozen_cpus);
printk("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
if (cpu == first_cpu)
continue;
error = _cpu_down(cpu, 1);
if (!error)
cpumask_set_cpu(cpu, frozen_cpus);
else {
printk(KERN_ERR "Error taking CPU%d down: %d\n",
cpu, error);
break;
}
}
if (!error) {
BUG_ON(num_online_cpus() > 1);
/* Make sure the CPUs won't be enabled by someone else */
cpu_hotplug_disabled = 1;
} else {
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
cpu_maps_update_done();
stop_machine_destroy();
return error;
}
void __weak arch_enable_nonboot_cpus_begin(void)
{
}
void __weak arch_enable_nonboot_cpus_end(void)
{
}
void __ref enable_nonboot_cpus(void)
{
int cpu, error;
/* Allow everyone to use the CPU hotplug again */
cpu_maps_update_begin();
cpu_hotplug_disabled = 0;
if (cpumask_empty(frozen_cpus))
goto out;
printk("Enabling non-boot CPUs ...\n");
arch_enable_nonboot_cpus_begin();
for_each_cpu(cpu, frozen_cpus) {
error = _cpu_up(cpu, 1);
if (!error) {
printk("CPU%d is up\n", cpu);
continue;
}
printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
}
arch_enable_nonboot_cpus_end();
cpumask_clear(frozen_cpus);
out:
cpu_maps_update_done();
}
static int alloc_frozen_cpus(void)
{
if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
return -ENOMEM;
return 0;
}
core_initcall(alloc_frozen_cpus);
#endif /* CONFIG_PM_SLEEP_SMP */
/**
* notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
* @cpu: cpu that just started
*
* This function calls the cpu_chain notifiers with CPU_STARTING.
* It must be called by the arch code on the new cpu, before the new cpu
* enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
void __cpuinit notify_cpu_starting(unsigned int cpu)
{
unsigned long val = CPU_STARTING;
#ifdef CONFIG_PM_SLEEP_SMP
if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
}
#endif /* CONFIG_SMP */
/*
* cpu_bit_bitmap[] is a special, "compressed" data structure that
* represents, for each bit number nr, the NR_CPUS-bit value 1 << nr.
*
* It is used by cpumask_of() to get a constant address to a CPU
* mask value that has a single bit set only.
*/
/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
MASK_DECLARE_8(0), MASK_DECLARE_8(8),
MASK_DECLARE_8(16), MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
MASK_DECLARE_8(32), MASK_DECLARE_8(40),
MASK_DECLARE_8(48), MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
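/*
 * Illustrative userspace sketch (not from the kernel sources): the
 * table above stores only one non-zero word per row, yet cpumask_of()
 * can hand out a pointer to a full NR_CPUS-bit mask with exactly bit
 * `cpu' set by backing the pointer up cpu/BITS_PER_LONG words into the
 * all-zero tail of the preceding rows.  Self-contained check of that
 * pointer trick (it relies on the 2-D array being contiguous, just as
 * the kernel version does); the names are invented for the example.
 */
#if 0   /* example only, never built as part of this file */
#include <assert.h>
#include <stdio.h>

#define NR_CPUS_DEMO 128
#define BPL (sizeof(unsigned long) * 8)                 /* bits per long */
#define LONGS ((NR_CPUS_DEMO + BPL - 1) / BPL)

/* row 0 stays all-zero; row x+1 has only bit x of word 0 set */
static unsigned long bitmap[BPL + 1][LONGS];

int main(void)
{
        unsigned int x, cpu;

        for (x = 0; x < BPL; x++)
                bitmap[x + 1][0] = 1UL << x;

        for (cpu = 0; cpu < NR_CPUS_DEMO; cpu++) {
                /* the pointer arithmetic cpumask_of() relies on */
                const unsigned long *mask = bitmap[1 + cpu % BPL] - cpu / BPL;
                unsigned int word, bit;

                /* exactly one bit -- bit `cpu' -- is set in the mask */
                for (word = 0; word < LONGS; word++)
                        for (bit = 0; bit < BPL; bit++)
                                assert(!!(mask[word] & (1UL << bit)) ==
                                       (word * BPL + bit == cpu));
        }
        printf("all %d single-cpu masks verified\n", NR_CPUS_DEMO);
        return 0;
}
#endif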
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);
static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);
static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);
static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
void set_cpu_possible(unsigned int cpu, bool possible)
{
if (possible)
cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
}
void set_cpu_present(unsigned int cpu, bool present)
{
if (present)
cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
}
void set_cpu_online(unsigned int cpu, bool online)
{
if (online)
cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
}
void set_cpu_active(unsigned int cpu, bool active)
{
if (active)
cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
}
void init_cpu_present(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_present_bits), src);
}
void init_cpu_possible(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_possible_bits), src);
}
void init_cpu_online(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_online_bits), src);
}

2581
kernel/kernel/cpuset.c Normal file

File diff suppressed because it is too large Load Diff

916
kernel/kernel/cred.c Normal file
View File

@@ -0,0 +1,916 @@
/* Task credentials management - see Documentation/credentials.txt
*
* Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/cred.h>
#include <linux/sched.h>
#include <linux/key.h>
#include <linux/keyctl.h>
#include <linux/init_task.h>
#include <linux/security.h>
#include <linux/cn_proc.h>
#if 0
#define kdebug(FMT, ...) \
printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
#else
static inline __attribute__((format(printf, 1, 2)))
void no_printk(const char *fmt, ...)
{
}
#define kdebug(FMT, ...) \
no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
#endif
static struct kmem_cache *cred_jar;
/*
* The common credentials for the initial task's thread group
*/
#ifdef CONFIG_KEYS
static struct thread_group_cred init_tgcred = {
.usage = ATOMIC_INIT(2),
.tgid = 0,
.lock = SPIN_LOCK_UNLOCKED,
};
#endif
/*
* The initial credentials for the initial task
*/
struct cred init_cred = {
.usage = ATOMIC_INIT(4),
#ifdef CONFIG_DEBUG_CREDENTIALS
.subscribers = ATOMIC_INIT(2),
.magic = CRED_MAGIC,
#endif
.securebits = SECUREBITS_DEFAULT,
.cap_inheritable = CAP_INIT_INH_SET,
.cap_permitted = CAP_FULL_SET,
.cap_effective = CAP_INIT_EFF_SET,
.cap_bset = CAP_INIT_BSET,
.user = INIT_USER,
.group_info = &init_groups,
#ifdef CONFIG_KEYS
.tgcred = &init_tgcred,
#endif
};
static inline void set_cred_subscribers(struct cred *cred, int n)
{
#ifdef CONFIG_DEBUG_CREDENTIALS
atomic_set(&cred->subscribers, n);
#endif
}
static inline int read_cred_subscribers(const struct cred *cred)
{
#ifdef CONFIG_DEBUG_CREDENTIALS
return atomic_read(&cred->subscribers);
#else
return 0;
#endif
}
static inline void alter_cred_subscribers(const struct cred *_cred, int n)
{
#ifdef CONFIG_DEBUG_CREDENTIALS
struct cred *cred = (struct cred *) _cred;
atomic_add(n, &cred->subscribers);
#endif
}
/*
* Dispose of the shared task group credentials
*/
#ifdef CONFIG_KEYS
static void release_tgcred_rcu(struct rcu_head *rcu)
{
struct thread_group_cred *tgcred =
container_of(rcu, struct thread_group_cred, rcu);
BUG_ON(atomic_read(&tgcred->usage) != 0);
key_put(tgcred->session_keyring);
key_put(tgcred->process_keyring);
kfree(tgcred);
}
#endif
/*
* Release a set of thread group credentials.
*/
static void release_tgcred(struct cred *cred)
{
#ifdef CONFIG_KEYS
struct thread_group_cred *tgcred = cred->tgcred;
if (atomic_dec_and_test(&tgcred->usage))
call_rcu(&tgcred->rcu, release_tgcred_rcu);
#endif
}
/*
* The RCU callback to actually dispose of a set of credentials
*/
static void put_cred_rcu(struct rcu_head *rcu)
{
struct cred *cred = container_of(rcu, struct cred, rcu);
kdebug("put_cred_rcu(%p)", cred);
#ifdef CONFIG_DEBUG_CREDENTIALS
if (cred->magic != CRED_MAGIC_DEAD ||
atomic_read(&cred->usage) != 0 ||
read_cred_subscribers(cred) != 0)
panic("CRED: put_cred_rcu() sees %p with"
" mag %x, put %p, usage %d, subscr %d\n",
cred, cred->magic, cred->put_addr,
atomic_read(&cred->usage),
read_cred_subscribers(cred));
#else
if (atomic_read(&cred->usage) != 0)
panic("CRED: put_cred_rcu() sees %p with usage %d\n",
cred, atomic_read(&cred->usage));
#endif
security_cred_free(cred);
key_put(cred->thread_keyring);
key_put(cred->request_key_auth);
release_tgcred(cred);
if (cred->group_info)
put_group_info(cred->group_info);
free_uid(cred->user);
kmem_cache_free(cred_jar, cred);
}
/**
* __put_cred - Destroy a set of credentials
* @cred: The record to release
*
* Destroy a set of credentials on which no references remain.
*/
void __put_cred(struct cred *cred)
{
kdebug("__put_cred(%p{%d,%d})", cred,
atomic_read(&cred->usage),
read_cred_subscribers(cred));
BUG_ON(atomic_read(&cred->usage) != 0);
#ifdef CONFIG_DEBUG_CREDENTIALS
BUG_ON(read_cred_subscribers(cred) != 0);
cred->magic = CRED_MAGIC_DEAD;
cred->put_addr = __builtin_return_address(0);
#endif
BUG_ON(cred == current->cred);
BUG_ON(cred == current->real_cred);
call_rcu(&cred->rcu, put_cred_rcu);
}
EXPORT_SYMBOL(__put_cred);
/*
* Clean up a task's credentials when it exits
*/
void exit_creds(struct task_struct *tsk)
{
struct cred *cred;
kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
atomic_read(&tsk->cred->usage),
read_cred_subscribers(tsk->cred));
cred = (struct cred *) tsk->real_cred;
tsk->real_cred = NULL;
validate_creds(cred);
alter_cred_subscribers(cred, -1);
put_cred(cred);
cred = (struct cred *) tsk->cred;
tsk->cred = NULL;
validate_creds(cred);
alter_cred_subscribers(cred, -1);
put_cred(cred);
cred = (struct cred *) tsk->replacement_session_keyring;
if (cred) {
tsk->replacement_session_keyring = NULL;
validate_creds(cred);
put_cred(cred);
}
}
/**
* get_task_cred - Get another task's objective credentials
* @task: The task to query
*
* Get the objective credentials of a task, pinning them so that they can't go
* away. Accessing a task's credentials directly is not permitted.
*
* The caller must also make sure task doesn't get deleted, either by holding a
* ref on task or by holding tasklist_lock to prevent it from being unlinked.
*/
const struct cred *get_task_cred(struct task_struct *task)
{
const struct cred *cred;
rcu_read_lock();
do {
cred = __task_cred((task));
BUG_ON(!cred);
} while (!atomic_inc_not_zero(&((struct cred *)cred)->usage));
rcu_read_unlock();
return cred;
}
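/*
 * Typical caller pattern, shown as a minimal sketch (the helper name and the
 * field read are illustrative): pin the credentials, read what is needed,
 * then drop the reference again with put_cred().
 */
static uid_t example_task_euid(struct task_struct *task)
{
        const struct cred *cred = get_task_cred(task);
        uid_t euid = cred->euid;

        put_cred(cred);
        return euid;
}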
/*
* Allocate blank credentials, such that the credentials can be filled in at a
* later date without risk of ENOMEM.
*/
struct cred *cred_alloc_blank(void)
{
struct cred *new;
new = kmem_cache_zalloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;
#ifdef CONFIG_KEYS
new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
if (!new->tgcred) {
kmem_cache_free(cred_jar, new);
return NULL;
}
atomic_set(&new->tgcred->usage, 1);
#endif
atomic_set(&new->usage, 1);
#ifdef CONFIG_DEBUG_CREDENTIALS
new->magic = CRED_MAGIC;
#endif
if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
goto error;
return new;
error:
abort_creds(new);
return NULL;
}
/**
* prepare_creds - Prepare a new set of credentials for modification
*
* Prepare a new set of task credentials for modification. A task's creds
* shouldn't generally be modified directly, therefore this function is used to
* prepare a new copy, which the caller then modifies and then commits by
* calling commit_creds().
*
* Preparation involves making a copy of the objective creds for modification.
*
* Returns a pointer to the new creds-to-be if successful, NULL otherwise.
*
* Call commit_creds() or abort_creds() to clean up.
*/
struct cred *prepare_creds(void)
{
struct task_struct *task = current;
const struct cred *old;
struct cred *new;
validate_process_creds();
new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;
kdebug("prepare_creds() alloc %p", new);
old = task->cred;
memcpy(new, old, sizeof(struct cred));
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_group_info(new->group_info);
get_uid(new->user);
#ifdef CONFIG_KEYS
key_get(new->thread_keyring);
key_get(new->request_key_auth);
atomic_inc(&new->tgcred->usage);
#endif
#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
goto error;
validate_creds(new);
return new;
error:
abort_creds(new);
return NULL;
}
EXPORT_SYMBOL(prepare_creds);
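/*
 * The intended usage pattern from the comment above, as a minimal sketch
 * (the helper name and the fsuid change are illustrative): copy the creds,
 * modify the copy, then either commit_creds() or abort_creds() it.
 */
static int example_change_fsuid(uid_t fsuid)
{
        struct cred *new = prepare_creds();

        if (!new)
                return -ENOMEM;
        new->fsuid = fsuid;
        return commit_creds(new);
}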
/*
* Prepare credentials for current to perform an execve()
* - The caller must hold current->cred_guard_mutex
*/
struct cred *prepare_exec_creds(void)
{
struct thread_group_cred *tgcred = NULL;
struct cred *new;
#ifdef CONFIG_KEYS
tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
if (!tgcred)
return NULL;
#endif
new = prepare_creds();
if (!new) {
kfree(tgcred);
return new;
}
#ifdef CONFIG_KEYS
/* newly exec'd tasks don't get a thread keyring */
key_put(new->thread_keyring);
new->thread_keyring = NULL;
/* create a new set of per-thread-group creds for this set of threads to
* share */
memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred));
atomic_set(&tgcred->usage, 1);
spin_lock_init(&tgcred->lock);
/* inherit the session keyring; new process keyring */
key_get(tgcred->session_keyring);
tgcred->process_keyring = NULL;
release_tgcred(new);
new->tgcred = tgcred;
#endif
return new;
}
/*
* prepare new credentials for the usermode helper dispatcher
*/
struct cred *prepare_usermodehelper_creds(void)
{
#ifdef CONFIG_KEYS
struct thread_group_cred *tgcred = NULL;
#endif
struct cred *new;
#ifdef CONFIG_KEYS
tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC);
if (!tgcred)
return NULL;
#endif
new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
if (!new)
return NULL;
kdebug("prepare_usermodehelper_creds() alloc %p", new);
memcpy(new, &init_cred, sizeof(struct cred));
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_group_info(new->group_info);
get_uid(new->user);
#ifdef CONFIG_KEYS
new->thread_keyring = NULL;
new->request_key_auth = NULL;
new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT;
atomic_set(&tgcred->usage, 1);
spin_lock_init(&tgcred->lock);
new->tgcred = tgcred;
#endif
#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
goto error;
validate_creds(new);
BUG_ON(atomic_read(&new->usage) != 1);
return new;
error:
put_cred(new);
return NULL;
}
/*
* Copy credentials for the new process created by fork()
*
* We share if we can, but under some circumstances we have to generate a new
* set.
*
* The new process gets the current process's subjective credentials as its
* objective and subjective credentials
*/
int copy_creds(struct task_struct *p, unsigned long clone_flags)
{
#ifdef CONFIG_KEYS
struct thread_group_cred *tgcred;
#endif
struct cred *new;
int ret;
mutex_init(&p->cred_guard_mutex);
if (
#ifdef CONFIG_KEYS
!p->cred->thread_keyring &&
#endif
clone_flags & CLONE_THREAD
) {
p->real_cred = get_cred(p->cred);
get_cred(p->cred);
alter_cred_subscribers(p->cred, 2);
kdebug("share_creds(%p{%d,%d})",
p->cred, atomic_read(&p->cred->usage),
read_cred_subscribers(p->cred));
atomic_inc(&p->cred->user->processes);
return 0;
}
new = prepare_creds();
if (!new)
return -ENOMEM;
if (clone_flags & CLONE_NEWUSER) {
ret = create_user_ns(new);
if (ret < 0)
goto error_put;
}
#ifdef CONFIG_KEYS
/* new threads get their own thread keyrings if their parent already
* had one */
if (new->thread_keyring) {
key_put(new->thread_keyring);
new->thread_keyring = NULL;
if (clone_flags & CLONE_THREAD)
install_thread_keyring_to_cred(new);
}
/* we share the process and session keyrings between all the threads in
* a process - this is slightly icky as we violate COW credentials a
* bit */
if (!(clone_flags & CLONE_THREAD)) {
tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
if (!tgcred) {
ret = -ENOMEM;
goto error_put;
}
atomic_set(&tgcred->usage, 1);
spin_lock_init(&tgcred->lock);
tgcred->process_keyring = NULL;
tgcred->session_keyring = key_get(new->tgcred->session_keyring);
release_tgcred(new);
new->tgcred = tgcred;
}
#endif
atomic_inc(&new->user->processes);
p->cred = p->real_cred = get_cred(new);
alter_cred_subscribers(new, 2);
validate_creds(new);
return 0;
error_put:
put_cred(new);
return ret;
}
/**
* commit_creds - Install new credentials upon the current task
* @new: The credentials to be assigned
*
* Install a new set of credentials to the current task, using RCU to replace
* the old set. Both the objective and the subjective credentials pointers are
* updated. This function may not be called if the subjective credentials are
* in an overridden state.
*
* This function eats the caller's reference to the new credentials.
*
* Always returns 0 thus allowing this function to be tail-called at the end
* of, say, sys_setgid().
*/
int commit_creds(struct cred *new)
{
struct task_struct *task = current;
const struct cred *old = task->real_cred;
kdebug("commit_creds(%p{%d,%d})", new,
atomic_read(&new->usage),
read_cred_subscribers(new));
BUG_ON(task->cred != old);
#ifdef CONFIG_DEBUG_CREDENTIALS
BUG_ON(read_cred_subscribers(old) < 2);
validate_creds(old);
validate_creds(new);
#endif
BUG_ON(atomic_read(&new->usage) < 1);
security_commit_creds(new, old);
get_cred(new); /* we will require a ref for the subj creds too */
/* dumpability changes */
if (old->euid != new->euid ||
old->egid != new->egid ||
old->fsuid != new->fsuid ||
old->fsgid != new->fsgid ||
!cap_issubset(new->cap_permitted, old->cap_permitted)) {
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
smp_wmb();
}
/* alter the thread keyring */
if (new->fsuid != old->fsuid)
key_fsuid_changed(task);
if (new->fsgid != old->fsgid)
key_fsgid_changed(task);
/* do it
* - What if a process setreuid()'s and this brings the
* new uid over his NPROC rlimit? We can check this now
* cheaply with the new uid cache, so if it matters
* we should be checking for it. -DaveM
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user)
atomic_inc(&new->user->processes);
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
if (new->user != old->user)
atomic_dec(&old->user->processes);
alter_cred_subscribers(old, -2);
/* send notifications */
if (new->uid != old->uid ||
new->euid != old->euid ||
new->suid != old->suid ||
new->fsuid != old->fsuid)
proc_id_connector(task, PROC_EVENT_UID);
if (new->gid != old->gid ||
new->egid != old->egid ||
new->sgid != old->sgid ||
new->fsgid != old->fsgid)
proc_id_connector(task, PROC_EVENT_GID);
/* release the old obj and subj refs both */
put_cred(old);
put_cred(old);
return 0;
}
EXPORT_SYMBOL(commit_creds);
/**
* abort_creds - Discard a set of credentials and unlock the current task
* @new: The credentials that were going to be applied
*
* Discard a set of credentials that were under construction and unlock the
* current task.
*/
void abort_creds(struct cred *new)
{
kdebug("abort_creds(%p{%d,%d})", new,
atomic_read(&new->usage),
read_cred_subscribers(new));
#ifdef CONFIG_DEBUG_CREDENTIALS
BUG_ON(read_cred_subscribers(new) != 0);
#endif
BUG_ON(atomic_read(&new->usage) < 1);
put_cred(new);
}
EXPORT_SYMBOL(abort_creds);
/**
* override_creds - Override the current process's subjective credentials
* @new: The credentials to be assigned
*
* Install a set of temporary override subjective credentials on the current
* process, returning the old set for later reversion.
*/
const struct cred *override_creds(const struct cred *new)
{
const struct cred *old = current->cred;
kdebug("override_creds(%p{%d,%d})", new,
atomic_read(&new->usage),
read_cred_subscribers(new));
validate_creds(old);
validate_creds(new);
get_cred(new);
alter_cred_subscribers(new, 1);
rcu_assign_pointer(current->cred, new);
alter_cred_subscribers(old, -1);
kdebug("override_creds() = %p{%d,%d}", old,
atomic_read(&old->usage),
read_cred_subscribers(old));
return old;
}
EXPORT_SYMBOL(override_creds);
/**
* revert_creds - Revert a temporary subjective credentials override
* @old: The credentials to be restored
*
* Revert a temporary set of override subjective credentials to an old set,
* discarding the override set.
*/
void revert_creds(const struct cred *old)
{
const struct cred *override = current->cred;
kdebug("revert_creds(%p{%d,%d})", old,
atomic_read(&old->usage),
read_cred_subscribers(old));
validate_creds(old);
validate_creds(override);
alter_cred_subscribers(old, 1);
rcu_assign_pointer(current->cred, old);
alter_cred_subscribers(override, -1);
put_cred(override);
}
EXPORT_SYMBOL(revert_creds);
/*
* initialise the credentials stuff
*/
void __init cred_init(void)
{
/* allocate a slab in which we can store credentials */
cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}
/**
* prepare_kernel_cred - Prepare a set of credentials for a kernel service
* @daemon: A userspace daemon to be used as a reference
*
* Prepare a set of credentials for a kernel service. This can then be used to
* override a task's own credentials so that work can be done on behalf of that
* task that requires a different subjective context.
*
* @daemon is used to provide a base for the security record, but can be NULL.
* If @daemon is supplied, then the security data will be derived from that;
 * otherwise the UIDs and GIDs are set to 0, with no groups, full capabilities and no keys.
*
* The caller may change these controls afterwards if desired.
*
* Returns the new credentials or NULL if out of memory.
*
* Does not take, and does not return holding current->cred_replace_mutex.
*/
struct cred *prepare_kernel_cred(struct task_struct *daemon)
{
const struct cred *old;
struct cred *new;
new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;
kdebug("prepare_kernel_cred() alloc %p", new);
if (daemon)
old = get_task_cred(daemon);
else
old = get_cred(&init_cred);
validate_creds(old);
*new = *old;
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_uid(new->user);
get_group_info(new->group_info);
#ifdef CONFIG_KEYS
atomic_inc(&init_tgcred.usage);
new->tgcred = &init_tgcred;
new->request_key_auth = NULL;
new->thread_keyring = NULL;
new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
#endif
#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
goto error;
put_cred(old);
validate_creds(new);
return new;
error:
put_cred(new);
put_cred(old);
return NULL;
}
EXPORT_SYMBOL(prepare_kernel_cred);
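/*
 * A minimal sketch of the override pattern described above (the helper name
 * is illustrative and the "privileged work" is a placeholder): build kernel
 * credentials, temporarily act with them, then restore the caller's own set.
 */
static int example_act_as_kernel_service(void)
{
        const struct cred *old;
        struct cred *kcred = prepare_kernel_cred(NULL);

        if (!kcred)
                return -ENOMEM;
        old = override_creds(kcred);
        /* ... do the privileged work here ... */
        revert_creds(old);
        put_cred(kcred);
        return 0;
}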
/**
* set_security_override - Set the security ID in a set of credentials
* @new: The credentials to alter
* @secid: The LSM security ID to set
*
* Set the LSM security ID in a set of credentials so that the subjective
* security is overridden when an alternative set of credentials is used.
*/
int set_security_override(struct cred *new, u32 secid)
{
return security_kernel_act_as(new, secid);
}
EXPORT_SYMBOL(set_security_override);
/**
* set_security_override_from_ctx - Set the security ID in a set of credentials
* @new: The credentials to alter
* @secctx: The LSM security context to generate the security ID from.
*
* Set the LSM security ID in a set of credentials so that the subjective
* security is overridden when an alternative set of credentials is used. The
* security ID is specified in string form as a security context to be
* interpreted by the LSM.
*/
int set_security_override_from_ctx(struct cred *new, const char *secctx)
{
u32 secid;
int ret;
ret = security_secctx_to_secid(secctx, strlen(secctx), &secid);
if (ret < 0)
return ret;
return set_security_override(new, secid);
}
EXPORT_SYMBOL(set_security_override_from_ctx);
/**
* set_create_files_as - Set the LSM file create context in a set of credentials
* @new: The credentials to alter
* @inode: The inode to take the context from
*
* Change the LSM file creation context in a set of credentials to be the same
* as the object context of the specified inode, so that the new inodes have
* the same MAC context as that inode.
*/
int set_create_files_as(struct cred *new, struct inode *inode)
{
new->fsuid = inode->i_uid;
new->fsgid = inode->i_gid;
return security_kernel_create_files_as(new, inode);
}
EXPORT_SYMBOL(set_create_files_as);
#ifdef CONFIG_DEBUG_CREDENTIALS
bool creds_are_invalid(const struct cred *cred)
{
if (cred->magic != CRED_MAGIC)
return true;
#ifdef CONFIG_SECURITY_SELINUX
/*
* cred->security == NULL if security_cred_alloc_blank() or
* security_prepare_creds() returned an error.
*/
if (selinux_is_enabled() && cred->security) {
if ((unsigned long) cred->security < PAGE_SIZE)
return true;
if ((*(u32 *)cred->security & 0xffffff00) ==
(POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
return true;
}
#endif
return false;
}
EXPORT_SYMBOL(creds_are_invalid);
/*
* dump invalid credentials
*/
static void dump_invalid_creds(const struct cred *cred, const char *label,
const struct task_struct *tsk)
{
printk(KERN_ERR "CRED: %s credentials: %p %s%s%s\n",
label, cred,
cred == &init_cred ? "[init]" : "",
cred == tsk->real_cred ? "[real]" : "",
cred == tsk->cred ? "[eff]" : "");
printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n",
cred->magic, cred->put_addr);
printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n",
atomic_read(&cred->usage),
read_cred_subscribers(cred));
printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n",
cred->uid, cred->euid, cred->suid, cred->fsuid);
printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n",
cred->gid, cred->egid, cred->sgid, cred->fsgid);
#ifdef CONFIG_SECURITY
printk(KERN_ERR "CRED: ->security is %p\n", cred->security);
if ((unsigned long) cred->security >= PAGE_SIZE &&
(((unsigned long) cred->security & 0xffffff00) !=
(POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)))
printk(KERN_ERR "CRED: ->security {%x, %x}\n",
((u32*)cred->security)[0],
((u32*)cred->security)[1]);
#endif
}
/*
* report use of invalid credentials
*/
void __invalid_creds(const struct cred *cred, const char *file, unsigned line)
{
printk(KERN_ERR "CRED: Invalid credentials\n");
printk(KERN_ERR "CRED: At %s:%u\n", file, line);
dump_invalid_creds(cred, "Specified", current);
BUG();
}
EXPORT_SYMBOL(__invalid_creds);
/*
* check the credentials on a process
*/
void __validate_process_creds(struct task_struct *tsk,
const char *file, unsigned line)
{
if (tsk->cred == tsk->real_cred) {
if (unlikely(read_cred_subscribers(tsk->cred) < 2 ||
creds_are_invalid(tsk->cred)))
goto invalid_creds;
} else {
if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 ||
read_cred_subscribers(tsk->cred) < 1 ||
creds_are_invalid(tsk->real_cred) ||
creds_are_invalid(tsk->cred)))
goto invalid_creds;
}
return;
invalid_creds:
printk(KERN_ERR "CRED: Invalid process credentials\n");
printk(KERN_ERR "CRED: At %s:%u\n", file, line);
dump_invalid_creds(tsk->real_cred, "Real", tsk);
if (tsk->cred != tsk->real_cred)
dump_invalid_creds(tsk->cred, "Effective", tsk);
else
printk(KERN_ERR "CRED: Effective creds == Real creds\n");
BUG();
}
EXPORT_SYMBOL(__validate_process_creds);
/*
* check creds for do_exit()
*/
void validate_creds_for_do_exit(struct task_struct *tsk)
{
kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})",
tsk->real_cred, tsk->cred,
atomic_read(&tsk->cred->usage),
read_cred_subscribers(tsk->cred));
__validate_process_creds(tsk, __FILE__, __LINE__);
}
#endif /* CONFIG_DEBUG_CREDENTIALS */

184
kernel/kernel/delayacct.c Normal file
View File

@@ -0,0 +1,184 @@
/* delayacct.c - per-task delay accounting
*
* Copyright (C) Shailabh Nagar, IBM Corp. 2006
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/taskstats.h>
#include <linux/time.h>
#include <linux/sysctl.h>
#include <linux/delayacct.h>
int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */
struct kmem_cache *delayacct_cache;
static int __init delayacct_setup_disable(char *str)
{
delayacct_on = 0;
return 1;
}
__setup("nodelayacct", delayacct_setup_disable);
void delayacct_init(void)
{
delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC);
delayacct_tsk_init(&init_task);
}
void __delayacct_tsk_init(struct task_struct *tsk)
{
tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
if (tsk->delays)
spin_lock_init(&tsk->delays->lock);
}
/*
* Start accounting for a delay statistic using
* its starting timestamp (@start)
*/
static inline void delayacct_start(struct timespec *start)
{
do_posix_clock_monotonic_gettime(start);
}
/*
* Finish delay accounting for a statistic using
 * its timestamps (@start, @end), accumulator (@total) and @count
*/
static void delayacct_end(struct timespec *start, struct timespec *end,
u64 *total, u32 *count)
{
struct timespec ts;
s64 ns;
unsigned long flags;
do_posix_clock_monotonic_gettime(end);
ts = timespec_sub(*end, *start);
ns = timespec_to_ns(&ts);
if (ns < 0)
return;
spin_lock_irqsave(&current->delays->lock, flags);
*total += ns;
(*count)++;
spin_unlock_irqrestore(&current->delays->lock, flags);
}
void __delayacct_blkio_start(void)
{
delayacct_start(&current->delays->blkio_start);
}
void __delayacct_blkio_end(void)
{
if (current->delays->flags & DELAYACCT_PF_SWAPIN)
/* Swapin block I/O */
delayacct_end(&current->delays->blkio_start,
&current->delays->blkio_end,
&current->delays->swapin_delay,
&current->delays->swapin_count);
else /* Other block I/O */
delayacct_end(&current->delays->blkio_start,
&current->delays->blkio_end,
&current->delays->blkio_delay,
&current->delays->blkio_count);
}
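/*
 * The caller side, as a hedged sketch: io_schedule()-style paths bracket the
 * wait with the delayacct_blkio_start()/delayacct_blkio_end() wrappers from
 * <linux/delayacct.h>, which only fall through to the __delayacct_* helpers
 * above when delay accounting is enabled for the task. The helper name below
 * is illustrative and the actual sleep/wake-up plumbing is elided.
 */
static void example_wait_for_block_io(void)
{
        delayacct_blkio_start();
        io_schedule();          /* block until the request completes */
        delayacct_blkio_end();
}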
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
s64 tmp;
unsigned long t1;
unsigned long long t2, t3;
unsigned long flags;
struct timespec ts;
/* Though tsk->delays is accessed later, an early exit avoids
 * unnecessarily returning the other data
*/
if (!tsk->delays)
goto done;
tmp = (s64)d->cpu_run_real_total;
cputime_to_timespec(tsk->utime + tsk->stime, &ts);
tmp += timespec_to_ns(&ts);
d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
tmp = (s64)d->cpu_scaled_run_real_total;
cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
tmp += timespec_to_ns(&ts);
d->cpu_scaled_run_real_total =
(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
/*
* No locking available for sched_info (and too expensive to add one)
* Mitigate by taking snapshot of values
*/
t1 = tsk->sched_info.pcount;
t2 = tsk->sched_info.run_delay;
t3 = tsk->se.sum_exec_runtime;
d->cpu_count += t1;
tmp = (s64)d->cpu_delay_total + t2;
d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
tmp = (s64)d->cpu_run_virtual_total + t3;
d->cpu_run_virtual_total =
(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
spin_lock_irqsave(&tsk->delays->lock, flags);
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count;
spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
return 0;
}
__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
{
__u64 ret;
unsigned long flags;
spin_lock_irqsave(&tsk->delays->lock, flags);
ret = nsec_to_clock_t(tsk->delays->blkio_delay +
tsk->delays->swapin_delay);
spin_unlock_irqrestore(&tsk->delays->lock, flags);
return ret;
}
void __delayacct_freepages_start(void)
{
delayacct_start(&current->delays->freepages_start);
}
void __delayacct_freepages_end(void)
{
delayacct_end(&current->delays->freepages_start,
&current->delays->freepages_end,
&current->delays->freepages_delay,
&current->delays->freepages_count);
}

161
kernel/kernel/dma.c Normal file
View File

@@ -0,0 +1,161 @@
/*
* linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
*
* Written by Hennus Bergman, 1992.
*
* 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma.
* In the previous version the reported device could end up being wrong,
* if a device requested a DMA channel that was already in use.
* [It also happened to remove the sizeof(char *) == sizeof(int)
* assumption introduced because of those /proc/dma patches. -- Hennus]
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/dma.h>
#include <asm/system.h>
/* A note on resource allocation:
*
* All drivers needing DMA channels, should allocate and release them
* through the public routines `request_dma()' and `free_dma()'.
*
* In order to avoid problems, all processes should allocate resources in
* the same sequence and release them in the reverse order.
*
* So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA.
* When releasing them, first release the DMA, then release the IRQ.
* If you don't, you may cause allocation requests to fail unnecessarily.
* This doesn't really matter now, but it will once we get real semaphores
* in the kernel.
*/
DEFINE_SPINLOCK(dma_spin_lock);
/*
* If our port doesn't define this it has no PC like DMA
*/
#ifdef MAX_DMA_CHANNELS
/* Channel n is busy iff dma_chan_busy[n].lock != 0.
* DMA0 used to be reserved for DRAM refresh, but apparently not any more...
* DMA4 is reserved for cascading.
*/
struct dma_chan {
int lock;
const char *device_id;
};
static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
[4] = { 1, "cascade" },
};
/**
* request_dma - request and reserve a system DMA channel
* @dmanr: DMA channel number
* @device_id: reserving device ID string, used in /proc/dma
*/
int request_dma(unsigned int dmanr, const char * device_id)
{
if (dmanr >= MAX_DMA_CHANNELS)
return -EINVAL;
if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0)
return -EBUSY;
dma_chan_busy[dmanr].device_id = device_id;
/* old flag was 0, now contains 1 to indicate busy */
return 0;
} /* request_dma */
/**
* free_dma - free a reserved system DMA channel
* @dmanr: DMA channel number
*/
void free_dma(unsigned int dmanr)
{
if (dmanr >= MAX_DMA_CHANNELS) {
printk(KERN_WARNING "Trying to free DMA%d\n", dmanr);
return;
}
if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) {
printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr);
return;
}
} /* free_dma */
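/*
 * A minimal sketch of the ordering spelled out in the note at the top of
 * this file: grab the IRQ before the DMA channel, and release them in the
 * reverse order. The IRQ/DMA numbers, the "example" label and the handler
 * are placeholders, and the IRQ calls assume <linux/interrupt.h>.
 */
static irqreturn_t example_isr(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

static int example_probe(void)
{
        int err = request_irq(7, example_isr, 0, "example", NULL);

        if (err)
                return err;
        err = request_dma(1, "example");
        if (err) {
                free_irq(7, NULL);
                return err;
        }
        return 0;
}

static void example_remove(void)
{
        free_dma(1);
        free_irq(7, NULL);
}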
#else
int request_dma(unsigned int dmanr, const char *device_id)
{
return -EINVAL;
}
void free_dma(unsigned int dmanr)
{
}
#endif
#ifdef CONFIG_PROC_FS
#ifdef MAX_DMA_CHANNELS
static int proc_dma_show(struct seq_file *m, void *v)
{
int i;
for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) {
if (dma_chan_busy[i].lock) {
seq_printf(m, "%2d: %s\n", i,
dma_chan_busy[i].device_id);
}
}
return 0;
}
#else
static int proc_dma_show(struct seq_file *m, void *v)
{
seq_puts(m, "No DMA\n");
return 0;
}
#endif /* MAX_DMA_CHANNELS */
static int proc_dma_open(struct inode *inode, struct file *file)
{
return single_open(file, proc_dma_show, NULL);
}
static const struct file_operations proc_dma_operations = {
.open = proc_dma_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init proc_dma_init(void)
{
proc_create("dma", 0, NULL, &proc_dma_operations);
return 0;
}
__initcall(proc_dma_init);
#endif
EXPORT_SYMBOL(request_dma);
EXPORT_SYMBOL(free_dma);
EXPORT_SYMBOL(dma_spin_lock);

207
kernel/kernel/exec_domain.c Normal file
View File

@@ -0,0 +1,207 @@
/*
* Handling of different ABIs (personalities).
*
* We group personalities into execution domains which have their
* own handlers for kernel entry points, signal mapping, etc...
*
* 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org)
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/personality.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs_struct.h>
static void default_handler(int, struct pt_regs *);
static struct exec_domain *exec_domains = &default_exec_domain;
static DEFINE_RWLOCK(exec_domains_lock);
static u_long ident_map[32] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31
};
struct exec_domain default_exec_domain = {
.name = "Linux", /* name */
.handler = default_handler, /* lcall7 causes a seg fault. */
.pers_low = 0, /* PER_LINUX personality. */
.pers_high = 0, /* PER_LINUX personality. */
.signal_map = ident_map, /* Identity map signals. */
.signal_invmap = ident_map, /* - both ways. */
};
static void
default_handler(int segment, struct pt_regs *regp)
{
set_personality(0);
if (current_thread_info()->exec_domain->handler != default_handler)
current_thread_info()->exec_domain->handler(segment, regp);
else
send_sig(SIGSEGV, current, 1);
}
static struct exec_domain *
lookup_exec_domain(u_long personality)
{
struct exec_domain * ep;
u_long pers = personality(personality);
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep; ep = ep->next) {
if (pers >= ep->pers_low && pers <= ep->pers_high)
if (try_module_get(ep->module))
goto out;
}
#ifdef CONFIG_MODULES
read_unlock(&exec_domains_lock);
request_module("personality-%ld", pers);
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep; ep = ep->next) {
if (pers >= ep->pers_low && pers <= ep->pers_high)
if (try_module_get(ep->module))
goto out;
}
#endif
ep = &default_exec_domain;
out:
read_unlock(&exec_domains_lock);
return (ep);
}
int
register_exec_domain(struct exec_domain *ep)
{
struct exec_domain *tmp;
int err = -EBUSY;
if (ep == NULL)
return -EINVAL;
if (ep->next != NULL)
return -EBUSY;
write_lock(&exec_domains_lock);
for (tmp = exec_domains; tmp; tmp = tmp->next) {
if (tmp == ep)
goto out;
}
ep->next = exec_domains;
exec_domains = ep;
err = 0;
out:
write_unlock(&exec_domains_lock);
return (err);
}
int
unregister_exec_domain(struct exec_domain *ep)
{
struct exec_domain **epp;
epp = &exec_domains;
write_lock(&exec_domains_lock);
for (epp = &exec_domains; *epp; epp = &(*epp)->next) {
if (ep == *epp)
goto unregister;
}
write_unlock(&exec_domains_lock);
return -EINVAL;
unregister:
*epp = ep->next;
ep->next = NULL;
write_unlock(&exec_domains_lock);
return 0;
}
int
__set_personality(u_long personality)
{
struct exec_domain *ep, *oep;
ep = lookup_exec_domain(personality);
if (ep == current_thread_info()->exec_domain) {
current->personality = personality;
module_put(ep->module);
return 0;
}
current->personality = personality;
oep = current_thread_info()->exec_domain;
current_thread_info()->exec_domain = ep;
module_put(oep->module);
return 0;
}
#ifdef CONFIG_PROC_FS
static int execdomains_proc_show(struct seq_file *m, void *v)
{
struct exec_domain *ep;
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep; ep = ep->next)
seq_printf(m, "%d-%d\t%-16s\t[%s]\n",
ep->pers_low, ep->pers_high, ep->name,
module_name(ep->module));
read_unlock(&exec_domains_lock);
return 0;
}
static int execdomains_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, execdomains_proc_show, NULL);
}
static const struct file_operations execdomains_proc_fops = {
.open = execdomains_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init proc_execdomains_init(void)
{
proc_create("execdomains", 0, NULL, &execdomains_proc_fops);
return 0;
}
module_init(proc_execdomains_init);
#endif
SYSCALL_DEFINE1(personality, u_long, personality)
{
u_long old = current->personality;
if (personality != 0xffffffff) {
set_personality(personality);
if (current->personality != personality)
return -EINVAL;
}
return (long)old;
}
EXPORT_SYMBOL(register_exec_domain);
EXPORT_SYMBOL(unregister_exec_domain);
EXPORT_SYMBOL(__set_personality);
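/*
 * A hedged sketch of how a personality module would register itself with the
 * API above; every value here is a placeholder, the handler only mirrors the
 * default behaviour, and a real module would wire the init function up via
 * module_init().
 */
static void example_handler(int segment, struct pt_regs *regs)
{
        send_sig(SIGSEGV, current, 1);
}

static struct exec_domain example_exec_domain = {
        .name           = "example",
        .handler        = example_handler,
        .pers_low       = PER_BSD & PER_MASK,   /* placeholder personality range */
        .pers_high      = PER_BSD & PER_MASK,
        .signal_map     = ident_map,            /* identity signal mapping */
        .signal_invmap  = ident_map,
        .module         = THIS_MODULE,
};

static int __init example_domain_init(void)
{
        return register_exec_domain(&example_exec_domain);
}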

1787
kernel/kernel/exit.c Normal file

File diff suppressed because it is too large

115
kernel/kernel/extable.c Normal file
View File

@@ -0,0 +1,115 @@
/* Rewritten by Rusty Russell, on the backs of many others...
Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/ftrace.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
/*
* mutex protecting text section modification (dynamic code patching).
* some users need to sleep (allocating memory...) while they hold this lock.
*
* NOT exported to modules - patching kernel text is a really delicate matter.
*/
DEFINE_MUTEX(text_mutex);
extern struct exception_table_entry __start___ex_table[];
extern struct exception_table_entry __stop___ex_table[];
/* Sort the kernel's built-in exception table */
void __init sort_main_extable(void)
{
sort_extable(__start___ex_table, __stop___ex_table);
}
/* Given an address, look for it in the exception tables. */
const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
const struct exception_table_entry *e;
e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
if (!e)
e = search_module_extables(addr);
return e;
}
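/*
 * The usual consumer, as an architecture-flavoured sketch: a fault handler
 * looks up the faulting instruction and, if a fixup entry exists, resumes
 * execution there instead of oopsing. The pt_regs field and the exception
 * table entry layout follow x86 and are illustrative only.
 */
static int example_fixup_exception(struct pt_regs *regs)
{
        const struct exception_table_entry *fixup;

        fixup = search_exception_tables(regs->ip);
        if (!fixup)
                return 0;
        regs->ip = fixup->fixup;
        return 1;
}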
static inline int init_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext &&
addr <= (unsigned long)_einittext)
return 1;
return 0;
}
int core_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_stext &&
addr <= (unsigned long)_etext)
return 1;
if (system_state == SYSTEM_BOOTING &&
init_kernel_text(addr))
return 1;
return 0;
}
int __kernel_text_address(unsigned long addr)
{
if (core_kernel_text(addr))
return 1;
if (is_module_text_address(addr))
return 1;
/*
* There might be init symbols in saved stacktraces.
* Give those symbols a chance to be printed in
* backtraces (such as lockdep traces).
*
* Since we are after the module-symbols check, there's
* no danger of address overlap:
*/
if (init_kernel_text(addr))
return 1;
return 0;
}
int kernel_text_address(unsigned long addr)
{
if (core_kernel_text(addr))
return 1;
return is_module_text_address(addr);
}
/*
* On some architectures (PPC64, IA64) function pointers
* are actually only tokens to some data that then holds the
* real function address. As a result, to find if a function
* pointer is part of the kernel text, we need to do some
* special dereferencing first.
*/
int func_ptr_is_kernel_text(void *ptr)
{
unsigned long addr;
addr = (unsigned long) dereference_function_descriptor(ptr);
if (core_kernel_text(addr))
return 1;
return is_module_text_address(addr);
}

1756
kernel/kernel/fork.c Normal file

File diff suppressed because it is too large

161
kernel/kernel/freezer.c Normal file
View File

@@ -0,0 +1,161 @@
/*
* kernel/freezer.c - Function to freeze a process
*
* Originally from kernel/power/process.c
*/
#include <linux/interrupt.h>
#include <linux/suspend.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
/*
* freezing is complete, mark current process as frozen
*/
static inline void frozen_process(void)
{
if (!unlikely(current->flags & PF_NOFREEZE)) {
current->flags |= PF_FROZEN;
wmb();
}
clear_freeze_flag(current);
}
/* The refrigerator is the place where frozen processes are stored :-). */
void refrigerator(void)
{
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
long save;
task_lock(current);
if (freezing(current)) {
frozen_process();
task_unlock(current);
} else {
task_unlock(current);
return;
}
save = current->state;
pr_debug("%s entered refrigerator\n", current->comm);
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
/* prevent accounting of that task to load */
current->flags |= PF_FREEZING;
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!frozen(current))
break;
schedule();
}
/* Remove the accounting blocker */
current->flags &= ~PF_FREEZING;
pr_debug("%s left refrigerator\n", current->comm);
__set_current_state(save);
}
EXPORT_SYMBOL(refrigerator);
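/*
 * The other side of the contract, as a minimal sketch: a freezable kernel
 * thread periodically calls try_to_freeze() (from <linux/freezer.h>), which
 * enters refrigerator() once TIF_FREEZE has been set for it. The work loop,
 * the timeout and the kthread helpers (<linux/kthread.h>) are placeholders.
 */
static int example_freezable_kthread(void *data)
{
        set_freezable();
        while (!kthread_should_stop()) {
                try_to_freeze();
                /* ... do one unit of work ... */
                schedule_timeout_interruptible(HZ);
        }
        return 0;
}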
static void fake_signal_wake_up(struct task_struct *p)
{
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
signal_wake_up(p, 0);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
/**
* freeze_task - send a freeze request to given task
* @p: task to send the request to
* @sig_only: if set, the request will only be sent if the task has the
* PF_FREEZER_NOSIG flag unset
 * Return value: 'false' if @sig_only is set and the task has
 * PF_FREEZER_NOSIG set or the task is frozen; 'true' otherwise
*
 * The freeze request is sent by setting the task's TIF_FREEZE flag and
* either sending a fake signal to it or waking it up, depending on whether
* or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
* has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
* TIF_FREEZE flag will not be set.
*/
bool freeze_task(struct task_struct *p, bool sig_only)
{
/*
* We first check if the task is freezing and next if it has already
* been frozen to avoid the race with frozen_process() which first marks
* the task as frozen and next clears its TIF_FREEZE.
*/
if (!freezing(p)) {
rmb();
if (frozen(p))
return false;
if (!sig_only || should_send_signal(p))
set_freeze_flag(p);
else
return false;
}
if (should_send_signal(p)) {
if (!signal_pending(p))
fake_signal_wake_up(p);
} else if (sig_only) {
return false;
} else {
wake_up_state(p, TASK_INTERRUPTIBLE);
}
return true;
}
void cancel_freezing(struct task_struct *p)
{
unsigned long flags;
if (freezing(p)) {
pr_debug(" clean up: %s\n", p->comm);
clear_freeze_flag(p);
spin_lock_irqsave(&p->sighand->siglock, flags);
recalc_sigpending_and_wake(p);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
}
static int __thaw_process(struct task_struct *p)
{
if (frozen(p)) {
p->flags &= ~PF_FROZEN;
return 1;
}
clear_freeze_flag(p);
return 0;
}
/*
* Wake up a frozen process
*
* task_lock() is needed to prevent the race with refrigerator() which may
* occur if the freezing of tasks fails. Namely, without the lock, if the
* freezing of tasks failed, thaw_tasks() might have run before a task in
* refrigerator() could call frozen_process(), in which case the task would be
* frozen and no one would thaw it.
*/
int thaw_process(struct task_struct *p)
{
task_lock(p);
if (__thaw_process(p) == 1) {
task_unlock(p);
wake_up_process(p);
return 1;
}
task_unlock(p);
return 0;
}
EXPORT_SYMBOL(thaw_process);

2665
kernel/kernel/futex.c Normal file

File diff suppressed because it is too large

200
kernel/kernel/futex_compat.c Normal file
View File

@@ -0,0 +1,200 @@
/*
* linux/kernel/futex_compat.c
*
 * Futex compatibility routines.
*
* Copyright 2006, Red Hat, Inc., Ingo Molnar
*/
#include <linux/linkage.h>
#include <linux/compat.h>
#include <linux/nsproxy.h>
#include <linux/futex.h>
#include <asm/uaccess.h>
/*
* Fetch a robust-list pointer. Bit 0 signals PI futexes:
*/
static inline int
fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
compat_uptr_t __user *head, int *pi)
{
if (get_user(*uentry, head))
return -EFAULT;
*entry = compat_ptr((*uentry) & ~1);
*pi = (unsigned int)(*uentry) & 1;
return 0;
}
static void __user *futex_uaddr(struct robust_list __user *entry,
compat_long_t futex_offset)
{
compat_uptr_t base = ptr_to_compat(entry);
void __user *uaddr = compat_ptr(base + futex_offset);
return uaddr;
}
/*
* Walk curr->robust_list (very carefully, it's a userspace list!)
* and mark any locks found there dead, and notify any waiters.
*
* We silently return on any sign of list-walking problem.
*/
void compat_exit_robust_list(struct task_struct *curr)
{
struct compat_robust_list_head __user *head = curr->compat_robust_list;
struct robust_list __user *entry, *next_entry, *pending;
unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
compat_uptr_t uentry, next_uentry, upending;
compat_long_t futex_offset;
int rc;
if (!futex_cmpxchg_enabled)
return;
/*
* Fetch the list head (which was registered earlier, via
* sys_set_robust_list()):
*/
if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
return;
/*
* Fetch the relative futex offset:
*/
if (get_user(futex_offset, &head->futex_offset))
return;
/*
* Fetch any possibly pending lock-add first, and handle it
* if it exists:
*/
if (fetch_robust_entry(&upending, &pending,
&head->list_op_pending, &pip))
return;
next_entry = NULL; /* avoid warning with gcc */
while (entry != (struct robust_list __user *) &head->list) {
/*
* Fetch the next entry in the list before calling
* handle_futex_death:
*/
rc = fetch_robust_entry(&next_uentry, &next_entry,
(compat_uptr_t __user *)&entry->next, &next_pi);
/*
* A pending lock might already be on the list, so
 * don't process it twice:
*/
if (entry != pending) {
void __user *uaddr = futex_uaddr(entry, futex_offset);
if (handle_futex_death(uaddr, curr, pi))
return;
}
if (rc)
return;
uentry = next_uentry;
entry = next_entry;
pi = next_pi;
/*
* Avoid excessively long or circular lists:
*/
if (!--limit)
break;
cond_resched();
}
if (pending) {
void __user *uaddr = futex_uaddr(pending, futex_offset);
handle_futex_death(uaddr, curr, pip);
}
}
asmlinkage long
compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
compat_size_t len)
{
if (!futex_cmpxchg_enabled)
return -ENOSYS;
if (unlikely(len != sizeof(*head)))
return -EINVAL;
current->compat_robust_list = head;
return 0;
}
asmlinkage long
compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
compat_size_t __user *len_ptr)
{
struct compat_robust_list_head __user *head;
unsigned long ret;
const struct cred *cred = current_cred(), *pcred;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
if (!pid)
head = current->compat_robust_list;
else {
struct task_struct *p;
ret = -ESRCH;
read_lock(&tasklist_lock);
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
ret = -EPERM;
pcred = __task_cred(p);
if (cred->euid != pcred->euid &&
cred->euid != pcred->uid &&
!capable(CAP_SYS_PTRACE))
goto err_unlock;
head = p->compat_robust_list;
read_unlock(&tasklist_lock);
}
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(ptr_to_compat(head), head_ptr);
err_unlock:
read_unlock(&tasklist_lock);
return ret;
}
asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
struct compat_timespec __user *utime, u32 __user *uaddr2,
u32 val3)
{
struct timespec ts;
ktime_t t, *tp = NULL;
int val2 = 0;
int cmd = op & FUTEX_CMD_MASK;
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
cmd == FUTEX_WAIT_BITSET ||
cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
return -EINVAL;
t = timespec_to_ktime(ts);
if (cmd == FUTEX_WAIT)
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

48
kernel/kernel/gcov/Kconfig Normal file
View File

@@ -0,0 +1,48 @@
menu "GCOV-based kernel profiling"
config GCOV_KERNEL
bool "Enable gcov-based kernel profiling"
depends on DEBUG_FS && CONSTRUCTORS
default n
---help---
This option enables gcov-based code profiling (e.g. for code coverage
measurements).
If unsure, say N.
Additionally specify CONFIG_GCOV_PROFILE_ALL=y to get profiling data
for the entire kernel. To enable profiling for specific files or
directories, add a line similar to the following to the respective
Makefile:
For a single file (e.g. main.o):
GCOV_PROFILE_main.o := y
For all files in one directory:
GCOV_PROFILE := y
To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
is specified, use:
GCOV_PROFILE_main.o := n
and:
GCOV_PROFILE := n
Note that the debugfs filesystem has to be mounted to access
profiling data.
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
depends on SUPERH || S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
default n
---help---
This options activates profiling for the entire kernel.
If unsure, say N.
Note that a kernel compiled with profiling flags will be significantly
larger and run slower. Also be sure to exclude files from profiling
which are not linked to the kernel image to prevent linker errors.
endmenu

3
kernel/kernel/gcov/Makefile Normal file
View File

@@ -0,0 +1,3 @@
EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o

148
kernel/kernel/gcov/base.c Normal file
View File

@@ -0,0 +1,148 @@
/*
* This code maintains a list of active profiling data structures.
*
* Copyright IBM Corp. 2009
* Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
*
* Uses gcc-internal data definitions.
* Based on the gcov-kernel patch by:
* Hubertus Franke <frankeh@us.ibm.com>
* Nigel Hinds <nhinds@us.ibm.com>
* Rajan Ravindran <rajancr@us.ibm.com>
* Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
* Paul Larson
*/
#define pr_fmt(fmt) "gcov: " fmt
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include "gcov.h"
static struct gcov_info *gcov_info_head;
static int gcov_events_enabled;
static DEFINE_MUTEX(gcov_lock);
/*
* __gcov_init is called by gcc-generated constructor code for each object
* file compiled with -fprofile-arcs.
*/
void __gcov_init(struct gcov_info *info)
{
static unsigned int gcov_version;
mutex_lock(&gcov_lock);
if (gcov_version == 0) {
gcov_version = info->version;
/*
* Printing gcc's version magic may prove useful for debugging
* incompatibility reports.
*/
pr_info("version magic: 0x%x\n", gcov_version);
}
/*
* Add new profiling data structure to list and inform event
* listener.
*/
info->next = gcov_info_head;
gcov_info_head = info;
if (gcov_events_enabled)
gcov_event(GCOV_ADD, info);
mutex_unlock(&gcov_lock);
}
EXPORT_SYMBOL(__gcov_init);
/*
* These functions may be referenced by gcc-generated profiling code but serve
* no function for kernel profiling.
*/
void __gcov_flush(void)
{
/* Unused. */
}
EXPORT_SYMBOL(__gcov_flush);
void __gcov_merge_add(gcov_type *counters, unsigned int n_counters)
{
/* Unused. */
}
EXPORT_SYMBOL(__gcov_merge_add);
void __gcov_merge_single(gcov_type *counters, unsigned int n_counters)
{
/* Unused. */
}
EXPORT_SYMBOL(__gcov_merge_single);
void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters)
{
/* Unused. */
}
EXPORT_SYMBOL(__gcov_merge_delta);
/**
* gcov_enable_events - enable event reporting through gcov_event()
*
* Turn on reporting of profiling data load/unload-events through the
* gcov_event() callback. Also replay all previous events once. This function
* is needed because some events are potentially generated too early for the
* callback implementation to handle them initially.
*/
void gcov_enable_events(void)
{
struct gcov_info *info;
mutex_lock(&gcov_lock);
gcov_events_enabled = 1;
/* Perform event callback for previously registered entries. */
for (info = gcov_info_head; info; info = info->next)
gcov_event(GCOV_ADD, info);
mutex_unlock(&gcov_lock);
}
#ifdef CONFIG_MODULES
static inline int within(void *addr, void *start, unsigned long size)
{
return ((addr >= start) && (addr < start + size));
}
/* Update list and generate events when modules are unloaded. */
static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
void *data)
{
struct module *mod = data;
struct gcov_info *info;
struct gcov_info *prev;
if (event != MODULE_STATE_GOING)
return NOTIFY_OK;
mutex_lock(&gcov_lock);
prev = NULL;
/* Remove entries located in module from linked list. */
for (info = gcov_info_head; info; info = info->next) {
if (within(info, mod->module_core, mod->core_size)) {
if (prev)
prev->next = info->next;
else
gcov_info_head = info->next;
if (gcov_events_enabled)
gcov_event(GCOV_REMOVE, info);
} else
prev = info;
}
mutex_unlock(&gcov_lock);
return NOTIFY_OK;
}
static struct notifier_block gcov_nb = {
.notifier_call = gcov_module_notifier,
};
static int __init gcov_init(void)
{
return register_module_notifier(&gcov_nb);
}
device_initcall(gcov_init);
#endif /* CONFIG_MODULES */

789
kernel/kernel/gcov/fs.c Normal file
View File

@@ -0,0 +1,789 @@
/*
* This code exports profiling data as debugfs files to userspace.
*
* Copyright IBM Corp. 2009
* Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
*
* Uses gcc-internal data definitions.
* Based on the gcov-kernel patch by:
* Hubertus Franke <frankeh@us.ibm.com>
* Nigel Hinds <nhinds@us.ibm.com>
* Rajan Ravindran <rajancr@us.ibm.com>
* Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
* Paul Larson
* Yi CDL Yang
*/
#define pr_fmt(fmt) "gcov: " fmt
#include <linux/init.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
#include "gcov.h"
/**
* struct gcov_node - represents a debugfs entry
* @list: list head for child node list
* @children: child nodes
* @all: list head for list of all nodes
* @parent: parent node
* @loaded_info: array of pointers to profiling data sets for loaded object
* files.
* @num_loaded: number of profiling data sets for loaded object files.
* @unloaded_info: accumulated copy of profiling data sets for unloaded
* object files. Used only when gcov_persist=1.
* @dentry: main debugfs entry, either a directory or data file
* @links: associated symbolic links
* @name: data file basename
*
* struct gcov_node represents an entity within the gcov/ subdirectory
* of debugfs. There are directory and data file nodes. The latter represent
* the actual synthesized data file plus any associated symbolic links which
* are needed by the gcov tool to work correctly.
*/
struct gcov_node {
struct list_head list;
struct list_head children;
struct list_head all;
struct gcov_node *parent;
struct gcov_info **loaded_info;
struct gcov_info *unloaded_info;
struct dentry *dentry;
struct dentry **links;
int num_loaded;
char name[0];
};
static const char objtree[] = OBJTREE;
static const char srctree[] = SRCTREE;
static struct gcov_node root_node;
static struct dentry *reset_dentry;
static LIST_HEAD(all_head);
static DEFINE_MUTEX(node_lock);
/* If non-zero, keep copies of profiling data for unloaded modules. */
static int gcov_persist = 1;
static int __init gcov_persist_setup(char *str)
{
unsigned long val;
if (strict_strtoul(str, 0, &val)) {
pr_warning("invalid gcov_persist parameter '%s'\n", str);
return 0;
}
gcov_persist = val;
pr_info("setting gcov_persist to %d\n", gcov_persist);
return 1;
}
__setup("gcov_persist=", gcov_persist_setup);
/*
* seq_file.start() implementation for gcov data files. Note that the
* gcov_iterator interface is designed to be more restrictive than seq_file
* (no start from arbitrary position, etc.), to simplify the iterator
* implementation.
*/
static void *gcov_seq_start(struct seq_file *seq, loff_t *pos)
{
loff_t i;
gcov_iter_start(seq->private);
for (i = 0; i < *pos; i++) {
if (gcov_iter_next(seq->private))
return NULL;
}
return seq->private;
}
/* seq_file.next() implementation for gcov data files. */
static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos)
{
struct gcov_iterator *iter = data;
if (gcov_iter_next(iter))
return NULL;
(*pos)++;
return iter;
}
/* seq_file.show() implementation for gcov data files. */
static int gcov_seq_show(struct seq_file *seq, void *data)
{
struct gcov_iterator *iter = data;
if (gcov_iter_write(iter, seq))
return -EINVAL;
return 0;
}
static void gcov_seq_stop(struct seq_file *seq, void *data)
{
/* Unused. */
}
static const struct seq_operations gcov_seq_ops = {
.start = gcov_seq_start,
.next = gcov_seq_next,
.show = gcov_seq_show,
.stop = gcov_seq_stop,
};
/*
* Return a profiling data set associated with the given node. This is
* either a data set for a loaded object file or a data set copy in case
* all associated object files have been unloaded.
*/
static struct gcov_info *get_node_info(struct gcov_node *node)
{
if (node->num_loaded > 0)
return node->loaded_info[0];
return node->unloaded_info;
}
/*
* Return a newly allocated profiling data set which contains the sum of
* all profiling data associated with the given node.
*/
static struct gcov_info *get_accumulated_info(struct gcov_node *node)
{
struct gcov_info *info;
int i = 0;
if (node->unloaded_info)
info = gcov_info_dup(node->unloaded_info);
else
info = gcov_info_dup(node->loaded_info[i++]);
if (!info)
return NULL;
for (; i < node->num_loaded; i++)
gcov_info_add(info, node->loaded_info[i]);
return info;
}
/*
* open() implementation for gcov data files. Create a copy of the profiling
* data set and initialize the iterator and seq_file interface.
*/
static int gcov_seq_open(struct inode *inode, struct file *file)
{
struct gcov_node *node = inode->i_private;
struct gcov_iterator *iter;
struct seq_file *seq;
struct gcov_info *info;
int rc = -ENOMEM;
mutex_lock(&node_lock);
/*
* Read from a profiling data copy to minimize reference tracking
* complexity and concurrent access and to keep accumulating multiple
* profiling data sets associated with one node simple.
*/
info = get_accumulated_info(node);
if (!info)
goto out_unlock;
iter = gcov_iter_new(info);
if (!iter)
goto err_free_info;
rc = seq_open(file, &gcov_seq_ops);
if (rc)
goto err_free_iter_info;
seq = file->private_data;
seq->private = iter;
out_unlock:
mutex_unlock(&node_lock);
return rc;
err_free_iter_info:
gcov_iter_free(iter);
err_free_info:
gcov_info_free(info);
goto out_unlock;
}
/*
* release() implementation for gcov data files. Release resources allocated
* by open().
*/
static int gcov_seq_release(struct inode *inode, struct file *file)
{
struct gcov_iterator *iter;
struct gcov_info *info;
struct seq_file *seq;
seq = file->private_data;
iter = seq->private;
info = gcov_iter_get_info(iter);
gcov_iter_free(iter);
gcov_info_free(info);
seq_release(inode, file);
return 0;
}
/*
* Find a node by the associated data file name. Needs to be called with
* node_lock held.
*/
static struct gcov_node *get_node_by_name(const char *name)
{
struct gcov_node *node;
struct gcov_info *info;
list_for_each_entry(node, &all_head, all) {
info = get_node_info(node);
if (info && (strcmp(info->filename, name) == 0))
return node;
}
return NULL;
}
/*
* Reset all profiling data associated with the specified node.
*/
static void reset_node(struct gcov_node *node)
{
int i;
if (node->unloaded_info)
gcov_info_reset(node->unloaded_info);
for (i = 0; i < node->num_loaded; i++)
gcov_info_reset(node->loaded_info[i]);
}
static void remove_node(struct gcov_node *node);
/*
* write() implementation for gcov data files. Reset profiling data for the
* corresponding file. If all associated object files have been unloaded,
* remove the debugfs node as well.
*/
static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
size_t len, loff_t *pos)
{
struct seq_file *seq;
struct gcov_info *info;
struct gcov_node *node;
seq = file->private_data;
info = gcov_iter_get_info(seq->private);
mutex_lock(&node_lock);
node = get_node_by_name(info->filename);
if (node) {
/* Reset counts or remove node for unloaded modules. */
if (node->num_loaded == 0)
remove_node(node);
else
reset_node(node);
}
/* Reset counts for open file. */
gcov_info_reset(info);
mutex_unlock(&node_lock);
return len;
}
/*
* Given a string <path> representing a file path of format:
* path/to/file.gcda
* construct and return a new string:
* <dir/>path/to/file.<ext>
*/
static char *link_target(const char *dir, const char *path, const char *ext)
{
char *target;
char *old_ext;
char *copy;
copy = kstrdup(path, GFP_KERNEL);
if (!copy)
return NULL;
old_ext = strrchr(copy, '.');
if (old_ext)
*old_ext = '\0';
if (dir)
target = kasprintf(GFP_KERNEL, "%s/%s.%s", dir, copy, ext);
else
target = kasprintf(GFP_KERNEL, "%s.%s", copy, ext);
kfree(copy);
return target;
}
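/*
 * Example (illustrative): link_target("/usr/src/linux", "kernel/gcov/fs.gcda",
 * "gcno") returns the newly allocated string
 * "/usr/src/linux/kernel/gcov/fs.gcno"; with dir == NULL the result is
 * "kernel/gcov/fs.gcno".
 */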
/*
* Construct a string representing the symbolic link target for the given
* gcov data file name and link type. Depending on the link type and the
* location of the data file, the link target can either point to a
* subdirectory of srctree, objtree or in an external location.
*/
static char *get_link_target(const char *filename, const struct gcov_link *ext)
{
const char *rel;
char *result;
if (strncmp(filename, objtree, strlen(objtree)) == 0) {
rel = filename + strlen(objtree) + 1;
if (ext->dir == SRC_TREE)
result = link_target(srctree, rel, ext->ext);
else
result = link_target(objtree, rel, ext->ext);
} else {
/* External compilation. */
result = link_target(NULL, filename, ext->ext);
}
return result;
}
#define SKEW_PREFIX ".tmp_"
/*
* For a filename .tmp_filename.ext return filename.ext. Needed to compensate
* for filename skewing caused by the mod-versioning mechanism.
*/
static const char *deskew(const char *basename)
{
if (strncmp(basename, SKEW_PREFIX, sizeof(SKEW_PREFIX) - 1) == 0)
return basename + sizeof(SKEW_PREFIX) - 1;
return basename;
}
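/*
 * Example (illustrative): deskew(".tmp_mymod.gcda") returns a pointer past
 * the ".tmp_" prefix, i.e. "mymod.gcda"; basenames without the prefix are
 * returned unchanged.
 */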
/*
* Create links to additional files (usually .c and .gcno files) which the
* gcov tool expects to find in the same directory as the gcov data file.
*/
static void add_links(struct gcov_node *node, struct dentry *parent)
{
char *basename;
char *target;
int num;
int i;
for (num = 0; gcov_link[num].ext; num++)
/* Nothing. */;
node->links = kcalloc(num, sizeof(struct dentry *), GFP_KERNEL);
if (!node->links)
return;
for (i = 0; i < num; i++) {
target = get_link_target(get_node_info(node)->filename,
&gcov_link[i]);
if (!target)
goto out_err;
basename = strrchr(target, '/');
if (!basename)
goto out_err;
basename++;
node->links[i] = debugfs_create_symlink(deskew(basename),
parent, target);
if (!node->links[i])
goto out_err;
kfree(target);
}
return;
out_err:
kfree(target);
while (i-- > 0)
debugfs_remove(node->links[i]);
kfree(node->links);
node->links = NULL;
}
static const struct file_operations gcov_data_fops = {
.open = gcov_seq_open,
.release = gcov_seq_release,
.read = seq_read,
.llseek = seq_lseek,
.write = gcov_seq_write,
};
/* Basic initialization of a new node. */
static void init_node(struct gcov_node *node, struct gcov_info *info,
const char *name, struct gcov_node *parent)
{
INIT_LIST_HEAD(&node->list);
INIT_LIST_HEAD(&node->children);
INIT_LIST_HEAD(&node->all);
if (node->loaded_info) {
node->loaded_info[0] = info;
node->num_loaded = 1;
}
node->parent = parent;
if (name)
strcpy(node->name, name);
}
/*
* Create a new node and associated debugfs entry. Needs to be called with
* node_lock held.
*/
static struct gcov_node *new_node(struct gcov_node *parent,
struct gcov_info *info, const char *name)
{
struct gcov_node *node;
node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL);
if (!node)
goto err_nomem;
if (info) {
node->loaded_info = kcalloc(1, sizeof(struct gcov_info *),
GFP_KERNEL);
if (!node->loaded_info)
goto err_nomem;
}
init_node(node, info, name, parent);
/* Differentiate between gcov data file nodes and directory nodes. */
if (info) {
node->dentry = debugfs_create_file(deskew(node->name), 0600,
parent->dentry, node, &gcov_data_fops);
} else
node->dentry = debugfs_create_dir(node->name, parent->dentry);
if (!node->dentry) {
pr_warning("could not create file\n");
kfree(node);
return NULL;
}
if (info)
add_links(node, parent->dentry);
list_add(&node->list, &parent->children);
list_add(&node->all, &all_head);
return node;
err_nomem:
kfree(node);
pr_warning("out of memory\n");
return NULL;
}
/* Remove symbolic links associated with node. */
static void remove_links(struct gcov_node *node)
{
int i;
if (!node->links)
return;
for (i = 0; gcov_link[i].ext; i++)
debugfs_remove(node->links[i]);
kfree(node->links);
node->links = NULL;
}
/*
* Remove node from all lists and debugfs and release associated resources.
* Needs to be called with node_lock held.
*/
static void release_node(struct gcov_node *node)
{
list_del(&node->list);
list_del(&node->all);
debugfs_remove(node->dentry);
remove_links(node);
kfree(node->loaded_info);
if (node->unloaded_info)
gcov_info_free(node->unloaded_info);
kfree(node);
}
/* Release node and empty parents. Needs to be called with node_lock held. */
static void remove_node(struct gcov_node *node)
{
struct gcov_node *parent;
while ((node != &root_node) && list_empty(&node->children)) {
parent = node->parent;
release_node(node);
node = parent;
}
}
/*
* Find child node with given basename. Needs to be called with node_lock
* held.
*/
static struct gcov_node *get_child_by_name(struct gcov_node *parent,
const char *name)
{
struct gcov_node *node;
list_for_each_entry(node, &parent->children, list) {
if (strcmp(node->name, name) == 0)
return node;
}
return NULL;
}
/*
* write() implementation for reset file. Reset all profiling data to zero
* and remove nodes for which all associated object files are unloaded.
*/
static ssize_t reset_write(struct file *file, const char __user *addr,
size_t len, loff_t *pos)
{
struct gcov_node *node;
mutex_lock(&node_lock);
restart:
list_for_each_entry(node, &all_head, all) {
if (node->num_loaded > 0)
reset_node(node);
else if (list_empty(&node->children)) {
remove_node(node);
/* Several nodes may have gone - restart loop. */
goto restart;
}
}
mutex_unlock(&node_lock);
return len;
}
/* read() implementation for reset file. Unused. */
static ssize_t reset_read(struct file *file, char __user *addr, size_t len,
loff_t *pos)
{
/* Allow read operation so that a recursive copy won't fail. */
return 0;
}
static const struct file_operations gcov_reset_fops = {
.write = reset_write,
.read = reset_read,
};
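/*
 * Usage example (illustrative, assuming debugfs is mounted at
 * /sys/kernel/debug): all profiling counters can be cleared from user space
 * with
 *   echo 0 > /sys/kernel/debug/gcov/reset
 */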
/*
* Create a node for a given profiling data set and add it to all lists and
* debugfs. Needs to be called with node_lock held.
*/
static void add_node(struct gcov_info *info)
{
char *filename;
char *curr;
char *next;
struct gcov_node *parent;
struct gcov_node *node;
filename = kstrdup(info->filename, GFP_KERNEL);
if (!filename)
return;
parent = &root_node;
/* Create directory nodes along the path. */
for (curr = filename; (next = strchr(curr, '/')); curr = next + 1) {
if (curr == next)
continue;
*next = 0;
if (strcmp(curr, ".") == 0)
continue;
if (strcmp(curr, "..") == 0) {
if (!parent->parent)
goto err_remove;
parent = parent->parent;
continue;
}
node = get_child_by_name(parent, curr);
if (!node) {
node = new_node(parent, NULL, curr);
if (!node)
goto err_remove;
}
parent = node;
}
/* Create file node. */
node = new_node(parent, info, curr);
if (!node)
goto err_remove;
out:
kfree(filename);
return;
err_remove:
remove_node(parent);
goto out;
}
/*
* Associate a profiling data set with an existing node. Needs to be called
* with node_lock held.
*/
static void add_info(struct gcov_node *node, struct gcov_info *info)
{
struct gcov_info **loaded_info;
int num = node->num_loaded;
/*
* Prepare new array. This is done first to simplify cleanup in
* case the new data set is incompatible, the node contains only
* unloaded data sets, and there's not enough memory for the array.
*/
loaded_info = kcalloc(num + 1, sizeof(struct gcov_info *), GFP_KERNEL);
if (!loaded_info) {
pr_warning("could not add '%s' (out of memory)\n",
info->filename);
return;
}
memcpy(loaded_info, node->loaded_info,
num * sizeof(struct gcov_info *));
loaded_info[num] = info;
/* Check if the new data set is compatible. */
if (num == 0) {
/*
* A module was unloaded, modified and reloaded. The new
* data set replaces the copy of the last one.
*/
if (!gcov_info_is_compatible(node->unloaded_info, info)) {
pr_warning("discarding saved data for %s "
"(incompatible version)\n", info->filename);
gcov_info_free(node->unloaded_info);
node->unloaded_info = NULL;
}
} else {
/*
* Two different versions of the same object file are loaded.
* The initial one takes precedence.
*/
if (!gcov_info_is_compatible(node->loaded_info[0], info)) {
pr_warning("could not add '%s' (incompatible "
"version)\n", info->filename);
kfree(loaded_info);
return;
}
}
/* Overwrite previous array. */
kfree(node->loaded_info);
node->loaded_info = loaded_info;
node->num_loaded = num + 1;
}
/*
* Return the index of a profiling data set associated with a node.
*/
static int get_info_index(struct gcov_node *node, struct gcov_info *info)
{
int i;
for (i = 0; i < node->num_loaded; i++) {
if (node->loaded_info[i] == info)
return i;
}
return -ENOENT;
}
/*
* Save the data of a profiling data set which is being unloaded.
*/
static void save_info(struct gcov_node *node, struct gcov_info *info)
{
if (node->unloaded_info)
gcov_info_add(node->unloaded_info, info);
else {
node->unloaded_info = gcov_info_dup(info);
if (!node->unloaded_info) {
pr_warning("could not save data for '%s' "
"(out of memory)\n", info->filename);
}
}
}
/*
* Disassociate a profiling data set from a node. Needs to be called with
* node_lock held.
*/
static void remove_info(struct gcov_node *node, struct gcov_info *info)
{
int i;
i = get_info_index(node, info);
if (i < 0) {
pr_warning("could not remove '%s' (not found)\n",
info->filename);
return;
}
if (gcov_persist)
save_info(node, info);
/* Shrink array. */
node->loaded_info[i] = node->loaded_info[node->num_loaded - 1];
node->num_loaded--;
if (node->num_loaded > 0)
return;
/* Last loaded data set was removed. */
kfree(node->loaded_info);
node->loaded_info = NULL;
node->num_loaded = 0;
if (!node->unloaded_info)
remove_node(node);
}
/*
* Callback to create/remove profiling files when code compiled with
* -fprofile-arcs is loaded/unloaded.
*/
void gcov_event(enum gcov_action action, struct gcov_info *info)
{
struct gcov_node *node;
mutex_lock(&node_lock);
node = get_node_by_name(info->filename);
switch (action) {
case GCOV_ADD:
if (node)
add_info(node, info);
else
add_node(info);
break;
case GCOV_REMOVE:
if (node)
remove_info(node, info);
else {
pr_warning("could not remove '%s' (not found)\n",
info->filename);
}
break;
}
mutex_unlock(&node_lock);
}
/* Create debugfs entries. */
static __init int gcov_fs_init(void)
{
int rc = -EIO;
init_node(&root_node, NULL, NULL, NULL);
/*
* /sys/kernel/debug/gcov will be parent for the reset control file
* and all profiling files.
*/
root_node.dentry = debugfs_create_dir("gcov", NULL);
if (!root_node.dentry)
goto err_remove;
/*
* Create reset file which resets all profiling counts when written
* to.
*/
reset_dentry = debugfs_create_file("reset", 0600, root_node.dentry,
NULL, &gcov_reset_fops);
if (!reset_dentry)
goto err_remove;
/* Replay previous events to get our fs hierarchy up-to-date. */
gcov_enable_events();
return 0;
err_remove:
pr_err("init failed\n");
if (root_node.dentry)
debugfs_remove(root_node.dentry);
return rc;
}
device_initcall(gcov_fs_init);

View File

@@ -0,0 +1,447 @@
/*
* This code provides functions to handle gcc's profiling data format
* introduced with gcc 3.4. Future versions of gcc may change the gcov
* format (as happened before), so all format-specific information needs
* to be kept modular and easily exchangeable.
*
* This file is based on gcc-internal definitions. Functions and data
* structures are defined to be compatible with gcc counterparts.
* For a better understanding, refer to gcc source: gcc/gcov-io.h.
*
* Copyright IBM Corp. 2009
* Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
*
* Uses gcc-internal data definitions.
*/
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include "gcov.h"
/* Symbolic links to be created for each profiling data file. */
const struct gcov_link gcov_link[] = {
{ OBJ_TREE, "gcno" }, /* Link to .gcno file in $(objtree). */
{ 0, NULL},
};
/*
* Determine whether a counter is active. Based on gcc magic. Doesn't change
* at run-time.
*/
static int counter_active(struct gcov_info *info, unsigned int type)
{
return (1 << type) & info->ctr_mask;
}
/* Determine number of active counters. Based on gcc magic. */
static unsigned int num_counter_active(struct gcov_info *info)
{
unsigned int i;
unsigned int result = 0;
for (i = 0; i < GCOV_COUNTERS; i++) {
if (counter_active(info, i))
result++;
}
return result;
}
/**
* gcov_info_reset - reset profiling data to zero
* @info: profiling data set
*/
void gcov_info_reset(struct gcov_info *info)
{
unsigned int active = num_counter_active(info);
unsigned int i;
for (i = 0; i < active; i++) {
memset(info->counts[i].values, 0,
info->counts[i].num * sizeof(gcov_type));
}
}
/**
* gcov_info_is_compatible - check if profiling data can be added
* @info1: first profiling data set
* @info2: second profiling data set
*
* Returns non-zero if profiling data can be added, zero otherwise.
*/
int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2)
{
return (info1->stamp == info2->stamp);
}
/**
* gcov_info_add - add up profiling data
* @dest: profiling data set to which data is added
* @source: profiling data set which is added
*
* Adds profiling counts of @source to @dest.
*/
void gcov_info_add(struct gcov_info *dest, struct gcov_info *source)
{
unsigned int i;
unsigned int j;
for (i = 0; i < num_counter_active(dest); i++) {
for (j = 0; j < dest->counts[i].num; j++) {
dest->counts[i].values[j] +=
source->counts[i].values[j];
}
}
}
/* Get size of function info entry. Based on gcc magic. */
static size_t get_fn_size(struct gcov_info *info)
{
size_t size;
size = sizeof(struct gcov_fn_info) + num_counter_active(info) *
sizeof(unsigned int);
if (__alignof__(struct gcov_fn_info) > sizeof(unsigned int))
size = ALIGN(size, __alignof__(struct gcov_fn_info));
return size;
}
/* Get address of function info entry. Based on gcc magic. */
static struct gcov_fn_info *get_fn_info(struct gcov_info *info, unsigned int fn)
{
return (struct gcov_fn_info *)
((char *) info->functions + fn * get_fn_size(info));
}
/**
* gcov_info_dup - duplicate profiling data set
* @info: profiling data set to duplicate
*
* Return newly allocated duplicate on success, %NULL on error.
*/
struct gcov_info *gcov_info_dup(struct gcov_info *info)
{
struct gcov_info *dup;
unsigned int i;
unsigned int active;
/* Duplicate gcov_info. */
active = num_counter_active(info);
dup = kzalloc(sizeof(struct gcov_info) +
sizeof(struct gcov_ctr_info) * active, GFP_KERNEL);
if (!dup)
return NULL;
dup->version = info->version;
dup->stamp = info->stamp;
dup->n_functions = info->n_functions;
dup->ctr_mask = info->ctr_mask;
/* Duplicate filename. */
dup->filename = kstrdup(info->filename, GFP_KERNEL);
if (!dup->filename)
goto err_free;
/* Duplicate table of functions. */
dup->functions = kmemdup(info->functions, info->n_functions *
get_fn_size(info), GFP_KERNEL);
if (!dup->functions)
goto err_free;
/* Duplicate counter arrays. */
for (i = 0; i < active ; i++) {
struct gcov_ctr_info *ctr = &info->counts[i];
size_t size = ctr->num * sizeof(gcov_type);
dup->counts[i].num = ctr->num;
dup->counts[i].merge = ctr->merge;
dup->counts[i].values = vmalloc(size);
if (!dup->counts[i].values)
goto err_free;
memcpy(dup->counts[i].values, ctr->values, size);
}
return dup;
err_free:
gcov_info_free(dup);
return NULL;
}
/**
* gcov_info_free - release memory for profiling data set duplicate
* @info: profiling data set duplicate to free
*/
void gcov_info_free(struct gcov_info *info)
{
unsigned int active = num_counter_active(info);
unsigned int i;
for (i = 0; i < active ; i++)
vfree(info->counts[i].values);
kfree(info->functions);
kfree(info->filename);
kfree(info);
}
/**
* struct type_info - iterator helper array
* @ctr_type: counter type
* @offset: index of the first value of the current function for this type
*
* This array is needed to convert the in-memory data format into the in-file
* data format:
*
* In-memory:
* for each counter type
* for each function
* values
*
* In-file:
* for each function
* for each counter type
* values
*
* See gcc source gcc/gcov-io.h for more information on data organization.
*/
struct type_info {
int ctr_type;
unsigned int offset;
};
/**
* struct gcov_iterator - specifies current file position in logical records
* @info: associated profiling data
* @record: record type
* @function: function number
* @type: counter type
* @count: index into values array
* @num_types: number of counter types
* @type_info: helper array to get values-array offset for current function
*/
struct gcov_iterator {
struct gcov_info *info;
int record;
unsigned int function;
unsigned int type;
unsigned int count;
int num_types;
struct type_info type_info[0];
};
static struct gcov_fn_info *get_func(struct gcov_iterator *iter)
{
return get_fn_info(iter->info, iter->function);
}
static struct type_info *get_type(struct gcov_iterator *iter)
{
return &iter->type_info[iter->type];
}
/**
* gcov_iter_new - allocate and initialize profiling data iterator
* @info: profiling data set to be iterated
*
* Return file iterator on success, %NULL otherwise.
*/
struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
{
struct gcov_iterator *iter;
iter = kzalloc(sizeof(struct gcov_iterator) +
num_counter_active(info) * sizeof(struct type_info),
GFP_KERNEL);
if (iter)
iter->info = info;
return iter;
}
/**
* gcov_iter_free - release memory for iterator
* @iter: file iterator to free
*/
void gcov_iter_free(struct gcov_iterator *iter)
{
kfree(iter);
}
/**
* gcov_iter_get_info - return profiling data set for given file iterator
* @iter: file iterator
*/
struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
{
return iter->info;
}
/**
* gcov_iter_start - reset file iterator to starting position
* @iter: file iterator
*/
void gcov_iter_start(struct gcov_iterator *iter)
{
int i;
iter->record = 0;
iter->function = 0;
iter->type = 0;
iter->count = 0;
iter->num_types = 0;
for (i = 0; i < GCOV_COUNTERS; i++) {
if (counter_active(iter->info, i)) {
iter->type_info[iter->num_types].ctr_type = i;
iter->type_info[iter->num_types++].offset = 0;
}
}
}
/* Mapping of logical record number to actual file content. */
#define RECORD_FILE_MAGIC 0
#define RECORD_GCOV_VERSION 1
#define RECORD_TIME_STAMP 2
#define RECORD_FUNCTION_TAG 3
#define RECORD_FUNCTON_TAG_LEN 4
#define RECORD_FUNCTION_IDENT 5
#define RECORD_FUNCTION_CHECK 6
#define RECORD_COUNT_TAG 7
#define RECORD_COUNT_LEN 8
#define RECORD_COUNT 9
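/*
 * Illustrative record sequence for a data file with one function and one
 * active counter type: RECORD_FILE_MAGIC, RECORD_GCOV_VERSION,
 * RECORD_TIME_STAMP, then RECORD_FUNCTION_TAG, RECORD_FUNCTON_TAG_LEN,
 * RECORD_FUNCTION_IDENT, RECORD_FUNCTION_CHECK, RECORD_COUNT_TAG,
 * RECORD_COUNT_LEN and one RECORD_COUNT per counter value, after which
 * gcov_iter_next() reports EOF.
 */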
/**
* gcov_iter_next - advance file iterator to next logical record
* @iter: file iterator
*
* Return zero if new position is valid, non-zero if iterator has reached end.
*/
int gcov_iter_next(struct gcov_iterator *iter)
{
switch (iter->record) {
case RECORD_FILE_MAGIC:
case RECORD_GCOV_VERSION:
case RECORD_FUNCTION_TAG:
case RECORD_FUNCTON_TAG_LEN:
case RECORD_FUNCTION_IDENT:
case RECORD_COUNT_TAG:
/* Advance to next record */
iter->record++;
break;
case RECORD_COUNT:
/* Advance to next count */
iter->count++;
/* fall through */
case RECORD_COUNT_LEN:
if (iter->count < get_func(iter)->n_ctrs[iter->type]) {
iter->record = RECORD_COUNT;
break;
}
/* Advance to next counter type */
get_type(iter)->offset += iter->count;
iter->count = 0;
iter->type++;
/* fall through */
case RECORD_FUNCTION_CHECK:
if (iter->type < iter->num_types) {
iter->record = RECORD_COUNT_TAG;
break;
}
/* Advance to next function */
iter->type = 0;
iter->function++;
/* fall through */
case RECORD_TIME_STAMP:
if (iter->function < iter->info->n_functions)
iter->record = RECORD_FUNCTION_TAG;
else
iter->record = -1;
break;
}
/* Check for EOF. */
if (iter->record == -1)
return -EINVAL;
else
return 0;
}
/**
* seq_write_gcov_u32 - write 32 bit number in gcov format to seq_file
* @seq: seq_file handle
* @v: value to be stored
*
* Number format defined by gcc: numbers are recorded in the 32 bit
* unsigned binary form of the endianness of the machine generating the
* file.
*/
static int seq_write_gcov_u32(struct seq_file *seq, u32 v)
{
return seq_write(seq, &v, sizeof(v));
}
/**
* seq_write_gcov_u64 - write 64 bit number in gcov format to seq_file
* @seq: seq_file handle
* @v: value to be stored
*
* Number format defined by gcc: numbers are recorded in the 32 bit
* unsigned binary form of the endianness of the machine generating the
* file. 64 bit numbers are stored as two 32 bit numbers, the low part
* first.
*/
static int seq_write_gcov_u64(struct seq_file *seq, u64 v)
{
u32 data[2];
data[0] = (v & 0xffffffffUL);
data[1] = (v >> 32);
return seq_write(seq, data, sizeof(data));
}
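/*
 * Example (illustrative): the 64 bit value 0x0000000100000002 is emitted as
 * the 32 bit word 0x00000002 (low part) followed by 0x00000001 (high part),
 * each in the machine's native byte order.
 */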
/**
* gcov_iter_write - write data for current pos to seq_file
* @iter: file iterator
* @seq: seq_file handle
*
* Return zero on success, non-zero otherwise.
*/
int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
{
int rc = -EINVAL;
switch (iter->record) {
case RECORD_FILE_MAGIC:
rc = seq_write_gcov_u32(seq, GCOV_DATA_MAGIC);
break;
case RECORD_GCOV_VERSION:
rc = seq_write_gcov_u32(seq, iter->info->version);
break;
case RECORD_TIME_STAMP:
rc = seq_write_gcov_u32(seq, iter->info->stamp);
break;
case RECORD_FUNCTION_TAG:
rc = seq_write_gcov_u32(seq, GCOV_TAG_FUNCTION);
break;
case RECORD_FUNCTON_TAG_LEN:
rc = seq_write_gcov_u32(seq, 2);
break;
case RECORD_FUNCTION_IDENT:
rc = seq_write_gcov_u32(seq, get_func(iter)->ident);
break;
case RECORD_FUNCTION_CHECK:
rc = seq_write_gcov_u32(seq, get_func(iter)->checksum);
break;
case RECORD_COUNT_TAG:
rc = seq_write_gcov_u32(seq,
GCOV_TAG_FOR_COUNTER(get_type(iter)->ctr_type));
break;
case RECORD_COUNT_LEN:
rc = seq_write_gcov_u32(seq,
get_func(iter)->n_ctrs[iter->type] * 2);
break;
case RECORD_COUNT:
rc = seq_write_gcov_u64(seq,
iter->info->counts[iter->type].
values[iter->count + get_type(iter)->offset]);
break;
}
return rc;
}

128
kernel/kernel/gcov/gcov.h Normal file
View File

@@ -0,0 +1,128 @@
/*
* Profiling infrastructure declarations.
*
* This file is based on gcc-internal definitions. Data structures are
* defined to be compatible with gcc counterparts. For a better
* understanding, refer to gcc source: gcc/gcov-io.h.
*
* Copyright IBM Corp. 2009
* Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
*
* Uses gcc-internal data definitions.
*/
#ifndef GCOV_H
#define GCOV_H GCOV_H
#include <linux/types.h>
/*
* Profiling data types used for gcc 3.4 and above - these are defined by
* gcc and need to be kept as close to the original definition as possible to
* remain compatible.
*/
#define GCOV_COUNTERS 5
#define GCOV_DATA_MAGIC ((unsigned int) 0x67636461)
#define GCOV_TAG_FUNCTION ((unsigned int) 0x01000000)
#define GCOV_TAG_COUNTER_BASE ((unsigned int) 0x01a10000)
#define GCOV_TAG_FOR_COUNTER(count) \
(GCOV_TAG_COUNTER_BASE + ((unsigned int) (count) << 17))
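/*
 * Example (illustrative): GCOV_TAG_FOR_COUNTER(0) evaluates to 0x01a10000
 * and GCOV_TAG_FOR_COUNTER(1) to 0x01a30000, since the counter type is
 * shifted left by 17 bits before being added to the base tag.
 */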
#if BITS_PER_LONG >= 64
typedef long gcov_type;
#else
typedef long long gcov_type;
#endif
/**
* struct gcov_fn_info - profiling meta data per function
* @ident: object file-unique function identifier
* @checksum: function checksum
* @n_ctrs: number of values per counter type belonging to this function
*
* This data is generated by gcc during compilation and doesn't change
* at run-time.
*/
struct gcov_fn_info {
unsigned int ident;
unsigned int checksum;
unsigned int n_ctrs[0];
};
/**
* struct gcov_ctr_info - profiling data per counter type
* @num: number of counter values for this type
* @values: array of counter values for this type
* @merge: merge function for counter values of this type (unused)
*
* This data is generated by gcc during compilation and doesn't change
* at run-time with the exception of the values array.
*/
struct gcov_ctr_info {
unsigned int num;
gcov_type *values;
void (*merge)(gcov_type *, unsigned int);
};
/**
* struct gcov_info - profiling data per object file
* @version: gcov version magic indicating the gcc version used for compilation
* @next: list head for a singly-linked list
* @stamp: time stamp
* @filename: name of the associated gcov data file
* @n_functions: number of instrumented functions
* @functions: function data
* @ctr_mask: mask specifying which counter types are active
* @counts: counter data per counter type
*
* This data is generated by gcc during compilation and doesn't change
* at run-time with the exception of the next pointer.
*/
struct gcov_info {
unsigned int version;
struct gcov_info *next;
unsigned int stamp;
const char *filename;
unsigned int n_functions;
const struct gcov_fn_info *functions;
unsigned int ctr_mask;
struct gcov_ctr_info counts[0];
};
/* Base interface. */
enum gcov_action {
GCOV_ADD,
GCOV_REMOVE,
};
void gcov_event(enum gcov_action action, struct gcov_info *info);
void gcov_enable_events(void);
/* Iterator control. */
struct seq_file;
struct gcov_iterator;
struct gcov_iterator *gcov_iter_new(struct gcov_info *info);
void gcov_iter_free(struct gcov_iterator *iter);
void gcov_iter_start(struct gcov_iterator *iter);
int gcov_iter_next(struct gcov_iterator *iter);
int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq);
struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter);
/* gcov_info control. */
void gcov_info_reset(struct gcov_info *info);
int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2);
void gcov_info_add(struct gcov_info *dest, struct gcov_info *source);
struct gcov_info *gcov_info_dup(struct gcov_info *info);
void gcov_info_free(struct gcov_info *info);
struct gcov_link {
enum {
OBJ_TREE,
SRC_TREE,
} dir;
const char *ext;
};
extern const struct gcov_link gcov_link[];
#endif /* GCOV_H */

287
kernel/kernel/groups.c Normal file
View File

@@ -0,0 +1,287 @@
/*
* Supplementary group IDs
*/
#include <linux/cred.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
/* init to 2 - one for init_task, one to ensure it is never freed */
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
struct group_info *groups_alloc(int gidsetsize)
{
struct group_info *group_info;
int nblocks;
int i;
nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
/* Make sure we always allocate at least one indirect block pointer */
nblocks = nblocks ? : 1;
group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
if (!group_info)
return NULL;
group_info->ngroups = gidsetsize;
group_info->nblocks = nblocks;
atomic_set(&group_info->usage, 1);
if (gidsetsize <= NGROUPS_SMALL)
group_info->blocks[0] = group_info->small_block;
else {
for (i = 0; i < nblocks; i++) {
gid_t *b;
b = (void *)__get_free_page(GFP_USER);
if (!b)
goto out_undo_partial_alloc;
group_info->blocks[i] = b;
}
}
return group_info;
out_undo_partial_alloc:
while (--i >= 0) {
free_page((unsigned long)group_info->blocks[i]);
}
kfree(group_info);
return NULL;
}
EXPORT_SYMBOL(groups_alloc);
void groups_free(struct group_info *group_info)
{
if (group_info->blocks[0] != group_info->small_block) {
int i;
for (i = 0; i < group_info->nblocks; i++)
free_page((unsigned long)group_info->blocks[i]);
}
kfree(group_info);
}
EXPORT_SYMBOL(groups_free);
/* export the group_info to a user-space array */
static int groups_to_user(gid_t __user *grouplist,
const struct group_info *group_info)
{
int i;
unsigned int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
unsigned int len = cp_count * sizeof(*grouplist);
if (copy_to_user(grouplist, group_info->blocks[i], len))
return -EFAULT;
grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;
}
/* fill a group_info from a user-space array - it must be allocated already */
static int groups_from_user(struct group_info *group_info,
gid_t __user *grouplist)
{
int i;
unsigned int count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
unsigned int cp_count = min(NGROUPS_PER_BLOCK, count);
unsigned int len = cp_count * sizeof(*grouplist);
if (copy_from_user(group_info->blocks[i], grouplist, len))
return -EFAULT;
grouplist += NGROUPS_PER_BLOCK;
count -= cp_count;
}
return 0;
}
/* a simple Shell sort */
static void groups_sort(struct group_info *group_info)
{
int base, max, stride;
int gidsetsize = group_info->ngroups;
for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
; /* nothing */
stride /= 3;
while (stride) {
max = gidsetsize - stride;
for (base = 0; base < max; base++) {
int left = base;
int right = left + stride;
gid_t tmp = GROUP_AT(group_info, right);
while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
GROUP_AT(group_info, right) =
GROUP_AT(group_info, left);
right = left;
left -= stride;
}
GROUP_AT(group_info, right) = tmp;
}
stride /= 3;
}
}
/* a simple bsearch */
int groups_search(const struct group_info *group_info, gid_t grp)
{
unsigned int left, right;
if (!group_info)
return 0;
left = 0;
right = group_info->ngroups;
while (left < right) {
unsigned int mid = (left+right)/2;
if (grp > GROUP_AT(group_info, mid))
left = mid + 1;
else if (grp < GROUP_AT(group_info, mid))
right = mid;
else
return 1;
}
return 0;
}
/**
* set_groups - Change a group subscription in a set of credentials
* @new: The newly prepared set of credentials to alter
* @group_info: The group list to install
*
* Validate a group subscription and, if valid, insert it into a set
* of credentials.
*/
int set_groups(struct cred *new, struct group_info *group_info)
{
int retval;
retval = security_task_setgroups(group_info);
if (retval)
return retval;
put_group_info(new->group_info);
groups_sort(group_info);
get_group_info(group_info);
new->group_info = group_info;
return 0;
}
EXPORT_SYMBOL(set_groups);
/**
* set_current_groups - Change current's group subscription
* @group_info: The group list to impose
*
* Validate a group subscription and, if valid, impose it upon current's task
* security record.
*/
int set_current_groups(struct group_info *group_info)
{
struct cred *new;
int ret;
new = prepare_creds();
if (!new)
return -ENOMEM;
ret = set_groups(new, group_info);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
}
EXPORT_SYMBOL(set_current_groups);
SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist)
{
const struct cred *cred = current_cred();
int i;
if (gidsetsize < 0)
return -EINVAL;
/* no need to grab task_lock here; it cannot change */
i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
if (groups_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
return i;
}
/*
* SMP: Our groups are copy-on-write. We can set them safely
* without another task interfering.
*/
SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
{
struct group_info *group_info;
int retval;
if (!capable(CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
group_info = groups_alloc(gidsetsize);
if (!group_info)
return -ENOMEM;
retval = groups_from_user(group_info, grouplist);
if (retval) {
put_group_info(group_info);
return retval;
}
retval = set_current_groups(group_info);
put_group_info(group_info);
return retval;
}
/*
* Check whether we're fsgid/egid or in the supplemental group list.
*/
int in_group_p(gid_t grp)
{
const struct cred *cred = current_cred();
int retval = 1;
if (grp != cred->fsgid)
retval = groups_search(cred->group_info, grp);
return retval;
}
EXPORT_SYMBOL(in_group_p);
int in_egroup_p(gid_t grp)
{
const struct cred *cred = current_cred();
int retval = 1;
if (grp != cred->egid)
retval = groups_search(cred->group_info, grp);
return retval;
}
EXPORT_SYMBOL(in_egroup_p);

1843
kernel/kernel/hrtimer.c Normal file

File diff suppressed because it is too large

217
kernel/kernel/hung_task.c Normal file
View File

@@ -0,0 +1,217 @@
/*
* Detect Hung Task
*
* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
*
*/
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/sysctl.h>
/*
* The number of tasks checked:
*/
unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
/*
* Limit number of tasks checked in a batch.
*
* This value controls the preemptibility of khungtaskd since preemption
* is disabled during the critical section. It also controls the size of
* the RCU grace period. So it needs to be upper-bounded.
*/
#define HUNG_TASK_BATCHING 1024
/*
* Zero means infinite timeout - no checking done:
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
unsigned long __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
static struct task_struct *watchdog_task;
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* hung task is detected:
*/
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
static int __init hung_task_panic_setup(char *str)
{
sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
did_panic = 1;
return NOTIFY_DONE;
}
static struct notifier_block panic_block = {
.notifier_call = hung_task_panic,
};
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
unsigned long switch_count = t->nvcsw + t->nivcsw;
/*
* Ensure the task is not frozen.
* Also, a freshly created task that has been scheduled once and changed
* its state to TASK_UNINTERRUPTIBLE without ever having been
* switched out must not be checked.
*/
if (unlikely(t->flags & PF_FROZEN || !switch_count))
return;
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
return;
}
if (!sysctl_hung_task_warnings)
return;
sysctl_hung_task_warnings--;
/*
* Ok, the task did not get scheduled for more than 2 minutes,
* complain:
*/
printk(KERN_ERR "INFO: task %s:%d blocked for more than "
"%ld seconds.\n", t->comm, t->pid, timeout);
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
__debug_show_held_locks(t);
touch_nmi_watchdog();
if (sysctl_hung_task_panic)
panic("hung_task: blocked tasks");
}
/*
* To avoid extending the RCU grace period for an unbounded amount of time,
* periodically exit the critical section and enter a new one.
*
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
* to exit the grace period. For classic RCU, a reschedule is required.
*/
static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
get_task_struct(g);
get_task_struct(t);
rcu_read_unlock();
cond_resched();
rcu_read_lock();
put_task_struct(t);
put_task_struct(g);
}
/*
* Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
* a really long time (120 seconds). If that happens, print out
* a warning.
*/
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
int max_count = sysctl_hung_task_check_count;
int batch_count = HUNG_TASK_BATCHING;
struct task_struct *g, *t;
/*
* If the system crashed already then all bets are off,
* do not report extra hung tasks:
*/
if (test_taint(TAINT_DIE) || did_panic)
return;
rcu_read_lock();
do_each_thread(g, t) {
if (!--max_count)
goto unlock;
if (!--batch_count) {
batch_count = HUNG_TASK_BATCHING;
rcu_lock_break(g, t);
/* Exit if t or g was unhashed during refresh. */
if (t->state == TASK_DEAD || g->state == TASK_DEAD)
goto unlock;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, timeout);
} while_each_thread(g, t);
unlock:
rcu_read_unlock();
}
static unsigned long timeout_jiffies(unsigned long timeout)
{
/* timeout of 0 will disable the watchdog */
return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
}
/*
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto out;
wake_up_process(watchdog_task);
out:
return ret;
}
/*
* kthread which checks for tasks stuck in D state
*/
static int watchdog(void *dummy)
{
set_user_nice(current, 0);
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
timeout = sysctl_hung_task_timeout_secs;
check_hung_uninterruptible_tasks(timeout);
}
return 0;
}
static int __init hung_task_init(void)
{
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
return 0;
}
module_init(hung_task_init);

View File

@@ -0,0 +1,7 @@
obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o
obj-$(CONFIG_PM_SLEEP) += pm.o

View File

@@ -0,0 +1,196 @@
/*
* linux/kernel/irq/autoprobe.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains the interrupt probing code and driver APIs.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/async.h>
#include "internals.h"
/*
* Autodetection depends on the fact that any interrupt that
* comes in on an unassigned handler will get stuck with
* "IRQ_WAITING" cleared and the interrupt disabled.
*/
static DEFINE_MUTEX(probing_active);
/**
* probe_irq_on - begin an interrupt autodetect
*
* Commence probing for an interrupt. The interrupts are scanned
* and a mask of potential interrupt lines is returned.
*
*/
unsigned long probe_irq_on(void)
{
struct irq_desc *desc;
unsigned long mask = 0;
unsigned int status;
int i;
/*
* quiesce the kernel, or at least the asynchronous portion
*/
async_synchronize_full();
mutex_lock(&probing_active);
/*
* something may have generated an irq long ago and we want to
* flush such a longstanding irq before considering it as spurious.
*/
for_each_irq_desc_reverse(i, desc) {
spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
/*
* An old-style architecture might still have
* the handle_bad_irq handler there:
*/
compat_irq_chip_set_default_handler(desc);
/*
* Some chips need to know about probing in
* progress:
*/
if (desc->chip->set_type)
desc->chip->set_type(i, IRQ_TYPE_PROBE);
desc->chip->startup(i);
}
spin_unlock_irq(&desc->lock);
}
/* Wait for longstanding interrupts to trigger. */
msleep(20);
/*
* enable any unassigned irqs
* (we must startup again here because if a longstanding irq
* happened in the previous stage, it may have masked itself)
*/
for_each_irq_desc_reverse(i, desc) {
spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
if (desc->chip->startup(i))
desc->status |= IRQ_PENDING;
}
spin_unlock_irq(&desc->lock);
}
/*
* Wait for spurious interrupts to trigger
*/
msleep(100);
/*
* Now filter out any obviously spurious interrupts
*/
for_each_irq_desc(i, desc) {
spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
/* It triggered already - consider it spurious. */
if (!(status & IRQ_WAITING)) {
desc->status = status & ~IRQ_AUTODETECT;
desc->chip->shutdown(i);
} else
if (i < 32)
mask |= 1 << i;
}
spin_unlock_irq(&desc->lock);
}
return mask;
}
EXPORT_SYMBOL(probe_irq_on);
/**
* probe_irq_mask - scan a bitmap of interrupt lines
* @val: mask of interrupts to consider
*
* Scan the interrupt lines and return a bitmap of active
* autodetect interrupts. The interrupt probe logic state
* is then returned to its previous value.
*
* Note: we need to scan all the irqs even though we will
* only return autodetect irq numbers - just so that we reset
* them all to a known state.
*/
unsigned int probe_irq_mask(unsigned long val)
{
unsigned int status, mask = 0;
struct irq_desc *desc;
int i;
for_each_irq_desc(i, desc) {
spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
if (i < 16 && !(status & IRQ_WAITING))
mask |= 1 << i;
desc->status = status & ~IRQ_AUTODETECT;
desc->chip->shutdown(i);
}
spin_unlock_irq(&desc->lock);
}
mutex_unlock(&probing_active);
return mask & val;
}
EXPORT_SYMBOL(probe_irq_mask);
/**
* probe_irq_off - end an interrupt autodetect
* @val: mask of potential interrupts (unused)
*
* Scans the unused interrupt lines and returns the line which
* appears to have triggered the interrupt. If no interrupt was
* found then zero is returned. If more than one interrupt is
* found then minus the first candidate is returned to indicate
* there is doubt.
*
* The interrupt probe logic state is returned to its previous
* value.
*
* BUGS: When used in a module (which arguably shouldn't happen)
* nothing prevents two IRQ probe callers from overlapping. The
* results of this are non-optimal.
*/
int probe_irq_off(unsigned long val)
{
int i, irq_found = 0, nr_of_irqs = 0;
struct irq_desc *desc;
unsigned int status;
for_each_irq_desc(i, desc) {
spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
if (!(status & IRQ_WAITING)) {
if (!nr_of_irqs)
irq_found = i;
nr_of_irqs++;
}
desc->status = status & ~IRQ_AUTODETECT;
desc->chip->shutdown(i);
}
spin_unlock_irq(&desc->lock);
}
mutex_unlock(&probing_active);
if (nr_of_irqs > 1)
irq_found = -irq_found;
return irq_found;
}
EXPORT_SYMBOL(probe_irq_off);

747
kernel/kernel/irq/chip.c Normal file
View File

@@ -0,0 +1,747 @@
/*
* linux/kernel/irq/chip.c
*
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King
*
* This file contains the core interrupt handling code, for irq-chip
* based architectures.
*
* Detailed information is available in Documentation/DocBook/genericirq
*/
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include "internals.h"
static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data)
{
struct irq_desc *desc;
unsigned long flags;
desc = irq_to_desc(irq);
if (!desc) {
WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
return;
}
/* Ensure we don't have left over values from a previous use of this irq */
spin_lock_irqsave(&desc->lock, flags);
desc->status = IRQ_DISABLED;
desc->chip = &no_irq_chip;
desc->handle_irq = handle_bad_irq;
desc->depth = 1;
desc->msi_desc = NULL;
desc->handler_data = NULL;
if (!keep_chip_data)
desc->chip_data = NULL;
desc->action = NULL;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
#ifdef CONFIG_SMP
cpumask_setall(desc->affinity);
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_clear(desc->pending_mask);
#endif
#endif
spin_unlock_irqrestore(&desc->lock, flags);
}
/**
* dynamic_irq_init - initialize a dynamically allocated irq
* @irq: irq number to initialize
*/
void dynamic_irq_init(unsigned int irq)
{
dynamic_irq_init_x(irq, false);
}
/**
* dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq
* @irq: irq number to initialize
*
* does not set irq_to_desc(irq)->chip_data to NULL
*/
void dynamic_irq_init_keep_chip_data(unsigned int irq)
{
dynamic_irq_init_x(irq, true);
}
static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq);
return;
}
spin_lock_irqsave(&desc->lock, flags);
if (desc->action) {
spin_unlock_irqrestore(&desc->lock, flags);
WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n",
irq);
return;
}
desc->msi_desc = NULL;
desc->handler_data = NULL;
if (!keep_chip_data)
desc->chip_data = NULL;
desc->handle_irq = handle_bad_irq;
desc->chip = &no_irq_chip;
desc->name = NULL;
clear_kstat_irqs(desc);
spin_unlock_irqrestore(&desc->lock, flags);
}
/**
* dynamic_irq_cleanup - cleanup a dynamically allocated irq
* @irq: irq number to initialize
*/
void dynamic_irq_cleanup(unsigned int irq)
{
dynamic_irq_cleanup_x(irq, false);
}
/**
* dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq
* @irq: irq number to initialize
*
* does not set irq_to_desc(irq)->chip_data to NULL
*/
void dynamic_irq_cleanup_keep_chip_data(unsigned int irq)
{
dynamic_irq_cleanup_x(irq, true);
}
/**
* set_irq_chip - set the irq chip for an irq
* @irq: irq number
* @chip: pointer to irq chip description structure
*/
int set_irq_chip(unsigned int irq, struct irq_chip *chip)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
return -EINVAL;
}
if (!chip)
chip = &no_irq_chip;
spin_lock_irqsave(&desc->lock, flags);
irq_chip_set_defaults(chip);
desc->chip = chip;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
EXPORT_SYMBOL(set_irq_chip);
/**
* set_irq_type - set the irq trigger type for an irq
* @irq: irq number
* @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
*/
int set_irq_type(unsigned int irq, unsigned int type)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
int ret = -ENXIO;
if (!desc) {
printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
return -ENODEV;
}
type &= IRQ_TYPE_SENSE_MASK;
if (type == IRQ_TYPE_NONE)
return 0;
spin_lock_irqsave(&desc->lock, flags);
ret = __irq_set_trigger(desc, irq, type);
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
EXPORT_SYMBOL(set_irq_type);
/**
* set_irq_data - set irq type data for an irq
* @irq: Interrupt number
* @data: Pointer to interrupt specific data
*
* Set the hardware irq controller data for an irq
*/
int set_irq_data(unsigned int irq, void *data)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
printk(KERN_ERR
"Trying to install controller data for IRQ%d\n", irq);
return -EINVAL;
}
spin_lock_irqsave(&desc->lock, flags);
desc->handler_data = data;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
EXPORT_SYMBOL(set_irq_data);
/**
* set_irq_msi - set MSI descriptor data for an irq
* @irq: Interrupt number
* @entry: Pointer to MSI descriptor data
*
* Set the MSI descriptor entry for an irq
*/
int set_irq_msi(unsigned int irq, struct msi_desc *entry)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
printk(KERN_ERR
"Trying to install msi data for IRQ%d\n", irq);
return -EINVAL;
}
spin_lock_irqsave(&desc->lock, flags);
desc->msi_desc = entry;
if (entry)
entry->irq = irq;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
/**
* set_irq_chip_data - set irq chip data for an irq
* @irq: Interrupt number
* @data: Pointer to chip specific data
*
* Set the hardware irq chip data for an irq
*/
int set_irq_chip_data(unsigned int irq, void *data)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
printk(KERN_ERR
"Trying to install chip data for IRQ%d\n", irq);
return -EINVAL;
}
if (!desc->chip) {
printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
return -EINVAL;
}
spin_lock_irqsave(&desc->lock, flags);
desc->chip_data = data;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
EXPORT_SYMBOL(set_irq_chip_data);
/**
* set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq
*
* @irq: Interrupt number
* @nest: 0 to clear / 1 to set the IRQ_NESTED_THREAD flag
*
* The IRQ_NESTED_THREAD flag indicates that on
* request_threaded_irq() no separate interrupt thread should be
* created for the irq as the handler are called nested in the
* context of a demultiplexing interrupt handler thread.
*/
void set_irq_nested_thread(unsigned int irq, int nest)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc)
return;
spin_lock_irqsave(&desc->lock, flags);
if (nest)
desc->status |= IRQ_NESTED_THREAD;
else
desc->status &= ~IRQ_NESTED_THREAD;
spin_unlock_irqrestore(&desc->lock, flags);
}
EXPORT_SYMBOL_GPL(set_irq_nested_thread);
/*
* default enable function
*/
static void default_enable(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
desc->chip->unmask(irq);
desc->status &= ~IRQ_MASKED;
}
/*
* default disable function
*/
static void default_disable(unsigned int irq)
{
}
/*
* default startup function
*/
static unsigned int default_startup(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
desc->chip->enable(irq);
return 0;
}
/*
* default shutdown function
*/
static void default_shutdown(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
desc->chip->mask(irq);
desc->status |= IRQ_MASKED;
}
/*
* Fixup enable/disable function pointers
*/
void irq_chip_set_defaults(struct irq_chip *chip)
{
if (!chip->enable)
chip->enable = default_enable;
if (!chip->disable)
chip->disable = default_disable;
if (!chip->startup)
chip->startup = default_startup;
/*
* We use chip->disable when the user provided their own. When
* chip->disable is left as default_disable, we need
* to use default_shutdown instead, otherwise the irq line is not
* disabled on free_irq():
*/
if (!chip->shutdown)
chip->shutdown = chip->disable != default_disable ?
chip->disable : default_shutdown;
if (!chip->name)
chip->name = chip->typename;
if (!chip->end)
chip->end = dummy_irq_chip.end;
}
static inline void mask_ack_irq(struct irq_desc *desc, int irq)
{
if (desc->chip->mask_ack)
desc->chip->mask_ack(irq);
else {
desc->chip->mask(irq);
if (desc->chip->ack)
desc->chip->ack(irq);
}
}
/*
* handle_nested_irq - Handle a nested irq from an irq thread
* @irq: the interrupt number
*
* Handle interrupts which are nested into a threaded interrupt
* handler. The handler function is called inside the calling
* thread's context.
*/
void handle_nested_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
irqreturn_t action_ret;
might_sleep();
spin_lock_irq(&desc->lock);
kstat_incr_irqs_this_cpu(irq, desc);
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED)))
goto out_unlock;
desc->status |= IRQ_INPROGRESS;
spin_unlock_irq(&desc->lock);
action_ret = action->thread_fn(action->irq, action->dev_id);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
spin_lock_irq(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
out_unlock:
spin_unlock_irq(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_nested_irq);
/**
* handle_simple_irq - Simple and software-decoded IRQs.
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
*
* Simple interrupts are either sent from a demultiplexing interrupt
* handler or come from hardware, where no interrupt hardware control
* is necessary.
*
* Note: The caller is expected to handle the ack, clear, mask and
* unmask issues if necessary.
*/
void
handle_simple_irq(unsigned int irq, struct irq_desc *desc)
{
struct irqaction *action;
irqreturn_t action_ret;
spin_lock(&desc->lock);
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out_unlock;
desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED)))
goto out_unlock;
desc->status |= IRQ_INPROGRESS;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
out_unlock:
spin_unlock(&desc->lock);
}
/**
* handle_level_irq - Level type irq handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
*
* Level type interrupts are active as long as the hardware line has
* the active level. This may require masking the interrupt and unmasking
* it after the associated handler has acknowledged the device, so that the
* interrupt line goes back to inactive.
*/
void
handle_level_irq(unsigned int irq, struct irq_desc *desc)
{
struct irqaction *action;
irqreturn_t action_ret;
spin_lock(&desc->lock);
mask_ack_irq(desc, irq);
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out_unlock;
desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
/*
* If it's disabled or no action is available,
* keep it masked and get out of here
*/
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED)))
goto out_unlock;
desc->status |= IRQ_INPROGRESS;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
if (unlikely(desc->status & IRQ_ONESHOT))
desc->status |= IRQ_MASKED;
else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
desc->chip->unmask(irq);
out_unlock:
spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_level_irq);
/**
* handle_fasteoi_irq - irq handler for transparent controllers
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
*
* Only a single callback will be issued to the chip: an ->eoi()
* call when the interrupt has been serviced. This enables support
* for modern forms of interrupt handlers, which handle the flow
* details in hardware, transparently.
*/
void
handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
{
struct irqaction *action;
irqreturn_t action_ret;
spin_lock(&desc->lock);
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out;
desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
/*
* If it's disabled or no action is available,
* then mask it and get out of here:
*/
action = desc->action;
if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
desc->status |= IRQ_PENDING;
if (desc->chip->mask)
desc->chip->mask(irq);
goto out;
}
desc->status |= IRQ_INPROGRESS;
desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
out:
desc->chip->eoi(irq);
spin_unlock(&desc->lock);
}
/**
* handle_edge_irq - edge type IRQ handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
*
* The interrupt occurs on the falling and/or rising edge of a hardware
* signal. The occurrence is latched into the irq controller hardware
* and must be acked in order to be re-enabled. After the ack another
* interrupt can happen on the same source even before the first one
* is handled by the associated event handler. If this happens it
* might be necessary to disable (mask) the interrupt depending on the
* controller hardware. This requires re-enabling the interrupt inside
* the loop which handles the interrupts that have arrived while
* the handler was running. Once all pending interrupts are handled, the
* loop is left.
*/
void
handle_edge_irq(unsigned int irq, struct irq_desc *desc)
{
spin_lock(&desc->lock);
desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
/*
* If we're currently running this IRQ, or it's disabled,
* we shouldn't process the IRQ. Mark it pending, handle
* the necessary masking and get out.
*/
if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) ||
!desc->action)) {
desc->status |= (IRQ_PENDING | IRQ_MASKED);
mask_ack_irq(desc, irq);
goto out_unlock;
}
kstat_incr_irqs_this_cpu(irq, desc);
/* Start handling the irq */
if (desc->chip->ack)
desc->chip->ack(irq);
/* Mark the IRQ currently in progress.*/
desc->status |= IRQ_INPROGRESS;
do {
struct irqaction *action = desc->action;
irqreturn_t action_ret;
if (unlikely(!action)) {
desc->chip->mask(irq);
goto out_unlock;
}
/*
* When another irq arrived while we were handling
* one, we could have masked the irq.
* Re-enable it, if it was not disabled in the meantime.
*/
if (unlikely((desc->status &
(IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
(IRQ_PENDING | IRQ_MASKED))) {
desc->chip->unmask(irq);
desc->status &= ~IRQ_MASKED;
}
desc->status &= ~IRQ_PENDING;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
desc->status &= ~IRQ_INPROGRESS;
out_unlock:
spin_unlock(&desc->lock);
}
/**
* handle_percpu_irq - Per CPU local irq handler
* @irq: the interrupt number
* @desc: the interrupt description structure for this irq
*
* Per CPU interrupts on SMP machines without locking requirements
*/
void
handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
{
irqreturn_t action_ret;
kstat_incr_irqs_this_cpu(irq, desc);
if (desc->chip->ack)
desc->chip->ack(irq);
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
if (desc->chip->eoi)
desc->chip->eoi(irq);
}
void
__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
const char *name)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
printk(KERN_ERR
"Trying to install type control for IRQ%d\n", irq);
return;
}
if (!handle)
handle = handle_bad_irq;
else if (desc->chip == &no_irq_chip) {
printk(KERN_WARNING "Trying to install %sinterrupt handler "
"for IRQ%d\n", is_chained ? "chained " : "", irq);
/*
* Some ARM implementations install a handler for really dumb
* interrupt hardware without setting an irq_chip. This worked
* with the ARM no_irq_chip but the check in setup_irq would
* prevent us from setting up the interrupt at all. Switch it to
* dummy_irq_chip for easy transition.
*/
desc->chip = &dummy_irq_chip;
}
chip_bus_lock(irq, desc);
spin_lock_irqsave(&desc->lock, flags);
/* Uninstall? */
if (handle == handle_bad_irq) {
if (desc->chip != &no_irq_chip)
mask_ack_irq(desc, irq);
desc->status |= IRQ_DISABLED;
desc->depth = 1;
}
desc->handle_irq = handle;
desc->name = name;
if (handle != handle_bad_irq && is_chained) {
desc->status &= ~IRQ_DISABLED;
desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
desc->depth = 0;
desc->chip->startup(irq);
}
spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(irq, desc);
}
EXPORT_SYMBOL_GPL(__set_irq_handler);
void
set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle)
{
set_irq_chip(irq, chip);
__set_irq_handler(irq, handle, 0, NULL);
}
void
set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle, const char *name)
{
set_irq_chip(irq, chip);
__set_irq_handler(irq, handle, 0, name);
}
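/*
 * Illustrative sketch only (hypothetical driver code, not part of this
 * file): an interrupt-controller driver would typically pick one of the
 * flow handlers above per line type.  "my_chip", MY_LEVEL_IRQ and
 * MY_EDGE_IRQ are assumed names used purely for the example.
 */
#define MY_LEVEL_IRQ	16	/* hypothetical line numbers */
#define MY_EDGE_IRQ	17

static struct irq_chip my_chip;		/* .mask/.unmask/.ack filled in */

static void __init my_board_init_irqs(void)
{
	/* level-triggered line: kept masked while the handler runs */
	set_irq_chip_and_handler_name(MY_LEVEL_IRQ, &my_chip,
				      handle_level_irq, "level");
	/* edge-triggered line: acked early, pending edges are replayed */
	set_irq_chip_and_handler_name(MY_EDGE_IRQ, &my_chip,
				      handle_edge_irq, "edge");
}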
void __init set_irq_noprobe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq);
return;
}
spin_lock_irqsave(&desc->lock, flags);
desc->status |= IRQ_NOPROBE;
spin_unlock_irqrestore(&desc->lock, flags);
}
void __init set_irq_probe(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
if (!desc) {
printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq);
return;
}
spin_lock_irqsave(&desc->lock, flags);
desc->status &= ~IRQ_NOPROBE;
spin_unlock_irqrestore(&desc->lock, flags);
}

View File

@@ -0,0 +1,94 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/device.h>
#include <linux/gfp.h>
/*
* Device resource management aware IRQ request/free implementation.
*/
struct irq_devres {
unsigned int irq;
void *dev_id;
};
static void devm_irq_release(struct device *dev, void *res)
{
struct irq_devres *this = res;
free_irq(this->irq, this->dev_id);
}
static int devm_irq_match(struct device *dev, void *res, void *data)
{
struct irq_devres *this = res, *match = data;
return this->irq == match->irq && this->dev_id == match->dev_id;
}
/**
* devm_request_threaded_irq - allocate an interrupt line for a managed device
* @dev: device to request interrupt for
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
* @thread_fn: function to be called in a threaded interrupt context. NULL
* for devices which handle everything in @handler
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
*
* Except for the extra @dev argument, this function takes the
* same arguments and performs the same function as
* request_irq(). IRQs requested with this function will be
* automatically freed on driver detach.
*
* If an IRQ allocated with this function needs to be freed
* separately, devm_free_irq() must be used.
*/
int devm_request_threaded_irq(struct device *dev, unsigned int irq,
irq_handler_t handler, irq_handler_t thread_fn,
unsigned long irqflags, const char *devname,
void *dev_id)
{
struct irq_devres *dr;
int rc;
dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres),
GFP_KERNEL);
if (!dr)
return -ENOMEM;
rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
dev_id);
if (rc) {
devres_free(dr);
return rc;
}
dr->irq = irq;
dr->dev_id = dev_id;
devres_add(dev, dr);
return 0;
}
EXPORT_SYMBOL(devm_request_threaded_irq);
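/*
 * Illustrative sketch only (hypothetical driver code, not part of this
 * file): a probe() routine using the managed request above.  The names
 * "my_irq_handler", "my-device" and my_data are assumptions; the line is
 * released automatically when the device is detached, so no explicit
 * free_irq() is needed (devm_free_irq() below can release it early).
 */
static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	/* acknowledge the hypothetical device here */
	return IRQ_HANDLED;
}

static int my_probe(struct device *dev, unsigned int irq, void *my_data)
{
	return devm_request_threaded_irq(dev, irq, my_irq_handler, NULL,
					 0, "my-device", my_data);
}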
/**
* devm_free_irq - free an interrupt
* @dev: device to free interrupt for
* @irq: Interrupt line to free
* @dev_id: Device identity to free
*
* Except for the extra @dev argument, this function takes the
* same arguments and performs the same function as free_irq().
* This function, instead of free_irq(), should be used to manually
* free IRQs allocated with devm_request_irq().
*/
void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
{
struct irq_devres match_data = { irq, dev_id };
free_irq(irq, dev_id);
WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
&match_data));
}
EXPORT_SYMBOL(devm_free_irq);

561
kernel/kernel/irq/handle.c Normal file
View File

@@ -0,0 +1,561 @@
/*
* linux/kernel/irq/handle.c
*
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King
*
* This file contains the core interrupt handling code.
*
* Detailed information is available in Documentation/DocBook/genericirq
*
*/
#include <linux/irq.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/rculist.h>
#include <linux/hash.h>
#include <linux/bootmem.h>
#include <trace/events/irq.h>
#include "internals.h"
/*
* lockdep: we want to handle all irq_desc locks as a single lock-class:
*/
struct lock_class_key irq_desc_lock_class;
/**
* handle_bad_irq - handle spurious and unhandled irqs
* @irq: the interrupt number
* @desc: description of the interrupt
*
* Handles spurious and unhandled IRQs. It also prints a debug message.
*/
void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
{
print_irq_desc(irq, desc);
kstat_incr_irqs_this_cpu(irq, desc);
ack_bad_irq(irq);
}
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
static void __init init_irq_default_affinity(void)
{
alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
cpumask_setall(irq_default_affinity);
}
#else
static void __init init_irq_default_affinity(void)
{
}
#endif
/*
* Linux has a controller-independent interrupt architecture.
* Every controller has a 'controller-template' that is used
* by the main code to do the right thing. Each driver-visible
* interrupt source is transparently wired to the appropriate
* controller. Thus drivers need not be aware of the
* interrupt-controller.
*
* The code is designed to be easily extended with new/different
* interrupt controllers, without having to do assembly magic or
* having to touch the generic code.
*
* Controller mappings for all interrupt sources:
*/
int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);
#ifdef CONFIG_SPARSE_IRQ
static struct irq_desc irq_desc_init = {
.irq = -1,
.status = IRQ_DISABLED,
.chip = &no_irq_chip,
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
};
void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
{
void *ptr;
if (slab_is_available())
ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
GFP_ATOMIC, node);
else
ptr = alloc_bootmem_node(NODE_DATA(node),
nr * sizeof(*desc->kstat_irqs));
/*
* Don't overwrite if we can not get a new one;
* init_copy_kstat_irqs() could still use the old one.
*/
if (ptr) {
printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node);
desc->kstat_irqs = ptr;
}
}
static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
{
memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
spin_lock_init(&desc->lock);
desc->irq = irq;
#ifdef CONFIG_SMP
desc->node = node;
#endif
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_kstat_irqs(desc, node, nr_cpu_ids);
if (!desc->kstat_irqs) {
printk(KERN_ERR "can not alloc kstat_irqs\n");
BUG_ON(1);
}
if (!alloc_desc_masks(desc, node, false)) {
printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
BUG_ON(1);
}
init_desc_masks(desc);
arch_init_chip_data(desc, node);
}
/*
* Protect the sparse_irqs:
*/
DEFINE_SPINLOCK(sparse_irq_lock);
struct irq_desc **irq_desc_ptrs __read_mostly;
static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS_LEGACY-1] = {
.irq = -1,
.status = IRQ_DISABLED,
.chip = &no_irq_chip,
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
}
};
static unsigned int *kstat_irqs_legacy;
int __init early_irq_init(void)
{
struct irq_desc *desc;
int legacy_count;
int node;
int i;
init_irq_default_affinity();
/* initialize nr_irqs based on nr_cpu_ids */
arch_probe_nr_irqs();
printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
desc = irq_desc_legacy;
legacy_count = ARRAY_SIZE(irq_desc_legacy);
node = first_online_node;
/* allocate irq_desc_ptrs array based on nr_irqs */
irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT);
/* allocate based on nr_cpu_ids */
kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids *
sizeof(int), GFP_NOWAIT, node);
for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
#ifdef CONFIG_SMP
desc[i].node = node;
#endif
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
alloc_desc_masks(&desc[i], node, true);
init_desc_masks(&desc[i]);
irq_desc_ptrs[i] = desc + i;
}
for (i = legacy_count; i < nr_irqs; i++)
irq_desc_ptrs[i] = NULL;
return arch_early_irq_init();
}
struct irq_desc *irq_to_desc(unsigned int irq)
{
if (irq_desc_ptrs && irq < nr_irqs)
return irq_desc_ptrs[irq];
return NULL;
}
struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
{
struct irq_desc *desc;
unsigned long flags;
if (irq >= nr_irqs) {
WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
irq, nr_irqs);
return NULL;
}
desc = irq_desc_ptrs[irq];
if (desc)
return desc;
spin_lock_irqsave(&sparse_irq_lock, flags);
/* We have to check it to avoid races with another CPU */
desc = irq_desc_ptrs[irq];
if (desc)
goto out_unlock;
if (slab_is_available())
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
else
desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node);
if (!desc) {
printk(KERN_ERR "can not alloc irq_desc\n");
BUG_ON(1);
}
init_one_irq_desc(irq, desc, node);
irq_desc_ptrs[irq] = desc;
out_unlock:
spin_unlock_irqrestore(&sparse_irq_lock, flags);
return desc;
}
#else /* !CONFIG_SPARSE_IRQ */
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
.status = IRQ_DISABLED,
.chip = &no_irq_chip,
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
}
};
static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
int __init early_irq_init(void)
{
struct irq_desc *desc;
int count;
int i;
init_irq_default_affinity();
printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
for (i = 0; i < count; i++) {
desc[i].irq = i;
alloc_desc_masks(&desc[i], 0, true);
init_desc_masks(&desc[i]);
desc[i].kstat_irqs = kstat_irqs_all[i];
}
return arch_early_irq_init();
}
struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
{
return irq_to_desc(irq);
}
#endif /* !CONFIG_SPARSE_IRQ */
void clear_kstat_irqs(struct irq_desc *desc)
{
memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
}
/*
* What should we do if we get a hw irq event on an illegal vector?
* Each architecture has to answer this for itself.
*/
static void ack_bad(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
print_irq_desc(irq, desc);
ack_bad_irq(irq);
}
/*
* NOP functions
*/
static void noop(unsigned int irq)
{
}
static unsigned int noop_ret(unsigned int irq)
{
return 0;
}
/*
* Generic no controller implementation
*/
struct irq_chip no_irq_chip = {
.name = "none",
.startup = noop_ret,
.shutdown = noop,
.enable = noop,
.disable = noop,
.ack = ack_bad,
.end = noop,
};
/*
* Generic dummy implementation which can be used for
* real dumb interrupt sources
*/
struct irq_chip dummy_irq_chip = {
.name = "dummy",
.startup = noop_ret,
.shutdown = noop,
.enable = noop,
.disable = noop,
.ack = noop,
.mask = noop,
.unmask = noop,
.end = noop,
};
/*
* Special, empty irq handler:
*/
irqreturn_t no_action(int cpl, void *dev_id)
{
return IRQ_NONE;
}
static void warn_no_thread(unsigned int irq, struct irqaction *action)
{
if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
return;
printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
"but no thread function available.", irq, action->name);
}
/**
* handle_IRQ_event - irq action chain handler
* @irq: the interrupt number
* @action: the interrupt action chain for this irq
*
* Handles the action chain of an irq event
*/
irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
{
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
if (!(action->flags & IRQF_DISABLED))
local_irq_enable_in_hardirq();
do {
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, ret);
switch (ret) {
case IRQ_WAKE_THREAD:
/*
* Set result to handled so the spurious check
* does not trigger.
*/
ret = IRQ_HANDLED;
/*
* Catch drivers which return WAKE_THREAD but
* did not set up a thread function
*/
if (unlikely(!action->thread_fn)) {
warn_no_thread(irq, action);
break;
}
/*
* Wake up the handler thread for this
* action. In case the thread crashed and was
* killed we just pretend that we handled the
* interrupt. The hardirq handler above has
* disabled the device interrupt, so no irq
* storm is lurking.
*/
if (likely(!test_bit(IRQTF_DIED,
&action->thread_flags))) {
set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
wake_up_process(action->thread);
}
/* Fall through to add to randomness */
case IRQ_HANDLED:
status |= action->flags;
break;
default:
break;
}
retval |= ret;
action = action->next;
} while (action);
if (status & IRQF_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
local_irq_disable();
return retval;
}
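/*
 * Illustrative sketch only (hypothetical driver code, not part of this
 * file): the IRQ_WAKE_THREAD case above is what callers of
 * request_threaded_irq() rely on - the hard handler quiets the device and
 * defers the real work to the thread function.  All names are assumed.
 */
static irqreturn_t my_quick_check(int irq, void *dev_id)
{
	/* silence the (hypothetical) device, then defer to the thread */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t my_thread_fn(int irq, void *dev_id)
{
	/* sleepable context: talk to the device over a slow bus, etc. */
	return IRQ_HANDLED;
}

static int my_setup_irq(unsigned int irq, void *dev_id)
{
	return request_threaded_irq(irq, my_quick_check, my_thread_fn,
				    0, "my-device", dev_id);
}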
#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
#ifdef CONFIG_ENABLE_WARN_DEPRECATED
# warning __do_IRQ is deprecated. Please convert to proper flow handlers
#endif
/**
* __do_IRQ - original all in one highlevel IRQ handler
* @irq: the interrupt number
*
* __do_IRQ handles all normal device IRQs (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
*
* This is the original x86 implementation which is used for every
* interrupt type.
*/
unsigned int __do_IRQ(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
unsigned int status;
kstat_incr_irqs_this_cpu(irq, desc);
if (CHECK_IRQ_PER_CPU(desc->status)) {
irqreturn_t action_ret;
/*
* No locking required for CPU-local interrupts:
*/
if (desc->chip->ack)
desc->chip->ack(irq);
if (likely(!(desc->status & IRQ_DISABLED))) {
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
}
desc->chip->end(irq);
return 1;
}
spin_lock(&desc->lock);
if (desc->chip->ack)
desc->chip->ack(irq);
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier
* WAITING is used by probe to mark irqs that are being tested
*/
status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
status |= IRQ_PENDING; /* we _want_ to handle it */
/*
* If the IRQ is disabled for whatever reason, we cannot
* use the action we have.
*/
action = NULL;
if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
action = desc->action;
status &= ~IRQ_PENDING; /* we commit to handling */
status |= IRQ_INPROGRESS; /* we are handling it */
}
desc->status = status;
/*
* If there is no IRQ handler or it was disabled, exit early.
* Since we set PENDING, if another processor is handling
* a different instance of this same irq, the other processor
* will take care of it.
*/
if (unlikely(!action))
goto out;
/*
* Edge triggered interrupts need to remember
* pending events.
* This applies to any hw interrupts that allow a second
* instance of the same irq to arrive while we are in do_IRQ
* or in the handler. But the code here only handles the _second_
* instance of the irq, not the third or fourth. So it is mostly
* useful for irq hardware that does not mask cleanly in an
* SMP environment.
*/
for (;;) {
irqreturn_t action_ret;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
spin_lock(&desc->lock);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
}
desc->status &= ~IRQ_INPROGRESS;
out:
/*
* The ->end() handler has to deal with interrupts which got
* disabled while the handler was running.
*/
desc->chip->end(irq);
spin_unlock(&desc->lock);
return 1;
}
#endif
void early_init_irq_lock_class(void)
{
struct irq_desc *desc;
int i;
for_each_irq_desc(i, desc) {
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
}
}
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
return desc ? desc->kstat_irqs[cpu] : 0;
}
EXPORT_SYMBOL(kstat_irqs_cpu);
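/*
 * Illustrative sketch only (not part of this file): summing the per-CPU
 * counts exported above, the way /proc/interrupts style code does.
 */
static unsigned int example_total_irq_count(unsigned int irq)
{
	unsigned int sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += kstat_irqs_cpu(irq, cpu);
	return sum;
}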

View File

@@ -0,0 +1,99 @@
/*
* IRQ subsystem internal functions and variables:
*/
extern int noirqdebug;
/* Set default functions for irq_chip structures: */
extern void irq_chip_set_defaults(struct irq_chip *chip);
/* Set default handler: */
extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags);
extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
extern struct lock_class_key irq_desc_lock_class;
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
extern void clear_kstat_irqs(struct irq_desc *desc);
extern spinlock_t sparse_irq_lock;
#ifdef CONFIG_SPARSE_IRQ
/* irq_desc_ptrs allocated at boot time */
extern struct irq_desc **irq_desc_ptrs;
#else
/* irq_desc_ptrs is a fixed size array */
extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
#endif
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
#else
static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
static inline void register_handler_proc(unsigned int irq,
struct irqaction *action) { }
static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
#endif
extern int irq_select_affinity_usr(unsigned int irq);
extern void irq_set_thread_affinity(struct irq_desc *desc);
/* Inline functions for support of irq chips on slow busses */
static inline void chip_bus_lock(unsigned int irq, struct irq_desc *desc)
{
if (unlikely(desc->chip->bus_lock))
desc->chip->bus_lock(irq);
}
static inline void chip_bus_sync_unlock(unsigned int irq, struct irq_desc *desc)
{
if (unlikely(desc->chip->bus_sync_unlock))
desc->chip->bus_sync_unlock(irq);
}
/*
* Debugging printout:
*/
#include <linux/kallsyms.h>
#define P(f) if (desc->status & f) printk("%14s set\n", #f)
static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
{
printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
printk("->handle_irq(): %p, ", desc->handle_irq);
print_symbol("%s\n", (unsigned long)desc->handle_irq);
printk("->chip(): %p, ", desc->chip);
print_symbol("%s\n", (unsigned long)desc->chip);
printk("->action(): %p\n", desc->action);
if (desc->action) {
printk("->action->handler(): %p, ", desc->action->handler);
print_symbol("%s\n", (unsigned long)desc->action->handler);
}
P(IRQ_INPROGRESS);
P(IRQ_DISABLED);
P(IRQ_PENDING);
P(IRQ_REPLAY);
P(IRQ_AUTODETECT);
P(IRQ_WAITING);
P(IRQ_LEVEL);
P(IRQ_MASKED);
#ifdef CONFIG_IRQ_PER_CPU
P(IRQ_PER_CPU);
#endif
P(IRQ_NOPROBE);
P(IRQ_NOREQUEST);
P(IRQ_NOAUTOEN);
}
#undef P

1112
kernel/kernel/irq/manage.c Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,68 @@
#include <linux/irq.h>
#include <linux/interrupt.h>
#include "internals.h"
void move_masked_irq(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (likely(!(desc->status & IRQ_MOVE_PENDING)))
return;
/*
* Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
*/
if (CHECK_IRQ_PER_CPU(desc->status)) {
WARN_ON(1);
return;
}
desc->status &= ~IRQ_MOVE_PENDING;
if (unlikely(cpumask_empty(desc->pending_mask)))
return;
if (!desc->chip->set_affinity)
return;
assert_spin_locked(&desc->lock);
/*
* If there was a valid mask to work with, please
* do the disable, re-program, enable sequence.
* This is *not* particularly important for level-triggered
* interrupts, but in an edge-triggered case we might be setting
* the RTE when an active trigger is coming in. This could
* cause some ioapics to malfunction.
* Being paranoid, I guess!
*
* For correct operation this depends on the caller
* masking the irqs.
*/
if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
< nr_cpu_ids))
if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
cpumask_copy(desc->affinity, desc->pending_mask);
irq_set_thread_affinity(desc);
}
cpumask_clear(desc->pending_mask);
}
void move_native_irq(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (likely(!(desc->status & IRQ_MOVE_PENDING)))
return;
if (unlikely(desc->status & IRQ_DISABLED))
return;
desc->chip->mask(irq);
move_masked_irq(irq);
desc->chip->unmask(irq);
}

View File

@@ -0,0 +1,119 @@
/*
* NUMA irq-desc migration code
*
* Migrate IRQ data structures (irq_desc, chip_data, etc.) over to
* the new "home node" of the IRQ.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include "internals.h"
static void init_copy_kstat_irqs(struct irq_desc *old_desc,
struct irq_desc *desc,
int node, int nr)
{
init_kstat_irqs(desc, node, nr);
if (desc->kstat_irqs != old_desc->kstat_irqs)
memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
nr * sizeof(*desc->kstat_irqs));
}
static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
{
if (old_desc->kstat_irqs == desc->kstat_irqs)
return;
kfree(old_desc->kstat_irqs);
old_desc->kstat_irqs = NULL;
}
static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
struct irq_desc *desc, int node)
{
memcpy(desc, old_desc, sizeof(struct irq_desc));
if (!alloc_desc_masks(desc, node, false)) {
printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
"for migration.\n", irq);
return false;
}
spin_lock_init(&desc->lock);
desc->node = node;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
init_copy_desc_masks(old_desc, desc);
arch_init_copy_chip_data(old_desc, desc, node);
return true;
}
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
{
free_kstat_irqs(old_desc, desc);
free_desc_masks(old_desc, desc);
arch_free_chip_data(old_desc, desc);
}
static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
int node)
{
struct irq_desc *desc;
unsigned int irq;
unsigned long flags;
irq = old_desc->irq;
spin_lock_irqsave(&sparse_irq_lock, flags);
/* We have to check it to avoid races with another CPU */
desc = irq_desc_ptrs[irq];
if (desc && old_desc != desc)
goto out_unlock;
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
if (!desc) {
printk(KERN_ERR "irq %d: can not get new irq_desc "
"for migration.\n", irq);
/* still use old one */
desc = old_desc;
goto out_unlock;
}
if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) {
/* still use old one */
kfree(desc);
desc = old_desc;
goto out_unlock;
}
irq_desc_ptrs[irq] = desc;
spin_unlock_irqrestore(&sparse_irq_lock, flags);
/* free the old one */
free_one_irq_desc(old_desc, desc);
kfree(old_desc);
return desc;
out_unlock:
spin_unlock_irqrestore(&sparse_irq_lock, flags);
return desc;
}
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
/* Descriptors that are statically allocated, or whose target node is -1, are not moved */
if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
if (desc->node != node)
desc = __real_move_irq_desc(desc, node);
return desc;
}

76
kernel/kernel/irq/pm.c Normal file
View File

@@ -0,0 +1,76 @@
/*
* linux/kernel/irq/pm.c
*
* Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
*
* This file contains power management functions related to interrupts.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include "internals.h"
/**
* suspend_device_irqs - disable all currently enabled interrupt lines
*
* During system-wide suspend or hibernation, device drivers need to be prevented
* from receiving interrupts and this function is provided for this purpose.
* It marks all interrupt lines in use, except for the timer ones, as disabled
* and sets the IRQ_SUSPENDED flag for each of them.
*/
void suspend_device_irqs(void)
{
struct irq_desc *desc;
int irq;
for_each_irq_desc(irq, desc) {
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
__disable_irq(desc, irq, true);
spin_unlock_irqrestore(&desc->lock, flags);
}
for_each_irq_desc(irq, desc)
if (desc->status & IRQ_SUSPENDED)
synchronize_irq(irq);
}
EXPORT_SYMBOL_GPL(suspend_device_irqs);
/**
* resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
*
* Enable all interrupt lines previously disabled by suspend_device_irqs() that
* have the IRQ_SUSPENDED flag set.
*/
void resume_device_irqs(void)
{
struct irq_desc *desc;
int irq;
for_each_irq_desc(irq, desc) {
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, true);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
EXPORT_SYMBOL_GPL(resume_device_irqs);
/**
* check_wakeup_irqs - check if any wake-up interrupts are pending
*/
int check_wakeup_irqs(void)
{
struct irq_desc *desc;
int irq;
for_each_irq_desc(irq, desc)
if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING))
return -EBUSY;
return 0;
}
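/*
 * Illustrative sketch only (not part of this file): the suspend core is
 * expected to call the three helpers above in this order;
 * enter_target_state() is a hypothetical placeholder for the
 * platform-specific suspend step.
 */
extern int enter_target_state(void);	/* hypothetical platform hook */

static int example_suspend_sequence(void)
{
	int error;

	suspend_device_irqs();		/* mark and disable device IRQs */
	if (check_wakeup_irqs())
		error = -EBUSY;		/* a wake-up IRQ is already pending */
	else
		error = enter_target_state();
	resume_device_irqs();		/* re-enable what was disabled */
	return error;
}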

272
kernel/kernel/irq/proc.c Normal file
View File

@@ -0,0 +1,272 @@
/*
* linux/kernel/irq/proc.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains the /proc/irq/ handling code.
*/
#include <linux/irq.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include "internals.h"
static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
const struct cpumask *mask = desc->affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PENDING)
mask = desc->pending_mask;
#endif
seq_cpumask(m, mask);
seq_putc(m, '\n');
return 0;
}
#ifndef is_affinity_mask_valid
#define is_affinity_mask_valid(val) 1
#endif
int no_irq_affinity;
static ssize_t irq_affinity_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
cpumask_var_t new_value;
int err;
if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
irq_balancing_disabled(irq))
return -EIO;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
if (err)
goto free_cpumask;
if (!is_affinity_mask_valid(new_value)) {
err = -EINVAL;
goto free_cpumask;
}
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
if (!cpumask_intersects(new_value, cpu_online_mask)) {
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
err = irq_select_affinity_usr(irq) ? -EINVAL : count;
} else {
irq_set_affinity(irq, new_value);
err = count;
}
free_cpumask:
free_cpumask_var(new_value);
return err;
}
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
}
static const struct file_operations irq_affinity_proc_fops = {
.open = irq_affinity_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = irq_affinity_proc_write,
};
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_cpumask(m, irq_default_affinity);
seq_putc(m, '\n');
return 0;
}
static ssize_t default_affinity_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
cpumask_var_t new_value;
int err;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
if (err)
goto out;
if (!is_affinity_mask_valid(new_value)) {
err = -EINVAL;
goto out;
}
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
if (!cpumask_intersects(new_value, cpu_online_mask)) {
err = -EINVAL;
goto out;
}
cpumask_copy(irq_default_affinity, new_value);
err = count;
out:
free_cpumask_var(new_value);
return err;
}
static int default_affinity_open(struct inode *inode, struct file *file)
{
return single_open(file, default_affinity_show, NULL);
}
static const struct file_operations default_affinity_proc_fops = {
.open = default_affinity_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = default_affinity_write,
};
#endif
static int irq_spurious_read(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
struct irq_desc *desc = irq_to_desc((long) data);
return sprintf(page, "count %u\n"
"unhandled %u\n"
"last_unhandled %u ms\n",
desc->irq_count,
desc->irqs_unhandled,
jiffies_to_msecs(desc->last_unhandled));
}
#define MAX_NAMELEN 128
static int name_unique(unsigned int irq, struct irqaction *new_action)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action;
unsigned long flags;
int ret = 1;
spin_lock_irqsave(&desc->lock, flags);
for (action = desc->action ; action; action = action->next) {
if ((action != new_action) && action->name &&
!strcmp(new_action->name, action->name)) {
ret = 0;
break;
}
}
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
void register_handler_proc(unsigned int irq, struct irqaction *action)
{
char name [MAX_NAMELEN];
struct irq_desc *desc = irq_to_desc(irq);
if (!desc->dir || action->dir || !action->name ||
!name_unique(irq, action))
return;
memset(name, 0, MAX_NAMELEN);
snprintf(name, MAX_NAMELEN, "%s", action->name);
/* create /proc/irq/1234/handler/ */
action->dir = proc_mkdir(name, desc->dir);
}
#undef MAX_NAMELEN
#define MAX_NAMELEN 10
void register_irq_proc(unsigned int irq, struct irq_desc *desc)
{
char name [MAX_NAMELEN];
struct proc_dir_entry *entry;
if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir)
return;
memset(name, 0, MAX_NAMELEN);
sprintf(name, "%d", irq);
/* create /proc/irq/1234 */
desc->dir = proc_mkdir(name, root_irq_dir);
#ifdef CONFIG_SMP
/* create /proc/irq/<irq>/smp_affinity */
proc_create_data("smp_affinity", 0600, desc->dir,
&irq_affinity_proc_fops, (void *)(long)irq);
#endif
entry = create_proc_entry("spurious", 0444, desc->dir);
if (entry) {
entry->data = (void *)(long)irq;
entry->read_proc = irq_spurious_read;
}
}
#undef MAX_NAMELEN
void unregister_handler_proc(unsigned int irq, struct irqaction *action)
{
if (action->dir) {
struct irq_desc *desc = irq_to_desc(irq);
remove_proc_entry(action->dir->name, desc->dir);
}
}
static void register_default_affinity_proc(void)
{
#ifdef CONFIG_SMP
proc_create("irq/default_smp_affinity", 0600, NULL,
&default_affinity_proc_fops);
#endif
}
void init_irq_proc(void)
{
unsigned int irq;
struct irq_desc *desc;
/* create /proc/irq */
root_irq_dir = proc_mkdir("irq", NULL);
if (!root_irq_dir)
return;
register_default_affinity_proc();
/*
* Create entries for all existing IRQs.
*/
for_each_irq_desc(irq, desc) {
if (!desc)
continue;
register_irq_proc(irq, desc);
}
}

View File

@@ -0,0 +1,81 @@
/*
* linux/kernel/irq/resend.c
*
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
* Copyright (C) 2005-2006, Thomas Gleixner
*
* This file contains the IRQ-resend code
*
* If the interrupt is waiting to be processed, we try to re-run it.
* We can't directly run it from here since the caller might be in an
* interrupt-protected region. Not all irq controller chips can
* retrigger interrupts at the hardware level, so in those cases
* we allow the resending of IRQs via a tasklet.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include "internals.h"
#ifdef CONFIG_HARDIRQS_SW_RESEND
/* Bitmap to handle software resend of interrupts: */
static DECLARE_BITMAP(irqs_resend, NR_IRQS);
/*
* Run software resends of IRQs
*/
static void resend_irqs(unsigned long arg)
{
struct irq_desc *desc;
int irq;
while (!bitmap_empty(irqs_resend, nr_irqs)) {
irq = find_first_bit(irqs_resend, nr_irqs);
clear_bit(irq, irqs_resend);
desc = irq_to_desc(irq);
local_irq_disable();
desc->handle_irq(irq, desc);
local_irq_enable();
}
}
/* Tasklet to handle resend: */
static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
#endif
/*
* IRQ resend
*
* Is called with interrupts disabled and desc->lock held.
*/
void check_irq_resend(struct irq_desc *desc, unsigned int irq)
{
unsigned int status = desc->status;
/*
* Make sure the interrupt is enabled, before resending it:
*/
desc->chip->enable(irq);
/*
* We do not resend level type interrupts. Level type
* interrupts are resent by hardware when they are still
* active.
*/
if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY;
if (!desc->chip->retrigger || !desc->chip->retrigger(irq)) {
#ifdef CONFIG_HARDIRQS_SW_RESEND
/* Set it pending and activate the softirq: */
set_bit(irq, irqs_resend);
tasklet_schedule(&resend_tasklet);
#endif
}
}
}

View File

@@ -0,0 +1,313 @@
/*
* linux/kernel/irq/spurious.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains spurious interrupt handling.
*/
#include <linux/jiffies.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/timer.h>
static int irqfixup __read_mostly;
#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(unsigned long dummy);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
/*
* Recovery handler for misrouted interrupts.
*/
static int try_one_irq(int irq, struct irq_desc *desc)
{
struct irqaction *action;
int ok = 0, work = 0;
spin_lock(&desc->lock);
/* Already running on another processor */
if (desc->status & IRQ_INPROGRESS) {
/*
* Already running: If it is shared get the other
* CPU to go looking for our mystery interrupt too
*/
if (desc->action && (desc->action->flags & IRQF_SHARED))
desc->status |= IRQ_PENDING;
spin_unlock(&desc->lock);
return ok;
}
/* Honour the normal IRQ locking */
desc->status |= IRQ_INPROGRESS;
action = desc->action;
spin_unlock(&desc->lock);
while (action) {
/* Only shared IRQ handlers are safe to call */
if (action->flags & IRQF_SHARED) {
if (action->handler(irq, action->dev_id) ==
IRQ_HANDLED)
ok = 1;
}
action = action->next;
}
local_irq_disable();
/* Now clean up the flags */
spin_lock(&desc->lock);
action = desc->action;
/*
* While we were looking for a fixup someone queued a real
* IRQ clashing with our walk:
*/
while ((desc->status & IRQ_PENDING) && action) {
/*
* Perform real IRQ processing for the IRQ we deferred
*/
work = 1;
spin_unlock(&desc->lock);
handle_IRQ_event(irq, action);
spin_lock(&desc->lock);
desc->status &= ~IRQ_PENDING;
}
desc->status &= ~IRQ_INPROGRESS;
/*
* If we did actual work for the real IRQ line we must let the
* IRQ controller clean up too
*/
if (work && desc->chip && desc->chip->end)
desc->chip->end(irq);
spin_unlock(&desc->lock);
return ok;
}
static int misrouted_irq(int irq)
{
struct irq_desc *desc;
int i, ok = 0;
for_each_irq_desc(i, desc) {
if (!i)
continue;
if (i == irq) /* Already tried */
continue;
if (try_one_irq(i, desc))
ok = 1;
}
/* So the caller can adjust the irq error counts */
return ok;
}
static void poll_all_shared_irqs(void)
{
struct irq_desc *desc;
int i;
for_each_irq_desc(i, desc) {
unsigned int status;
if (!i)
continue;
/* Racy but it doesn't matter */
status = desc->status;
barrier();
if (!(status & IRQ_SPURIOUS_DISABLED))
continue;
local_irq_disable();
try_one_irq(i, desc);
local_irq_enable();
}
}
static void poll_spurious_irqs(unsigned long dummy)
{
poll_all_shared_irqs();
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
#ifdef CONFIG_DEBUG_SHIRQ
void debug_poll_all_shared_irqs(void)
{
poll_all_shared_irqs();
}
#endif
/*
* If 99,900 of the previous 100,000 interrupts have not been handled
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
* and try to turn the IRQ off.
*
* (The other 100-of-100,000 interrupts may have been a correctly
* functioning device sharing an IRQ with the failing one)
*
* Called under desc->lock
*/
static void
__report_bad_irq(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
struct irqaction *action;
if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
printk(KERN_ERR "irq event %d: bogus return value %x\n",
irq, action_ret);
} else {
printk(KERN_ERR "irq %d: nobody cared (try booting with "
"the \"irqpoll\" option)\n", irq);
}
dump_stack();
printk(KERN_ERR "handlers:\n");
action = desc->action;
while (action) {
printk(KERN_ERR "[<%p>]", action->handler);
print_symbol(" (%s)",
(unsigned long)action->handler);
printk("\n");
action = action->next;
}
}
static void
report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
{
static int count = 100;
if (count > 0) {
count--;
__report_bad_irq(irq, desc, action_ret);
}
}
static inline int
try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
struct irqaction *action;
if (!irqfixup)
return 0;
/* We didn't actually handle the IRQ - see if it was misrouted? */
if (action_ret == IRQ_NONE)
return 1;
/*
* But for 'irqfixup == 2' we also do it for handled interrupts if
* they are marked as IRQF_IRQPOLL (or for irq zero, which is the
* traditional PC timer interrupt - a legacy case)
*/
if (irqfixup < 2)
return 0;
if (!irq)
return 1;
/*
* Since we don't get the descriptor lock, "action" can
* change under us. We don't really care, but we don't
* want to follow a NULL pointer. So tell the compiler to
* just load it once by using a barrier.
*/
action = desc->action;
barrier();
return action && (action->flags & IRQF_IRQPOLL);
}
void note_interrupt(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
if (unlikely(action_ret != IRQ_HANDLED)) {
/*
* If we are seeing only the odd spurious IRQ caused by
* bus asynchronicity then don't eventually trigger an error,
* otherwise the counter becomes a doomsday timer for otherwise
* working systems.
*/
if (time_after(jiffies, desc->last_unhandled + HZ/10))
desc->irqs_unhandled = 1;
else
desc->irqs_unhandled++;
desc->last_unhandled = jiffies;
if (unlikely(action_ret != IRQ_NONE))
report_bad_irq(irq, desc, action_ret);
}
if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
int ok = misrouted_irq(irq);
if (action_ret == IRQ_NONE)
desc->irqs_unhandled -= ok;
}
desc->irq_count++;
if (likely(desc->irq_count < 100000))
return;
desc->irq_count = 0;
if (unlikely(desc->irqs_unhandled > 99900)) {
/*
* The interrupt is stuck
*/
__report_bad_irq(irq, desc, action_ret);
/*
* Now kill the IRQ
*/
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
desc->depth++;
desc->chip->disable(irq);
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
desc->irqs_unhandled = 0;
}
int noirqdebug __read_mostly;
int noirqdebug_setup(char *str)
{
noirqdebug = 1;
printk(KERN_INFO "IRQ lockup detection disabled\n");
return 1;
}
__setup("noirqdebug", noirqdebug_setup);
module_param(noirqdebug, bool, 0644);
MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
static int __init irqfixup_setup(char *str)
{
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
return 1;
}
__setup("irqfixup", irqfixup_setup);
module_param(irqfixup, int, 0644);
static int __init irqpoll_setup(char *str)
{
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");
printk(KERN_WARNING "This may significantly impact system "
"performance\n");
return 1;
}
__setup("irqpoll", irqpoll_setup);

295
kernel/kernel/itimer.c Normal file
View File

@@ -0,0 +1,295 @@
/*
* linux/kernel/itimer.c
*
* Copyright (C) 1992 Darren Senn
*/
/* These are all the functions necessary to implement itimers */
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/posix-timers.h>
#include <linux/hrtimer.h>
#include <trace/events/timer.h>
#include <asm/uaccess.h>
/**
* itimer_get_remtime - get remaining time for the timer
*
* @timer: the timer to read
*
* Returns the delta between the expiry time and now, which can be
* less than zero, or 1 usec for a pending expired timer
*/
static struct timeval itimer_get_remtime(struct hrtimer *timer)
{
ktime_t rem = hrtimer_get_remaining(timer);
/*
* Racy but safe: if the itimer expires after the above
* hrtimer_get_remaining() call but before this condition
* then we return 0 - which is correct.
*/
if (hrtimer_active(timer)) {
if (rem.tv64 <= 0)
rem.tv64 = NSEC_PER_USEC;
} else
rem.tv64 = 0;
return ktime_to_timeval(rem);
}
static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
struct itimerval *const value)
{
cputime_t cval, cinterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
spin_lock_irq(&tsk->sighand->siglock);
cval = it->expires;
cinterval = it->incr;
if (!cputime_eq(cval, cputime_zero)) {
struct task_cputime cputime;
cputime_t t;
thread_group_cputimer(tsk, &cputime);
if (clock_id == CPUCLOCK_PROF)
t = cputime_add(cputime.utime, cputime.stime);
else
/* CPUCLOCK_VIRT */
t = cputime.utime;
if (cputime_le(cval, t))
/* about to fire */
cval = cputime_one_jiffy;
else
cval = cputime_sub(cval, t);
}
spin_unlock_irq(&tsk->sighand->siglock);
cputime_to_timeval(cval, &value->it_value);
cputime_to_timeval(cinterval, &value->it_interval);
}
int do_getitimer(int which, struct itimerval *value)
{
struct task_struct *tsk = current;
switch (which) {
case ITIMER_REAL:
spin_lock_irq(&tsk->sighand->siglock);
value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
value->it_interval =
ktime_to_timeval(tsk->signal->it_real_incr);
spin_unlock_irq(&tsk->sighand->siglock);
break;
case ITIMER_VIRTUAL:
get_cpu_itimer(tsk, CPUCLOCK_VIRT, value);
break;
case ITIMER_PROF:
get_cpu_itimer(tsk, CPUCLOCK_PROF, value);
break;
default:
return(-EINVAL);
}
return 0;
}
SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
{
int error = -EFAULT;
struct itimerval get_buffer;
if (value) {
error = do_getitimer(which, &get_buffer);
if (!error &&
copy_to_user(value, &get_buffer, sizeof(get_buffer)))
error = -EFAULT;
}
return error;
}
/*
* The timer is automagically restarted, when interval != 0
*/
enum hrtimer_restart it_real_fn(struct hrtimer *timer)
{
struct signal_struct *sig =
container_of(timer, struct signal_struct, real_timer);
trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
return HRTIMER_NORESTART;
}
static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns)
{
struct timespec ts;
s64 cpu_ns;
cputime_to_timespec(ct, &ts);
cpu_ns = timespec_to_ns(&ts);
return (cpu_ns <= real_ns) ? 0 : cpu_ns - real_ns;
}
static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
const struct itimerval *const value,
struct itimerval *const ovalue)
{
cputime_t cval, nval, cinterval, ninterval;
s64 ns_ninterval, ns_nval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
nval = timeval_to_cputime(&value->it_value);
ns_nval = timeval_to_ns(&value->it_value);
ninterval = timeval_to_cputime(&value->it_interval);
ns_ninterval = timeval_to_ns(&value->it_interval);
it->incr_error = cputime_sub_ns(ninterval, ns_ninterval);
it->error = cputime_sub_ns(nval, ns_nval);
spin_lock_irq(&tsk->sighand->siglock);
cval = it->expires;
cinterval = it->incr;
if (!cputime_eq(cval, cputime_zero) ||
!cputime_eq(nval, cputime_zero)) {
if (cputime_gt(nval, cputime_zero))
nval = cputime_add(nval, cputime_one_jiffy);
set_process_cpu_timer(tsk, clock_id, &nval, &cval);
}
it->expires = nval;
it->incr = ninterval;
trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
spin_unlock_irq(&tsk->sighand->siglock);
if (ovalue) {
cputime_to_timeval(cval, &ovalue->it_value);
cputime_to_timeval(cinterval, &ovalue->it_interval);
}
}
/*
* Returns true if the timeval is in canonical form
*/
#define timeval_valid(t) \
(((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
struct task_struct *tsk = current;
struct hrtimer *timer;
ktime_t expires;
/*
* Validate the timevals in value.
*/
if (!timeval_valid(&value->it_value) ||
!timeval_valid(&value->it_interval))
return -EINVAL;
switch (which) {
case ITIMER_REAL:
again:
spin_lock_irq(&tsk->sighand->siglock);
timer = &tsk->signal->real_timer;
if (ovalue) {
ovalue->it_value = itimer_get_remtime(timer);
ovalue->it_interval
= ktime_to_timeval(tsk->signal->it_real_incr);
}
/* We are sharing ->siglock with it_real_fn() */
if (hrtimer_try_to_cancel(timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
goto again;
}
expires = timeval_to_ktime(value->it_value);
if (expires.tv64 != 0) {
tsk->signal->it_real_incr =
timeval_to_ktime(value->it_interval);
hrtimer_start(timer, expires, HRTIMER_MODE_REL);
} else
tsk->signal->it_real_incr.tv64 = 0;
trace_itimer_state(ITIMER_REAL, value, 0);
spin_unlock_irq(&tsk->sighand->siglock);
break;
case ITIMER_VIRTUAL:
set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue);
break;
case ITIMER_PROF:
set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue);
break;
default:
return -EINVAL;
}
return 0;
}
/**
* alarm_setitimer - set alarm in seconds
*
* @seconds: number of seconds until alarm
* 0 disables the alarm
*
* Returns the remaining time in seconds of a pending timer or 0 when
* the timer is not active.
*
* On 32 bit machines the seconds value is limited to INT_MAX to avoid
* negative timeval settings which would cause immediate expiry.
*/
unsigned int alarm_setitimer(unsigned int seconds)
{
struct itimerval it_new, it_old;
#if BITS_PER_LONG < 64
if (seconds > INT_MAX)
seconds = INT_MAX;
#endif
it_new.it_value.tv_sec = seconds;
it_new.it_value.tv_usec = 0;
it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
do_setitimer(ITIMER_REAL, &it_new, &it_old);
/*
* We can't return 0 if we have an alarm pending ... And we'd
* better return too much than too little anyway
*/
if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
it_old.it_value.tv_usec >= 500000)
it_old.it_value.tv_sec++;
return it_old.it_value.tv_sec;
}
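/*
 * Minimal sketch (not part of this file): the alarm(2) system call,
 * implemented elsewhere in the tree, is essentially a thin wrapper
 * around the helper above.
 */
SYSCALL_DEFINE1(alarm, unsigned int, seconds)
{
	return alarm_setitimer(seconds);
}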
SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
struct itimerval __user *, ovalue)
{
struct itimerval set_buffer, get_buffer;
int error;
if (value) {
if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
return -EFAULT;
} else
memset((char *) &set_buffer, 0, sizeof(set_buffer));
error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
if (error || !ovalue)
return error;
if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
return -EFAULT;
return 0;
}
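/*
 * Illustrative userspace sketch (not kernel code, not part of this file):
 * arming a repeating 250 ms ITIMER_REAL, which ends up in do_setitimer()
 * above via the setitimer system call.
 */
#include <signal.h>
#include <sys/time.h>

static void on_alarm(int sig)
{
	/* runs every 250 ms once the timer is armed */
}

static void example_arm_itimer(void)
{
	struct itimerval val = {
		.it_interval = { .tv_sec = 0, .tv_usec = 250000 },
		.it_value    = { .tv_sec = 0, .tv_usec = 250000 },
	};

	signal(SIGALRM, on_alarm);
	setitimer(ITIMER_REAL, &val, NULL);
}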

530
kernel/kernel/kallsyms.c Normal file
View File

@@ -0,0 +1,530 @@
/*
* kallsyms.c: in-kernel printing of symbolic oopses and stack traces.
*
* Rewritten and vastly simplified by Rusty Russell for in-kernel
* module loader:
* Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
*
* ChangeLog:
*
* (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
* Changed the compression method from stem compression to "table lookup"
* compression (see scripts/kallsyms.c for a more complete description)
*/
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/proc_fs.h>
#include <linux/sched.h> /* for cond_resched */
#include <linux/mm.h>
#include <linux/ctype.h>
#include <asm/sections.h>
#ifdef CONFIG_KALLSYMS_ALL
#define all_var 1
#else
#define all_var 0
#endif
/*
* These will be re-linked against their real values
* during the second link stage.
*/
extern const unsigned long kallsyms_addresses[] __attribute__((weak));
extern const u8 kallsyms_names[] __attribute__((weak));
/*
* Tell the compiler that the count isn't in the small data section if the arch
* has one (eg: FRV).
*/
extern const unsigned long kallsyms_num_syms
__attribute__((weak, section(".rodata")));
extern const u8 kallsyms_token_table[] __attribute__((weak));
extern const u16 kallsyms_token_index[] __attribute__((weak));
extern const unsigned long kallsyms_markers[] __attribute__((weak));
static inline int is_kernel_inittext(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext
&& addr <= (unsigned long)_einittext)
return 1;
return 0;
}
static inline int is_kernel_text(unsigned long addr)
{
if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
arch_is_kernel_text(addr))
return 1;
return in_gate_area_no_task(addr);
}
static inline int is_kernel(unsigned long addr)
{
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
return 1;
return in_gate_area_no_task(addr);
}
static int is_ksym_addr(unsigned long addr)
{
if (all_var)
return is_kernel(addr);
return is_kernel_text(addr) || is_kernel_inittext(addr);
}
/*
* Expand a compressed symbol data into the resulting uncompressed string,
* given the offset to where the symbol is in the compressed stream.
*/
static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
{
int len, skipped_first = 0;
const u8 *tptr, *data;
/* Get the compressed symbol length from the first symbol byte. */
data = &kallsyms_names[off];
len = *data;
data++;
/*
* Update the offset to return the offset for the next symbol on
* the compressed stream.
*/
off += len + 1;
/*
* For every byte on the compressed symbol data, copy the table
* entry for that byte.
*/
while (len) {
tptr = &kallsyms_token_table[kallsyms_token_index[*data]];
data++;
len--;
while (*tptr) {
if (skipped_first) {
*result = *tptr;
result++;
} else
skipped_first = 1;
tptr++;
}
}
*result = '\0';
/* Return to offset to the next symbol. */
return off;
}
/*
* Get symbol type information. This is encoded as a single char at the
* beginning of the symbol name.
*/
static char kallsyms_get_symbol_type(unsigned int off)
{
/*
* Get just the first code, look it up in the token table,
* and return the first char from this token.
*/
return kallsyms_token_table[kallsyms_token_index[kallsyms_names[off + 1]]];
}
/*
* Find the offset on the compressed stream given and index in the
* kallsyms array.
*/
static unsigned int get_symbol_offset(unsigned long pos)
{
const u8 *name;
int i;
/*
* Use the closest marker we have. We have markers every 256 positions,
* so that should be close enough.
*/
name = &kallsyms_names[kallsyms_markers[pos >> 8]];
/*
* Sequentially scan all the symbols up to the point we're searching
* for. Every symbol is stored in a [<len>][<len> bytes of data] format,
* so we just need to add the len to the current pointer for every
* symbol we wish to skip.
*/
for (i = 0; i < (pos & 0xFF); i++)
name = name + (*name) + 1;
return name - kallsyms_names;
}
/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)
{
char namebuf[KSYM_NAME_LEN];
unsigned long i;
unsigned int off;
for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
off = kallsyms_expand_symbol(off, namebuf);
if (strcmp(namebuf, name) == 0)
return kallsyms_addresses[i];
}
return module_kallsyms_lookup_name(name);
}
EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
unsigned long),
void *data)
{
char namebuf[KSYM_NAME_LEN];
unsigned long i;
unsigned int off;
int ret;
for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
off = kallsyms_expand_symbol(off, namebuf);
ret = fn(data, namebuf, NULL, kallsyms_addresses[i]);
if (ret != 0)
return ret;
}
return module_kallsyms_on_each_symbol(fn, data);
}
EXPORT_SYMBOL_GPL(kallsyms_on_each_symbol);
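/*
 * Illustrative sketch only (not part of this file): walking every core
 * and module symbol with the iterator above; returning non-zero from the
 * callback stops the walk.
 */
static int example_count_one(void *data, const char *name,
			     struct module *mod, unsigned long addr)
{
	unsigned long *count = data;

	(*count)++;
	return 0;	/* keep iterating */
}

static unsigned long example_count_symbols(void)
{
	unsigned long count = 0;

	kallsyms_on_each_symbol(example_count_one, &count);
	return count;
}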
static unsigned long get_symbol_pos(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset)
{
unsigned long symbol_start = 0, symbol_end = 0;
unsigned long i, low, high, mid;
/* This kernel should never have been booted. */
BUG_ON(!kallsyms_addresses);
/* Do a binary search on the sorted kallsyms_addresses array. */
low = 0;
high = kallsyms_num_syms;
while (high - low > 1) {
mid = low + (high - low) / 2;
if (kallsyms_addresses[mid] <= addr)
low = mid;
else
high = mid;
}
/*
* Search for the first aliased symbol. Aliased
* symbols are symbols with the same address.
*/
while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
--low;
symbol_start = kallsyms_addresses[low];
/* Search for next non-aliased symbol. */
for (i = low + 1; i < kallsyms_num_syms; i++) {
if (kallsyms_addresses[i] > symbol_start) {
symbol_end = kallsyms_addresses[i];
break;
}
}
/* If we found no next symbol, we use the end of the section. */
if (!symbol_end) {
if (is_kernel_inittext(addr))
symbol_end = (unsigned long)_einittext;
else if (all_var)
symbol_end = (unsigned long)_end;
else
symbol_end = (unsigned long)_etext;
}
if (symbolsize)
*symbolsize = symbol_end - symbol_start;
if (offset)
*offset = addr - symbol_start;
return low;
}
/*
* Lookup an address but don't bother to find any names.
*/
int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
unsigned long *offset)
{
char namebuf[KSYM_NAME_LEN];
if (is_ksym_addr(addr))
return !!get_symbol_pos(addr, symbolsize, offset);
return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf);
}
/*
* Lookup an address
* - modname is set to NULL if it's in the kernel.
* - We guarantee that the returned name is valid until we reschedule even if
*   it resides in a module.
* - We also guarantee that modname will be valid until rescheduled.
*/
const char *kallsyms_lookup(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset,
char **modname, char *namebuf)
{
namebuf[KSYM_NAME_LEN - 1] = 0;
namebuf[0] = 0;
if (is_ksym_addr(addr)) {
unsigned long pos;
pos = get_symbol_pos(addr, symbolsize, offset);
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
if (modname)
*modname = NULL;
return namebuf;
}
/* See if it's in a module. */
return module_address_lookup(addr, symbolsize, offset, modname,
namebuf);
}
int lookup_symbol_name(unsigned long addr, char *symname)
{
symname[0] = '\0';
symname[KSYM_NAME_LEN - 1] = '\0';
if (is_ksym_addr(addr)) {
unsigned long pos;
pos = get_symbol_pos(addr, NULL, NULL);
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(pos), symname);
return 0;
}
/* See if it's in a module. */
return lookup_module_symbol_name(addr, symname);
}
int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
unsigned long *offset, char *modname, char *name)
{
name[0] = '\0';
name[KSYM_NAME_LEN - 1] = '\0';
if (is_ksym_addr(addr)) {
unsigned long pos;
pos = get_symbol_pos(addr, size, offset);
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(pos), name);
modname[0] = '\0';
return 0;
}
/* See if it's in a module. */
return lookup_module_symbol_attrs(addr, size, offset, modname, name);
}
/* Look up a kernel symbol and return it in a text buffer. */
int sprint_symbol(char *buffer, unsigned long address)
{
char *modname;
const char *name;
unsigned long offset, size;
int len;
name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
if (!name)
return sprintf(buffer, "0x%lx", address);
if (name != buffer)
strcpy(buffer, name);
len = strlen(buffer);
buffer += len;
if (modname)
len += sprintf(buffer, "+%#lx/%#lx [%s]",
offset, size, modname);
else
len += sprintf(buffer, "+%#lx/%#lx", offset, size);
return len;
}
EXPORT_SYMBOL_GPL(sprint_symbol);
/* Look up a kernel symbol and print it to the kernel messages. */
void __print_symbol(const char *fmt, unsigned long address)
{
char buffer[KSYM_SYMBOL_LEN];
sprint_symbol(buffer, address);
printk(fmt, buffer);
}
EXPORT_SYMBOL(__print_symbol);
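/*
 * Illustrative sketch, not part of the original file: formatting an
 * arbitrary text address with sprint_symbol(), as a driver might do when
 * logging where it was called from. The "demo_" name is invented.
 */
static void demo_report_caller(void)
{
	char sym[KSYM_SYMBOL_LEN];

	sprint_symbol(sym, (unsigned long)__builtin_return_address(0));
	printk(KERN_DEBUG "called from %s\n", sym);
}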
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter {
loff_t pos;
unsigned long value;
unsigned int nameoff; /* If iterating in core kernel symbols. */
char type;
char name[KSYM_NAME_LEN];
char module_name[MODULE_NAME_LEN];
int exported;
};
static int get_ksymbol_mod(struct kallsym_iter *iter)
{
if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value,
&iter->type, iter->name, iter->module_name,
&iter->exported) < 0)
return 0;
return 1;
}
/* Returns space to next name. */
static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
{
unsigned off = iter->nameoff;
iter->module_name[0] = '\0';
iter->value = kallsyms_addresses[iter->pos];
iter->type = kallsyms_get_symbol_type(off);
off = kallsyms_expand_symbol(off, iter->name);
return off - iter->nameoff;
}
static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
{
iter->name[0] = '\0';
iter->nameoff = get_symbol_offset(new_pos);
iter->pos = new_pos;
}
/* Returns false if pos at or past end of file. */
static int update_iter(struct kallsym_iter *iter, loff_t pos)
{
/* Module symbols can be accessed randomly. */
if (pos >= kallsyms_num_syms) {
iter->pos = pos;
return get_ksymbol_mod(iter);
}
/* If we're not on the desired position, reset to new position. */
if (pos != iter->pos)
reset_iter(iter, pos);
iter->nameoff += get_ksymbol_core(iter);
iter->pos++;
return 1;
}
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
(*pos)++;
if (!update_iter(m->private, *pos))
return NULL;
return p;
}
static void *s_start(struct seq_file *m, loff_t *pos)
{
if (!update_iter(m->private, *pos))
return NULL;
return m->private;
}
static void s_stop(struct seq_file *m, void *p)
{
}
static int s_show(struct seq_file *m, void *p)
{
struct kallsym_iter *iter = m->private;
/* Some debugging symbols have no name. Ignore them. */
if (!iter->name[0])
return 0;
if (iter->module_name[0]) {
char type;
/*
* Label it "global" if it is exported,
* "local" if not exported.
*/
type = iter->exported ? toupper(iter->type) :
tolower(iter->type);
seq_printf(m, "%0*lx %c %s\t[%s]\n",
(int)(2 * sizeof(void *)),
iter->value, type, iter->name, iter->module_name);
} else
seq_printf(m, "%0*lx %c %s\n",
(int)(2 * sizeof(void *)),
iter->value, iter->type, iter->name);
return 0;
}
static const struct seq_operations kallsyms_op = {
.start = s_start,
.next = s_next,
.stop = s_stop,
.show = s_show
};
static int kallsyms_open(struct inode *inode, struct file *file)
{
/*
* We keep iterator in m->private, since normal case is to
	 * s_start from where we left off, so we avoid
	 * using get_symbol_offset for every symbol.
*/
struct kallsym_iter *iter;
int ret;
iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
reset_iter(iter, 0);
ret = seq_open(file, &kallsyms_op);
if (ret == 0)
((struct seq_file *)file->private_data)->private = iter;
else
kfree(iter);
return ret;
}
static const struct file_operations kallsyms_operations = {
.open = kallsyms_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
static int __init kallsyms_init(void)
{
proc_create("kallsyms", 0444, NULL, &kallsyms_operations);
return 0;
}
device_initcall(kallsyms_init);

1504
kernel/kernel/kexec.c Normal file

File diff suppressed because it is too large Load Diff

197
kernel/kernel/kfifo.c Normal file
View File

@@ -0,0 +1,197 @@
/*
* A simple kernel FIFO implementation.
*
* Copyright (C) 2004 Stelian Pop <stelian@popies.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/kfifo.h>
#include <linux/log2.h>
/**
* kfifo_init - allocates a new FIFO using a preallocated buffer
* @buffer: the preallocated buffer to be used.
 * @size: the size of the internal buffer; this has to be a power of 2.
* @gfp_mask: get_free_pages mask, passed to kmalloc()
* @lock: the lock to be used to protect the fifo buffer
*
* Do NOT pass the kfifo to kfifo_free() after use! Simply free the
* &struct kfifo with kfree().
*/
struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
gfp_t gfp_mask, spinlock_t *lock)
{
struct kfifo *fifo;
/* size must be a power of 2 */
BUG_ON(!is_power_of_2(size));
fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
if (!fifo)
return ERR_PTR(-ENOMEM);
fifo->buffer = buffer;
fifo->size = size;
fifo->in = fifo->out = 0;
fifo->lock = lock;
return fifo;
}
EXPORT_SYMBOL(kfifo_init);
/**
* kfifo_alloc - allocates a new FIFO and its internal buffer
* @size: the size of the internal buffer to be allocated.
* @gfp_mask: get_free_pages mask, passed to kmalloc()
* @lock: the lock to be used to protect the fifo buffer
*
* The size will be rounded-up to a power of 2.
*/
struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
{
unsigned char *buffer;
struct kfifo *ret;
/*
* round up to the next power of 2, since our 'let the indices
* wrap' technique works only in this case.
*/
if (!is_power_of_2(size)) {
BUG_ON(size > 0x80000000);
size = roundup_pow_of_two(size);
}
buffer = kmalloc(size, gfp_mask);
if (!buffer)
return ERR_PTR(-ENOMEM);
ret = kfifo_init(buffer, size, gfp_mask, lock);
if (IS_ERR(ret))
kfree(buffer);
return ret;
}
EXPORT_SYMBOL(kfifo_alloc);
/**
* kfifo_free - frees the FIFO
* @fifo: the fifo to be freed.
*/
void kfifo_free(struct kfifo *fifo)
{
kfree(fifo->buffer);
kfree(fifo);
}
EXPORT_SYMBOL(kfifo_free);
/**
* __kfifo_put - puts some data into the FIFO, no locking version
* @fifo: the fifo to be used.
* @buffer: the data to be added.
* @len: the length of the data to be added.
*
* This function copies at most @len bytes from the @buffer into
* the FIFO depending on the free space, and returns the number of
* bytes copied.
*
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
unsigned int __kfifo_put(struct kfifo *fifo,
const unsigned char *buffer, unsigned int len)
{
unsigned int l;
len = min(len, fifo->size - fifo->in + fifo->out);
/*
* Ensure that we sample the fifo->out index -before- we
* start putting bytes into the kfifo.
*/
smp_mb();
/* first put the data starting from fifo->in to buffer end */
l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
/* then put the rest (if any) at the beginning of the buffer */
memcpy(fifo->buffer, buffer + l, len - l);
/*
* Ensure that we add the bytes to the kfifo -before-
* we update the fifo->in index.
*/
smp_wmb();
fifo->in += len;
return len;
}
EXPORT_SYMBOL(__kfifo_put);
/**
* __kfifo_get - gets some data from the FIFO, no locking version
* @fifo: the fifo to be used.
* @buffer: where the data must be copied.
* @len: the size of the destination buffer.
*
* This function copies at most @len bytes from the FIFO into the
* @buffer and returns the number of copied bytes.
*
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
unsigned int __kfifo_get(struct kfifo *fifo,
unsigned char *buffer, unsigned int len)
{
unsigned int l;
len = min(len, fifo->in - fifo->out);
/*
* Ensure that we sample the fifo->in index -before- we
* start removing bytes from the kfifo.
*/
smp_rmb();
/* first get the data from fifo->out until the end of the buffer */
l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
/* then get the rest (if any) from the beginning of the buffer */
memcpy(buffer + l, fifo->buffer, len - l);
/*
* Ensure that we remove the bytes from the kfifo -before-
* we update the fifo->out index.
*/
smp_mb();
fifo->out += len;
return len;
}
EXPORT_SYMBOL(__kfifo_get);
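/*
 * Illustrative sketch, not part of the original file: a single-producer,
 * single-consumer byte queue built on the helpers above. As the comments
 * note, one reader and one writer need no extra locking, so the spinlock
 * handed to kfifo_alloc() is only there for the locked wrappers in
 * <linux/kfifo.h>. All "demo_" names are invented for the example.
 */
static DEFINE_SPINLOCK(demo_fifo_lock);
static struct kfifo *demo_fifo;

static int demo_fifo_setup(void)
{
	demo_fifo = kfifo_alloc(128, GFP_KERNEL, &demo_fifo_lock);
	return IS_ERR(demo_fifo) ? PTR_ERR(demo_fifo) : 0;
}

static void demo_fifo_roundtrip(void)
{
	unsigned char out[4];
	unsigned int copied;

	__kfifo_put(demo_fifo, (const unsigned char *)"ping", 4);
	copied = __kfifo_get(demo_fifo, out, sizeof(out));
	printk(KERN_DEBUG "demo fifo returned %u bytes\n", copied);
}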

1733
kernel/kernel/kgdb.c Normal file

File diff suppressed because it is too large Load Diff

538
kernel/kernel/kmod.c Normal file
View File

@@ -0,0 +1,538 @@
/*
kmod, the new module loader (replaces kerneld)
Kirk Petersen
Reorganized not to be a daemon by Adam Richter, with guidance
from Greg Zornetzer.
Modified to avoid chroot and file sharing problems.
Mikael Pettersson
Limit the concurrent number of kmod modprobes to catch loops from
"modprobe needs a service that is in a module".
Keith Owens <kaos@ocs.com.au> December 1999
Unblock all signals when we exec a usermode process.
Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000
call_usermodehelper wait flag, and remove exec_usermodehelper.
Rusty Russell <rusty@rustcorp.com.au> Jan 2003
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/completion.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/resource.h>
#include <linux/notifier.h>
#include <linux/suspend.h>
#include <asm/uaccess.h>
#include <trace/events/module.h>
extern int max_threads;
static struct workqueue_struct *khelper_wq;
#ifdef CONFIG_MODULES
/*
modprobe_path is set via /proc/sys.
*/
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
/**
* __request_module - try to load a kernel module
* @wait: wait (or not) for the operation to complete
* @fmt: printf style format string for the name of the module
* @...: arguments as specified in the format string
*
* Load a module using the user mode module loader. The function returns
* zero on success or a negative errno code on failure. Note that a
* successful module load does not mean the module did not then unload
* and exit on an error of its own. Callers must check that the service
 * they requested is now available, not blindly invoke it.
*
* If module auto-loading support is disabled then this function
* becomes a no-operation.
*/
int __request_module(bool wait, const char *fmt, ...)
{
va_list args;
char module_name[MODULE_NAME_LEN];
unsigned int max_modprobes;
int ret;
char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
static char *envp[] = { "HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };
static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
static int kmod_loop_msg;
ret = security_kernel_module_request();
if (ret)
return ret;
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return -ENAMETOOLONG;
/* If modprobe needs a service that is in a module, we get a recursive
* loop. Limit the number of running kmod threads to max_threads/2 or
* MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
* would be to run the parents of this process, counting how many times
* kmod was invoked. That would mean accessing the internals of the
* process tables to get the command line, proc_pid_cmdline is static
* and it is not worth changing the proc code just to handle this case.
* KAO.
*
* "trace the ppid" is simple, but will fail if someone's
* parent exits. I think this is as good as it gets. --RR
*/
max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
atomic_inc(&kmod_concurrent);
if (atomic_read(&kmod_concurrent) > max_modprobes) {
/* We may be blaming an innocent here, but unlikely */
if (kmod_loop_msg++ < 5)
printk(KERN_ERR
"request_module: runaway loop modprobe %s\n",
module_name);
atomic_dec(&kmod_concurrent);
return -ENOMEM;
}
trace_module_request(module_name, wait, _RET_IP_);
ret = call_usermodehelper(modprobe_path, argv, envp,
wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
atomic_dec(&kmod_concurrent);
return ret;
}
EXPORT_SYMBOL(__request_module);
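/*
 * Illustrative sketch, not part of the original file: the common calling
 * pattern through the request_module() wrapper from <linux/kmod.h>, which
 * expands to __request_module(true, ...). The "demo-proto-%d" module name
 * is invented for the example.
 */
static int demo_load_protocol(int id)
{
	int err = request_module("demo-proto-%d", id);

	if (err)
		return err;
	/*
	 * Success here only means modprobe completed; as stressed above,
	 * the caller must still look the service up in its own registry
	 * before using it.
	 */
	return 0;
}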
#endif /* CONFIG_MODULES */
struct subprocess_info {
struct work_struct work;
struct completion *complete;
struct cred *cred;
char *path;
char **argv;
char **envp;
enum umh_wait wait;
int retval;
struct file *stdin;
void (*cleanup)(char **argv, char **envp);
};
/*
* This is the task which runs the usermode application
*/
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
int retval;
BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
/* Unblock all signals */
spin_lock_irq(&current->sighand->siglock);
flush_signal_handlers(current, 1);
sigemptyset(&current->blocked);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
/* Install the credentials */
commit_creds(sub_info->cred);
sub_info->cred = NULL;
/* Install input pipe when needed */
if (sub_info->stdin) {
struct files_struct *f = current->files;
struct fdtable *fdt;
/* no races because files should be private here */
sys_close(0);
fd_install(0, sub_info->stdin);
spin_lock(&f->file_lock);
fdt = files_fdtable(f);
FD_SET(0, fdt->open_fds);
FD_CLR(0, fdt->close_on_exec);
spin_unlock(&f->file_lock);
/* and disallow core files too */
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0};
}
/* We can run anywhere, unlike our parent keventd(). */
set_cpus_allowed_ptr(current, cpu_all_mask);
/*
* Our parent is keventd, which runs with elevated scheduling priority.
* Avoid propagating that into the userspace child.
*/
set_user_nice(current, 0);
retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
/* Exec failed? */
sub_info->retval = retval;
do_exit(0);
}
void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
if (info->cleanup)
(*info->cleanup)(info->argv, info->envp);
if (info->cred)
put_cred(info->cred);
kfree(info);
}
EXPORT_SYMBOL(call_usermodehelper_freeinfo);
/* Keventd can't block, but this (a child) can. */
static int wait_for_helper(void *data)
{
struct subprocess_info *sub_info = data;
pid_t pid;
/* Install a handler: if SIGCLD isn't handled sys_wait4 won't
* populate the status, but will return -ECHILD. */
allow_signal(SIGCHLD);
pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
} else {
int ret;
/*
* Normally it is bogus to call wait4() from in-kernel because
* wait4() wants to write the exit code to a userspace address.
* But wait_for_helper() always runs as keventd, and put_user()
* to a kernel address works OK for kernel threads, due to their
* having an mm_segment_t which spans the entire address space.
*
* Thus the __user pointer cast is valid here.
*/
sys_wait4(pid, (int __user *)&ret, 0, NULL);
/*
* If ret is 0, either ____call_usermodehelper failed and the
* real error code is already in sub_info->retval or
* sub_info->retval is 0 anyway, so don't mess with it then.
*/
if (ret)
sub_info->retval = ret;
}
if (sub_info->wait == UMH_NO_WAIT)
call_usermodehelper_freeinfo(sub_info);
else
complete(sub_info->complete);
return 0;
}
/* This is run by khelper thread */
static void __call_usermodehelper(struct work_struct *work)
{
struct subprocess_info *sub_info =
container_of(work, struct subprocess_info, work);
pid_t pid;
enum umh_wait wait = sub_info->wait;
BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
	/* CLONE_VFORK: wait until the usermode helper has execve'd
	 * successfully. We need the data structures to stay around
	 * until that is done. */
if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT)
pid = kernel_thread(wait_for_helper, sub_info,
CLONE_FS | CLONE_FILES | SIGCHLD);
else
pid = kernel_thread(____call_usermodehelper, sub_info,
CLONE_VFORK | SIGCHLD);
switch (wait) {
case UMH_NO_WAIT:
break;
case UMH_WAIT_PROC:
if (pid > 0)
break;
sub_info->retval = pid;
/* FALLTHROUGH */
case UMH_WAIT_EXEC:
complete(sub_info->complete);
}
}
#ifdef CONFIG_PM_SLEEP
/*
* If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
* (used for preventing user land processes from being created after the user
* land has been frozen during a system-wide hibernation or suspend operation).
*/
static int usermodehelper_disabled;
/* Number of helpers running */
static atomic_t running_helpers = ATOMIC_INIT(0);
/*
* Wait queue head used by usermodehelper_pm_callback() to wait for all running
* helpers to finish.
*/
static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
/*
* Time to wait for running_helpers to become zero before the setting of
* usermodehelper_disabled in usermodehelper_pm_callback() fails
*/
#define RUNNING_HELPERS_TIMEOUT (5 * HZ)
/**
* usermodehelper_disable - prevent new helpers from being started
*/
int usermodehelper_disable(void)
{
long retval;
usermodehelper_disabled = 1;
smp_mb();
/*
* From now on call_usermodehelper_exec() won't start any new
* helpers, so it is sufficient if running_helpers turns out to
* be zero at one point (it may be increased later, but that
* doesn't matter).
*/
retval = wait_event_timeout(running_helpers_waitq,
atomic_read(&running_helpers) == 0,
RUNNING_HELPERS_TIMEOUT);
if (retval)
return 0;
usermodehelper_disabled = 0;
return -EAGAIN;
}
/**
* usermodehelper_enable - allow new helpers to be started again
*/
void usermodehelper_enable(void)
{
usermodehelper_disabled = 0;
}
static void helper_lock(void)
{
atomic_inc(&running_helpers);
smp_mb__after_atomic_inc();
}
static void helper_unlock(void)
{
if (atomic_dec_and_test(&running_helpers))
wake_up(&running_helpers_waitq);
}
#else /* CONFIG_PM_SLEEP */
#define usermodehelper_disabled 0
static inline void helper_lock(void) {}
static inline void helper_unlock(void) {}
#endif /* CONFIG_PM_SLEEP */
/**
* call_usermodehelper_setup - prepare to call a usermode helper
* @path: path to usermode executable
* @argv: arg vector for process
* @envp: environment for process
* @gfp_mask: gfp mask for memory allocation
*
* Returns either %NULL on allocation failure, or a subprocess_info
* structure. This should be passed to call_usermodehelper_exec to
* exec the process and free the structure.
*/
struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
char **envp, gfp_t gfp_mask)
{
struct subprocess_info *sub_info;
sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
if (!sub_info)
goto out;
INIT_WORK(&sub_info->work, __call_usermodehelper);
sub_info->path = path;
sub_info->argv = argv;
sub_info->envp = envp;
sub_info->cred = prepare_usermodehelper_creds();
if (!sub_info->cred) {
kfree(sub_info);
return NULL;
}
out:
return sub_info;
}
EXPORT_SYMBOL(call_usermodehelper_setup);
/**
* call_usermodehelper_setkeys - set the session keys for usermode helper
* @info: a subprocess_info returned by call_usermodehelper_setup
* @session_keyring: the session keyring for the process
*/
void call_usermodehelper_setkeys(struct subprocess_info *info,
struct key *session_keyring)
{
#ifdef CONFIG_KEYS
struct thread_group_cred *tgcred = info->cred->tgcred;
key_put(tgcred->session_keyring);
tgcred->session_keyring = key_get(session_keyring);
#else
BUG();
#endif
}
EXPORT_SYMBOL(call_usermodehelper_setkeys);
/**
* call_usermodehelper_setcleanup - set a cleanup function
* @info: a subprocess_info returned by call_usermodehelper_setup
* @cleanup: a cleanup function
*
 * The cleanup function is called just before the subprocess_info is
 * freed. This can be used for freeing the argv and envp. The
 * function must be runnable in either a process context or the
 * context in which call_usermodehelper_exec is called.
*/
void call_usermodehelper_setcleanup(struct subprocess_info *info,
void (*cleanup)(char **argv, char **envp))
{
info->cleanup = cleanup;
}
EXPORT_SYMBOL(call_usermodehelper_setcleanup);
/**
* call_usermodehelper_stdinpipe - set up a pipe to be used for stdin
* @sub_info: a subprocess_info returned by call_usermodehelper_setup
* @filp: set to the write-end of a pipe
*
* This constructs a pipe, and sets the read end to be the stdin of the
* subprocess, and returns the write-end in *@filp.
*/
int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
struct file **filp)
{
struct file *f;
f = create_write_pipe(0);
if (IS_ERR(f))
return PTR_ERR(f);
*filp = f;
f = create_read_pipe(f, 0);
if (IS_ERR(f)) {
free_write_pipe(*filp);
return PTR_ERR(f);
}
sub_info->stdin = f;
return 0;
}
EXPORT_SYMBOL(call_usermodehelper_stdinpipe);
/**
* call_usermodehelper_exec - start a usermode application
 * @sub_info: information about the subprocess
* @wait: wait for the application to finish and return status.
 * when UMH_NO_WAIT, don't wait at all, but you get no useful error back when
* the program couldn't be exec'ed. This makes it safe to call
* from interrupt context.
*
* Runs a user-space application. The application is started
* asynchronously if wait is not set, and runs as a child of keventd.
* (ie. it runs with full root capabilities).
*/
int call_usermodehelper_exec(struct subprocess_info *sub_info,
enum umh_wait wait)
{
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
validate_creds(sub_info->cred);
helper_lock();
if (sub_info->path[0] == '\0')
goto out;
if (!khelper_wq || usermodehelper_disabled) {
retval = -EBUSY;
goto out;
}
sub_info->complete = &done;
sub_info->wait = wait;
queue_work(khelper_wq, &sub_info->work);
if (wait == UMH_NO_WAIT) /* task has freed sub_info */
goto unlock;
wait_for_completion(&done);
retval = sub_info->retval;
out:
call_usermodehelper_freeinfo(sub_info);
unlock:
helper_unlock();
return retval;
}
EXPORT_SYMBOL(call_usermodehelper_exec);
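/*
 * Illustrative sketch, not part of the original file: running a helper
 * with the setup/exec pair defined above and waiting for it to exit.
 * "/sbin/demo-helper" and its argument are made up for the example.
 */
static int demo_run_helper(void)
{
	struct subprocess_info *info;
	char *argv[] = { "/sbin/demo-helper", "--oneshot", NULL };
	char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	/* exec() frees info itself for every wait mode except UMH_NO_WAIT */
	return call_usermodehelper_exec(info, UMH_WAIT_PROC);
}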
/**
* call_usermodehelper_pipe - call a usermode helper process with a pipe stdin
* @path: path to usermode executable
* @argv: arg vector for process
* @envp: environment for process
* @filp: set to the write-end of a pipe
*
* This is a simple wrapper which executes a usermode-helper function
* with a pipe as stdin. It is implemented entirely in terms of
* lower-level call_usermodehelper_* functions.
*/
int call_usermodehelper_pipe(char *path, char **argv, char **envp,
struct file **filp)
{
struct subprocess_info *sub_info;
int ret;
sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
if (sub_info == NULL)
return -ENOMEM;
ret = call_usermodehelper_stdinpipe(sub_info, filp);
if (ret < 0)
goto out;
return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
out:
call_usermodehelper_freeinfo(sub_info);
return ret;
}
EXPORT_SYMBOL(call_usermodehelper_pipe);
void __init usermodehelper_init(void)
{
khelper_wq = create_singlethread_workqueue("khelper");
BUG_ON(!khelper_wq);
}

1555
kernel/kernel/kprobes.c Normal file

File diff suppressed because it is too large Load Diff

189
kernel/kernel/ksysfs.c Normal file
View File

@@ -0,0 +1,189 @@
/*
* kernel/ksysfs.c - sysfs attributes in /sys/kernel, which
* are not related to any other subsystem
*
* Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
*
 * This file is released under the GPLv2
*
*/
#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kexec.h>
#include <linux/profile.h>
#include <linux/sched.h>
#define KERNEL_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
#define KERNEL_ATTR_RW(_name) \
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)
#if defined(CONFIG_HOTPLUG)
/* current uevent sequence number */
static ssize_t uevent_seqnum_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum);
}
KERNEL_ATTR_RO(uevent_seqnum);
/* uevent helper program, used during early boot */
static ssize_t uevent_helper_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", uevent_helper);
}
static ssize_t uevent_helper_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
if (count+1 > UEVENT_HELPER_PATH_LEN)
return -ENOENT;
memcpy(uevent_helper, buf, count);
uevent_helper[count] = '\0';
if (count && uevent_helper[count-1] == '\n')
uevent_helper[count-1] = '\0';
return count;
}
KERNEL_ATTR_RW(uevent_helper);
#endif
#ifdef CONFIG_PROFILING
static ssize_t profiling_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", prof_on);
}
static ssize_t profiling_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int ret;
if (prof_on)
return -EEXIST;
/*
* This eventually calls into get_option() which
* has a ton of callers and is not const. It is
* easiest to cast it away here.
*/
profile_setup((char *)buf);
ret = profile_init();
if (ret)
return ret;
ret = create_proc_profile();
if (ret)
return ret;
return count;
}
KERNEL_ATTR_RW(profiling);
#endif
#ifdef CONFIG_KEXEC
static ssize_t kexec_loaded_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", !!kexec_image);
}
KERNEL_ATTR_RO(kexec_loaded);
static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", !!kexec_crash_image);
}
KERNEL_ATTR_RO(kexec_crash_loaded);
static ssize_t vmcoreinfo_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%lx %x\n",
paddr_vmcoreinfo_note(),
(unsigned int)vmcoreinfo_max_size);
}
KERNEL_ATTR_RO(vmcoreinfo);
#endif /* CONFIG_KEXEC */
/*
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
*/
extern const void __start_notes __attribute__((weak));
extern const void __stop_notes __attribute__((weak));
#define notes_size (&__stop_notes - &__start_notes)
static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
memcpy(buf, &__start_notes + off, count);
return count;
}
static struct bin_attribute notes_attr = {
.attr = {
.name = "notes",
.mode = S_IRUGO,
},
.read = &notes_read,
};
struct kobject *kernel_kobj;
EXPORT_SYMBOL_GPL(kernel_kobj);
static struct attribute * kernel_attrs[] = {
#if defined(CONFIG_HOTPLUG)
&uevent_seqnum_attr.attr,
&uevent_helper_attr.attr,
#endif
#ifdef CONFIG_PROFILING
&profiling_attr.attr,
#endif
#ifdef CONFIG_KEXEC
&kexec_loaded_attr.attr,
&kexec_crash_loaded_attr.attr,
&vmcoreinfo_attr.attr,
#endif
NULL
};
static struct attribute_group kernel_attr_group = {
.attrs = kernel_attrs,
};
static int __init ksysfs_init(void)
{
int error;
kernel_kobj = kobject_create_and_add("kernel", NULL);
if (!kernel_kobj) {
error = -ENOMEM;
goto exit;
}
error = sysfs_create_group(kernel_kobj, &kernel_attr_group);
if (error)
goto kset_exit;
if (notes_size > 0) {
notes_attr.size = notes_size;
error = sysfs_create_bin_file(kernel_kobj, &notes_attr);
if (error)
goto group_exit;
}
return 0;
group_exit:
sysfs_remove_group(kernel_kobj, &kernel_attr_group);
kset_exit:
kobject_put(kernel_kobj);
exit:
return error;
}
core_initcall(ksysfs_init);
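/*
 * Illustrative sketch, not part of the original file: the show/store pair
 * that the KERNEL_ATTR_RW() macro above expects. The "demo" attribute and
 * its backing variable are invented; to actually appear under /sys/kernel
 * the resulting demo_attr.attr would also have to be added to kernel_attrs[].
 */
static int demo_value;

static ssize_t demo_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", demo_value);
}

static ssize_t demo_store(struct kobject *kobj, struct kobj_attribute *attr,
			  const char *buf, size_t count)
{
	demo_value = simple_strtol(buf, NULL, 10);
	return count;
}
KERNEL_ATTR_RW(demo);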

226
kernel/kernel/kthread.c Normal file
View File

@@ -0,0 +1,226 @@
/* Kernel thread helper functions.
* Copyright (C) 2004 IBM Corporation, Rusty Russell.
*
* Creation is done via kthreadd, so that we get a clean environment
* even if we're invoked from userspace (think modprobe, hotplug cpu,
* etc.).
*/
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/cpuset.h>
#include <linux/unistd.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <trace/events/sched.h>
static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
struct kthread_create_info
{
/* Information passed to kthread() from kthreadd. */
int (*threadfn)(void *data);
void *data;
/* Result passed back to kthread_create() from kthreadd. */
struct task_struct *result;
struct completion done;
struct list_head list;
};
struct kthread {
int should_stop;
struct completion exited;
};
#define to_kthread(tsk) \
container_of((tsk)->vfork_done, struct kthread, exited)
/**
* kthread_should_stop - should this kthread return now?
*
* When someone calls kthread_stop() on your kthread, it will be woken
* and this will return true. You should then return, and your return
* value will be passed through to kthread_stop().
*/
int kthread_should_stop(void)
{
return to_kthread(current)->should_stop;
}
EXPORT_SYMBOL(kthread_should_stop);
static int kthread(void *_create)
{
/* Copy data: it's on kthread's stack */
struct kthread_create_info *create = _create;
int (*threadfn)(void *data) = create->threadfn;
void *data = create->data;
struct kthread self;
int ret;
self.should_stop = 0;
init_completion(&self.exited);
current->vfork_done = &self.exited;
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
create->result = current;
complete(&create->done);
schedule();
ret = -EINTR;
if (!self.should_stop)
ret = threadfn(data);
/* we can't just return, we must preserve "self" on stack */
do_exit(ret);
}
static void create_kthread(struct kthread_create_info *create)
{
int pid;
/* We want our own signal handler (we take no signals by default). */
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
if (pid < 0) {
create->result = ERR_PTR(pid);
complete(&create->done);
}
}
/**
* kthread_create - create a kthread.
* @threadfn: the function to run until signal_pending(current).
* @data: data ptr for @threadfn.
* @namefmt: printf-style name for the thread.
*
* Description: This helper function creates and names a kernel
* thread. The thread will be stopped: use wake_up_process() to start
* it. See also kthread_run(), kthread_create_on_cpu().
*
* When woken, the thread will run @threadfn() with @data as its
* argument. @threadfn() can either call do_exit() directly if it is a
 * standalone thread for which no one will call kthread_stop(), or
* return when 'kthread_should_stop()' is true (which means
* kthread_stop() has been called). The return value should be zero
* or a negative error number; it will be passed to kthread_stop().
*
* Returns a task_struct or ERR_PTR(-ENOMEM).
*/
struct task_struct *kthread_create(int (*threadfn)(void *data),
void *data,
const char namefmt[],
...)
{
struct kthread_create_info create;
create.threadfn = threadfn;
create.data = data;
init_completion(&create.done);
spin_lock(&kthread_create_lock);
list_add_tail(&create.list, &kthread_create_list);
spin_unlock(&kthread_create_lock);
wake_up_process(kthreadd_task);
wait_for_completion(&create.done);
if (!IS_ERR(create.result)) {
struct sched_param param = { .sched_priority = 0 };
va_list args;
va_start(args, namefmt);
vsnprintf(create.result->comm, sizeof(create.result->comm),
namefmt, args);
va_end(args);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
* The kernel thread should not inherit these properties.
*/
sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param);
set_cpus_allowed_ptr(create.result, cpu_all_mask);
}
return create.result;
}
EXPORT_SYMBOL(kthread_create);
/**
* kthread_stop - stop a thread created by kthread_create().
* @k: thread created by kthread_create().
*
* Sets kthread_should_stop() for @k to return true, wakes it, and
* waits for it to exit. This can also be called after kthread_create()
* instead of calling wake_up_process(): the thread will exit without
* calling threadfn().
*
* If threadfn() may call do_exit() itself, the caller must ensure
* task_struct can't go away.
*
* Returns the result of threadfn(), or %-EINTR if wake_up_process()
* was never called.
*/
int kthread_stop(struct task_struct *k)
{
struct kthread *kthread;
int ret;
trace_sched_kthread_stop(k);
get_task_struct(k);
kthread = to_kthread(k);
barrier(); /* it might have exited */
if (k->vfork_done != NULL) {
kthread->should_stop = 1;
wake_up_process(k);
wait_for_completion(&kthread->exited);
}
ret = k->exit_code;
put_task_struct(k);
trace_sched_kthread_stop_ret(ret);
return ret;
}
EXPORT_SYMBOL(kthread_stop);
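/*
 * Illustrative sketch, not part of the original file: the usual lifecycle
 * built on the helpers above. demo_worker() and its one-second pacing are
 * invented; the create/wake/stop pattern is the documented one.
 */
static struct task_struct *demo_task;

static int demo_worker(void *data)
{
	while (!kthread_should_stop()) {
		/* do one unit of work here, then sleep politely */
		schedule_timeout_interruptible(HZ);
	}
	return 0;	/* handed back to kthread_stop() */
}

static int demo_worker_start(void)
{
	demo_task = kthread_create(demo_worker, NULL, "demo-worker");
	if (IS_ERR(demo_task))
		return PTR_ERR(demo_task);
	wake_up_process(demo_task);
	return 0;
}

static void demo_worker_stop(void)
{
	kthread_stop(demo_task);
}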
int kthreadd(void *unused)
{
struct task_struct *tsk = current;
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_states[N_HIGH_MEMORY]);
current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&kthread_create_list))
schedule();
__set_current_state(TASK_RUNNING);
spin_lock(&kthread_create_lock);
while (!list_empty(&kthread_create_list)) {
struct kthread_create_info *create;
create = list_entry(kthread_create_list.next,
struct kthread_create_info, list);
list_del_init(&create->list);
spin_unlock(&kthread_create_lock);
create_kthread(create);
spin_lock(&kthread_create_lock);
}
spin_unlock(&kthread_create_lock);
}
return 0;
}

297
kernel/kernel/latencytop.c Normal file
View File

@@ -0,0 +1,297 @@
/*
* latencytop.c: Latency display infrastructure
*
* (C) Copyright 2008 Intel Corporation
* Author: Arjan van de Ven <arjan@linux.intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
/*
* CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
* used by the "latencytop" userspace tool. The latency that is tracked is not
* the 'traditional' interrupt latency (which is primarily caused by something
* else consuming CPU), but instead, it is the latency an application encounters
* because the kernel sleeps on its behalf for various reasons.
*
* This code tracks 2 levels of statistics:
* 1) System level latency
* 2) Per process latency
*
* The latency is stored in fixed sized data structures in an accumulated form;
* if the "same" latency cause is hit twice, this will be tracked as one entry
* in the data structure. Both the count, total accumulated latency and maximum
* latency are tracked in this data structure. When the fixed size structure is
* full, no new causes are tracked until the buffer is flushed by writing to
* the /proc file; the userspace tool does this on a regular basis.
*
* A latency cause is identified by a stringified backtrace at the point that
* the scheduler gets invoked. The userland tool will use this string to
* identify the cause of the latency in human readable form.
*
* The information is exported via /proc/latency_stats and /proc/<pid>/latency.
* These files look like this:
*
* Latency Top version : v0.1
* 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
* | | | |
* | | | +----> the stringified backtrace
* | | +---------> The maximum latency for this entry in microseconds
* | +--------------> The accumulated latency for this entry (microseconds)
* +-------------------> The number of times this entry is hit
*
* (note: the average latency is the accumulated latency divided by the number
* of times)
*/
#include <linux/latencytop.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
static DEFINE_SPINLOCK(latency_lock);
#define MAXLR 128
static struct latency_record latency_record[MAXLR];
int latencytop_enabled;
void clear_all_latency_tracing(struct task_struct *p)
{
unsigned long flags;
if (!latencytop_enabled)
return;
spin_lock_irqsave(&latency_lock, flags);
memset(&p->latency_record, 0, sizeof(p->latency_record));
p->latency_record_count = 0;
spin_unlock_irqrestore(&latency_lock, flags);
}
static void clear_global_latency_tracing(void)
{
unsigned long flags;
spin_lock_irqsave(&latency_lock, flags);
memset(&latency_record, 0, sizeof(latency_record));
spin_unlock_irqrestore(&latency_lock, flags);
}
static void __sched
account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat)
{
int firstnonnull = MAXLR + 1;
int i;
if (!latencytop_enabled)
return;
/* skip kernel threads for now */
if (!tsk->mm)
return;
for (i = 0; i < MAXLR; i++) {
int q, same = 1;
/* Nothing stored: */
if (!latency_record[i].backtrace[0]) {
if (firstnonnull > i)
firstnonnull = i;
continue;
}
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
unsigned long record = lat->backtrace[q];
if (latency_record[i].backtrace[q] != record) {
same = 0;
break;
}
/* 0 and ULONG_MAX entries mean end of backtrace: */
if (record == 0 || record == ULONG_MAX)
break;
}
if (same) {
latency_record[i].count++;
latency_record[i].time += lat->time;
if (lat->time > latency_record[i].max)
latency_record[i].max = lat->time;
return;
}
}
i = firstnonnull;
if (i >= MAXLR - 1)
return;
	/* Allocated a new one: */
memcpy(&latency_record[i], lat, sizeof(struct latency_record));
}
/*
* Iterator to store a backtrace into a latency record entry
*/
static inline void store_stacktrace(struct task_struct *tsk,
struct latency_record *lat)
{
struct stack_trace trace;
memset(&trace, 0, sizeof(trace));
trace.max_entries = LT_BACKTRACEDEPTH;
trace.entries = &lat->backtrace[0];
save_stack_trace_tsk(tsk, &trace);
}
/**
 * __account_scheduler_latency - record a latency that just occurred
* @tsk - the task struct of the task hitting the latency
* @usecs - the duration of the latency in microseconds
* @inter - 1 if the sleep was interruptible, 0 if uninterruptible
*
* This function is the main entry point for recording latency entries
* as called by the scheduler.
*
* This function has a few special cases to deal with normal 'non-latency'
* sleeps: specifically, interruptible sleep longer than 5 msec is skipped
* since this usually is caused by waiting for events via select() and co.
*
* Negative latencies (caused by time going backwards) are also explicitly
* skipped.
*/
void __sched
__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
{
unsigned long flags;
int i, q;
struct latency_record lat;
/* Long interruptible waits are generally user requested... */
if (inter && usecs > 5000)
return;
/* Negative sleeps are time going backwards */
/* Zero-time sleeps are non-interesting */
if (usecs <= 0)
return;
memset(&lat, 0, sizeof(lat));
lat.count = 1;
lat.time = usecs;
lat.max = usecs;
store_stacktrace(tsk, &lat);
spin_lock_irqsave(&latency_lock, flags);
account_global_scheduler_latency(tsk, &lat);
for (i = 0; i < tsk->latency_record_count; i++) {
struct latency_record *mylat;
int same = 1;
mylat = &tsk->latency_record[i];
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
unsigned long record = lat.backtrace[q];
if (mylat->backtrace[q] != record) {
same = 0;
break;
}
/* 0 and ULONG_MAX entries mean end of backtrace: */
if (record == 0 || record == ULONG_MAX)
break;
}
if (same) {
mylat->count++;
mylat->time += lat.time;
if (lat.time > mylat->max)
mylat->max = lat.time;
goto out_unlock;
}
}
/*
	 * short term hack; if we're > 32 we stop; in the future we recycle:
*/
if (tsk->latency_record_count >= LT_SAVECOUNT)
goto out_unlock;
/* Allocated a new one: */
i = tsk->latency_record_count++;
memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
out_unlock:
spin_unlock_irqrestore(&latency_lock, flags);
}
static int lstats_show(struct seq_file *m, void *v)
{
int i;
seq_puts(m, "Latency Top version : v0.1\n");
for (i = 0; i < MAXLR; i++) {
if (latency_record[i].backtrace[0]) {
int q;
seq_printf(m, "%i %lu %lu ",
latency_record[i].count,
latency_record[i].time,
latency_record[i].max);
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
char sym[KSYM_SYMBOL_LEN];
char *c;
if (!latency_record[i].backtrace[q])
break;
if (latency_record[i].backtrace[q] == ULONG_MAX)
break;
sprint_symbol(sym, latency_record[i].backtrace[q]);
c = strchr(sym, '+');
if (c)
*c = 0;
seq_printf(m, "%s ", sym);
}
seq_printf(m, "\n");
}
}
return 0;
}
static ssize_t
lstats_write(struct file *file, const char __user *buf, size_t count,
loff_t *offs)
{
clear_global_latency_tracing();
return count;
}
static int lstats_open(struct inode *inode, struct file *filp)
{
return single_open(filp, lstats_show, NULL);
}
static const struct file_operations lstats_fops = {
.open = lstats_open,
.read = seq_read,
.write = lstats_write,
.llseek = seq_lseek,
.release = single_release,
};
static int __init init_lstats_procfs(void)
{
proc_create("latency_stats", 0644, NULL, &lstats_fops);
return 0;
}
device_initcall(init_lstats_procfs);

3802
kernel/kernel/lockdep.c Normal file

File diff suppressed because it is too large Load Diff

140
kernel/kernel/lockdep_internals.h Normal file
View File

@@ -0,0 +1,140 @@
/*
* kernel/lockdep_internals.h
*
* Runtime locking correctness validator
*
* lockdep subsystem internal functions and variables.
*/
/*
* Lock-class usage-state bits:
*/
enum lock_usage_bit {
#define LOCKDEP_STATE(__STATE) \
LOCK_USED_IN_##__STATE, \
LOCK_USED_IN_##__STATE##_READ, \
LOCK_ENABLED_##__STATE, \
LOCK_ENABLED_##__STATE##_READ,
#include "lockdep_states.h"
#undef LOCKDEP_STATE
LOCK_USED,
LOCK_USAGE_STATES
};
/*
* Usage-state bitmasks:
*/
#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE),
enum {
#define LOCKDEP_STATE(__STATE) \
__LOCKF(USED_IN_##__STATE) \
__LOCKF(USED_IN_##__STATE##_READ) \
__LOCKF(ENABLED_##__STATE) \
__LOCKF(ENABLED_##__STATE##_READ)
#include "lockdep_states.h"
#undef LOCKDEP_STATE
__LOCKF(USED)
};
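/*
 * Worked example, not part of the original file: for the HARDIRQ entry in
 * lockdep_states.h the two macro blocks above expand to
 *
 *	LOCK_USED_IN_HARDIRQ, LOCK_USED_IN_HARDIRQ_READ,
 *	LOCK_ENABLED_HARDIRQ, LOCK_ENABLED_HARDIRQ_READ
 *
 * in enum lock_usage_bit, and to the matching one-bit LOCKF_* masks
 * (LOCKF_USED_IN_HARDIRQ = 1 << LOCK_USED_IN_HARDIRQ, and so on) in the
 * bitmask enum, which is what the LOCKF_*_IRQ composites below rely on.
 */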
#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
#define LOCKF_ENABLED_IRQ_READ \
(LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
#define LOCKF_USED_IN_IRQ_READ \
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
/*
* MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
* we track.
*
 * We use the per-lock dependency maps in two ways: we grow them by adding
 * every to-be-taken lock to every currently held lock's own dependency
 * table (if it's not there yet), and we check them for lock order
 * conflicts and deadlocks.
*/
#define MAX_LOCKDEP_ENTRIES 16384UL
#define MAX_LOCKDEP_CHAINS_BITS 15
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
/*
* Stack-trace: tightly packed array of stack backtrace
* addresses. Protected by the hash_lock.
*/
#define MAX_STACK_TRACE_ENTRIES 262144UL
extern struct list_head all_lock_classes;
extern struct lock_chain lock_chains[];
#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2)
extern void get_usage_chars(struct lock_class *class,
char usage[LOCK_USAGE_CHARS]);
extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);
extern unsigned long nr_lock_classes;
extern unsigned long nr_list_entries;
extern unsigned long nr_lock_chains;
extern int nr_chain_hlocks;
extern unsigned long nr_stack_trace_entries;
extern unsigned int nr_hardirq_chains;
extern unsigned int nr_softirq_chains;
extern unsigned int nr_process_chains;
extern unsigned int max_lockdep_depth;
extern unsigned int max_recursion_depth;
extern unsigned int max_bfs_queue_depth;
#ifdef CONFIG_PROVE_LOCKING
extern unsigned long lockdep_count_forward_deps(struct lock_class *);
extern unsigned long lockdep_count_backward_deps(struct lock_class *);
#else
static inline unsigned long
lockdep_count_forward_deps(struct lock_class *class)
{
return 0;
}
static inline unsigned long
lockdep_count_backward_deps(struct lock_class *class)
{
return 0;
}
#endif
#ifdef CONFIG_DEBUG_LOCKDEP
/*
* Various lockdep statistics:
*/
extern atomic_t chain_lookup_hits;
extern atomic_t chain_lookup_misses;
extern atomic_t hardirqs_on_events;
extern atomic_t hardirqs_off_events;
extern atomic_t redundant_hardirqs_on;
extern atomic_t redundant_hardirqs_off;
extern atomic_t softirqs_on_events;
extern atomic_t softirqs_off_events;
extern atomic_t redundant_softirqs_on;
extern atomic_t redundant_softirqs_off;
extern atomic_t nr_unused_locks;
extern atomic_t nr_cyclic_checks;
extern atomic_t nr_cyclic_check_recursions;
extern atomic_t nr_find_usage_forwards_checks;
extern atomic_t nr_find_usage_forwards_recursions;
extern atomic_t nr_find_usage_backwards_checks;
extern atomic_t nr_find_usage_backwards_recursions;
# define debug_atomic_inc(ptr) atomic_inc(ptr)
# define debug_atomic_dec(ptr) atomic_dec(ptr)
# define debug_atomic_read(ptr) atomic_read(ptr)
#else
# define debug_atomic_inc(ptr) do { } while (0)
# define debug_atomic_dec(ptr) do { } while (0)
# define debug_atomic_read(ptr) 0
#endif

691
kernel/kernel/lockdep_proc.c Normal file
View File

@@ -0,0 +1,691 @@
/*
* kernel/lockdep_proc.c
*
* Runtime locking correctness validator
*
* Started by Ingo Molnar:
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*
* Code for /proc/lockdep and /proc/lockdep_stats:
*
*/
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
#include "lockdep_internals.h"
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
return seq_list_next(v, &all_lock_classes, pos);
}
static void *l_start(struct seq_file *m, loff_t *pos)
{
return seq_list_start_head(&all_lock_classes, *pos);
}
static void l_stop(struct seq_file *m, void *v)
{
}
static void print_name(struct seq_file *m, struct lock_class *class)
{
char str[128];
const char *name = class->name;
if (!name) {
name = __get_key_name(class->key, str);
seq_printf(m, "%s", name);
	} else {
seq_printf(m, "%s", name);
if (class->name_version > 1)
seq_printf(m, "#%d", class->name_version);
if (class->subclass)
seq_printf(m, "/%d", class->subclass);
}
}
static int l_show(struct seq_file *m, void *v)
{
struct lock_class *class = list_entry(v, struct lock_class, lock_entry);
struct lock_list *entry;
char usage[LOCK_USAGE_CHARS];
if (v == &all_lock_classes) {
seq_printf(m, "all lock classes:\n");
return 0;
}
seq_printf(m, "%p", class->key);
#ifdef CONFIG_DEBUG_LOCKDEP
seq_printf(m, " OPS:%8ld", class->ops);
#endif
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class));
#endif
get_usage_chars(class, usage);
seq_printf(m, " %s", usage);
seq_printf(m, ": ");
print_name(m, class);
seq_puts(m, "\n");
list_for_each_entry(entry, &class->locks_after, entry) {
if (entry->distance == 1) {
seq_printf(m, " -> [%p] ", entry->class->key);
print_name(m, entry->class);
seq_puts(m, "\n");
}
}
seq_puts(m, "\n");
return 0;
}
static const struct seq_operations lockdep_ops = {
.start = l_start,
.next = l_next,
.stop = l_stop,
.show = l_show,
};
static int lockdep_open(struct inode *inode, struct file *file)
{
return seq_open(file, &lockdep_ops);
}
static const struct file_operations proc_lockdep_operations = {
.open = lockdep_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#ifdef CONFIG_PROVE_LOCKING
static void *lc_start(struct seq_file *m, loff_t *pos)
{
if (*pos == 0)
return SEQ_START_TOKEN;
if (*pos - 1 < nr_lock_chains)
return lock_chains + (*pos - 1);
return NULL;
}
static void *lc_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return lc_start(m, pos);
}
static void lc_stop(struct seq_file *m, void *v)
{
}
static int lc_show(struct seq_file *m, void *v)
{
struct lock_chain *chain = v;
struct lock_class *class;
int i;
if (v == SEQ_START_TOKEN) {
seq_printf(m, "all lock chains:\n");
return 0;
}
seq_printf(m, "irq_context: %d\n", chain->irq_context);
for (i = 0; i < chain->depth; i++) {
class = lock_chain_get_class(chain, i);
if (!class->key)
continue;
seq_printf(m, "[%p] ", class->key);
print_name(m, class);
seq_puts(m, "\n");
}
seq_puts(m, "\n");
return 0;
}
static const struct seq_operations lockdep_chains_ops = {
.start = lc_start,
.next = lc_next,
.stop = lc_stop,
.show = lc_show,
};
static int lockdep_chains_open(struct inode *inode, struct file *file)
{
return seq_open(file, &lockdep_chains_ops);
}
static const struct file_operations proc_lockdep_chains_operations = {
.open = lockdep_chains_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* CONFIG_PROVE_LOCKING */
static void lockdep_stats_debug_show(struct seq_file *m)
{
#ifdef CONFIG_DEBUG_LOCKDEP
unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
hi2 = debug_atomic_read(&hardirqs_off_events),
hr1 = debug_atomic_read(&redundant_hardirqs_on),
hr2 = debug_atomic_read(&redundant_hardirqs_off),
si1 = debug_atomic_read(&softirqs_on_events),
si2 = debug_atomic_read(&softirqs_off_events),
sr1 = debug_atomic_read(&redundant_softirqs_on),
sr2 = debug_atomic_read(&redundant_softirqs_off);
seq_printf(m, " chain lookup misses: %11u\n",
debug_atomic_read(&chain_lookup_misses));
seq_printf(m, " chain lookup hits: %11u\n",
debug_atomic_read(&chain_lookup_hits));
seq_printf(m, " cyclic checks: %11u\n",
debug_atomic_read(&nr_cyclic_checks));
seq_printf(m, " find-mask forwards checks: %11u\n",
debug_atomic_read(&nr_find_usage_forwards_checks));
seq_printf(m, " find-mask backwards checks: %11u\n",
debug_atomic_read(&nr_find_usage_backwards_checks));
seq_printf(m, " hardirq on events: %11u\n", hi1);
seq_printf(m, " hardirq off events: %11u\n", hi2);
seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
seq_printf(m, " softirq on events: %11u\n", si1);
seq_printf(m, " softirq off events: %11u\n", si2);
seq_printf(m, " redundant softirq ons: %11u\n", sr1);
seq_printf(m, " redundant softirq offs: %11u\n", sr2);
#endif
}
static int lockdep_stats_show(struct seq_file *m, void *v)
{
struct lock_class *class;
unsigned long nr_unused = 0, nr_uncategorized = 0,
nr_irq_safe = 0, nr_irq_unsafe = 0,
nr_softirq_safe = 0, nr_softirq_unsafe = 0,
nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
sum_forward_deps = 0, factor = 0;
list_for_each_entry(class, &all_lock_classes, lock_entry) {
if (class->usage_mask == 0)
nr_unused++;
if (class->usage_mask == LOCKF_USED)
nr_uncategorized++;
if (class->usage_mask & LOCKF_USED_IN_IRQ)
nr_irq_safe++;
if (class->usage_mask & LOCKF_ENABLED_IRQ)
nr_irq_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
nr_softirq_safe++;
if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ)
nr_softirq_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
nr_hardirq_safe++;
if (class->usage_mask & LOCKF_ENABLED_HARDIRQ)
nr_hardirq_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
nr_irq_read_safe++;
if (class->usage_mask & LOCKF_ENABLED_IRQ_READ)
nr_irq_read_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
nr_softirq_read_safe++;
if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ)
nr_softirq_read_unsafe++;
if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
nr_hardirq_read_safe++;
if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ)
nr_hardirq_read_unsafe++;
#ifdef CONFIG_PROVE_LOCKING
sum_forward_deps += lockdep_count_forward_deps(class);
#endif
}
#ifdef CONFIG_DEBUG_LOCKDEP
DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
#endif
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
nr_lock_classes, MAX_LOCKDEP_KEYS);
seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
nr_list_entries, MAX_LOCKDEP_ENTRIES);
seq_printf(m, " indirect dependencies: %11lu\n",
sum_forward_deps);
/*
* Total number of dependencies:
*
* All irq-safe locks may nest inside irq-unsafe locks,
* plus all the other known dependencies:
*/
seq_printf(m, " all direct dependencies: %11lu\n",
nr_irq_unsafe * nr_irq_safe +
nr_hardirq_unsafe * nr_hardirq_safe +
nr_list_entries);
/*
* Estimated factor between direct and indirect
* dependencies:
*/
if (nr_list_entries)
factor = sum_forward_deps / nr_list_entries;
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
nr_lock_chains, MAX_LOCKDEP_CHAINS);
seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n",
nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
seq_printf(m, " in-hardirq chains: %11u\n",
nr_hardirq_chains);
seq_printf(m, " in-softirq chains: %11u\n",
nr_softirq_chains);
#endif
seq_printf(m, " in-process chains: %11u\n",
nr_process_chains);
seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
seq_printf(m, " combined max dependencies: %11u\n",
(nr_hardirq_chains + 1) *
(nr_softirq_chains + 1) *
(nr_process_chains + 1)
);
seq_printf(m, " hardirq-safe locks: %11lu\n",
nr_hardirq_safe);
seq_printf(m, " hardirq-unsafe locks: %11lu\n",
nr_hardirq_unsafe);
seq_printf(m, " softirq-safe locks: %11lu\n",
nr_softirq_safe);
seq_printf(m, " softirq-unsafe locks: %11lu\n",
nr_softirq_unsafe);
seq_printf(m, " irq-safe locks: %11lu\n",
nr_irq_safe);
seq_printf(m, " irq-unsafe locks: %11lu\n",
nr_irq_unsafe);
seq_printf(m, " hardirq-read-safe locks: %11lu\n",
nr_hardirq_read_safe);
seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
nr_hardirq_read_unsafe);
seq_printf(m, " softirq-read-safe locks: %11lu\n",
nr_softirq_read_safe);
seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
nr_softirq_read_unsafe);
seq_printf(m, " irq-read-safe locks: %11lu\n",
nr_irq_read_safe);
seq_printf(m, " irq-read-unsafe locks: %11lu\n",
nr_irq_read_unsafe);
seq_printf(m, " uncategorized locks: %11lu\n",
nr_uncategorized);
seq_printf(m, " unused locks: %11lu\n",
nr_unused);
seq_printf(m, " max locking depth: %11u\n",
max_lockdep_depth);
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " max bfs queue depth: %11u\n",
max_bfs_queue_depth);
#endif
lockdep_stats_debug_show(m);
seq_printf(m, " debug_locks: %11u\n",
debug_locks);
return 0;
}
static int lockdep_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, lockdep_stats_show, NULL);
}
static const struct file_operations proc_lockdep_stats_operations = {
.open = lockdep_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#ifdef CONFIG_LOCK_STAT
struct lock_stat_data {
struct lock_class *class;
struct lock_class_stats stats;
};
struct lock_stat_seq {
struct lock_stat_data *iter_end;
struct lock_stat_data stats[MAX_LOCKDEP_KEYS];
};
/*
* sort on absolute number of contentions
*/
static int lock_stat_cmp(const void *l, const void *r)
{
const struct lock_stat_data *dl = l, *dr = r;
unsigned long nl, nr;
nl = dl->stats.read_waittime.nr + dl->stats.write_waittime.nr;
nr = dr->stats.read_waittime.nr + dr->stats.write_waittime.nr;
return nr - nl;
}
static void seq_line(struct seq_file *m, char c, int offset, int length)
{
int i;
for (i = 0; i < offset; i++)
seq_puts(m, " ");
for (i = 0; i < length; i++)
seq_printf(m, "%c", c);
seq_puts(m, "\n");
}
static void snprint_time(char *buf, size_t bufsiz, s64 nr)
{
s64 div;
s32 rem;
nr += 5; /* for display rounding */
div = div_s64_rem(nr, 1000, &rem);
snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
}
static void seq_time(struct seq_file *m, s64 time)
{
char num[15];
snprint_time(num, sizeof(num), time);
seq_printf(m, " %14s", num);
}
static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
{
seq_printf(m, "%14lu", lt->nr);
seq_time(m, lt->min);
seq_time(m, lt->max);
seq_time(m, lt->total);
}
static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
{
char name[39];
struct lock_class *class;
struct lock_class_stats *stats;
int i, namelen;
class = data->class;
stats = &data->stats;
namelen = 38;
if (class->name_version > 1)
namelen -= 2; /* XXX truncates versions > 9 */
if (class->subclass)
namelen -= 2;
if (!class->name) {
char str[KSYM_NAME_LEN];
const char *key_name;
key_name = __get_key_name(class->key, str);
snprintf(name, namelen, "%s", key_name);
} else {
snprintf(name, namelen, "%s", class->name);
}
namelen = strlen(name);
if (class->name_version > 1) {
snprintf(name+namelen, 3, "#%d", class->name_version);
namelen += 2;
}
if (class->subclass) {
snprintf(name+namelen, 3, "/%d", class->subclass);
namelen += 2;
}
if (stats->write_holdtime.nr) {
if (stats->read_holdtime.nr)
seq_printf(m, "%38s-W:", name);
else
seq_printf(m, "%40s:", name);
seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
seq_lock_time(m, &stats->write_waittime);
seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
seq_lock_time(m, &stats->write_holdtime);
seq_puts(m, "\n");
}
if (stats->read_holdtime.nr) {
seq_printf(m, "%38s-R:", name);
seq_printf(m, "%14lu ", stats->bounces[bounce_contended_read]);
seq_lock_time(m, &stats->read_waittime);
seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_read]);
seq_lock_time(m, &stats->read_holdtime);
seq_puts(m, "\n");
}
if (stats->read_waittime.nr + stats->write_waittime.nr == 0)
return;
if (stats->read_holdtime.nr)
namelen += 2;
for (i = 0; i < LOCKSTAT_POINTS; i++) {
char sym[KSYM_SYMBOL_LEN];
char ip[32];
if (class->contention_point[i] == 0)
break;
if (!i)
seq_line(m, '-', 40-namelen, namelen);
sprint_symbol(sym, class->contention_point[i]);
snprintf(ip, sizeof(ip), "[<%p>]",
(void *)class->contention_point[i]);
seq_printf(m, "%40s %14lu %29s %s\n", name,
stats->contention_point[i],
ip, sym);
}
for (i = 0; i < LOCKSTAT_POINTS; i++) {
char sym[KSYM_SYMBOL_LEN];
char ip[32];
if (class->contending_point[i] == 0)
break;
if (!i)
seq_line(m, '-', 40-namelen, namelen);
sprint_symbol(sym, class->contending_point[i]);
snprintf(ip, sizeof(ip), "[<%p>]",
(void *)class->contending_point[i]);
seq_printf(m, "%40s %14lu %29s %s\n", name,
stats->contending_point[i],
ip, sym);
}
if (i) {
seq_puts(m, "\n");
seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
seq_puts(m, "\n");
}
}
static void seq_header(struct seq_file *m)
{
seq_printf(m, "lock_stat version 0.3\n");
if (unlikely(!debug_locks))
seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n");
seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
"%14s %14s\n",
"class name",
"con-bounces",
"contentions",
"waittime-min",
"waittime-max",
"waittime-total",
"acq-bounces",
"acquisitions",
"holdtime-min",
"holdtime-max",
"holdtime-total");
seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
seq_printf(m, "\n");
}
static void *ls_start(struct seq_file *m, loff_t *pos)
{
struct lock_stat_seq *data = m->private;
struct lock_stat_data *iter;
if (*pos == 0)
return SEQ_START_TOKEN;
iter = data->stats + (*pos - 1);
if (iter >= data->iter_end)
iter = NULL;
return iter;
}
static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return ls_start(m, pos);
}
static void ls_stop(struct seq_file *m, void *v)
{
}
static int ls_show(struct seq_file *m, void *v)
{
if (v == SEQ_START_TOKEN)
seq_header(m);
else
seq_stats(m, v);
return 0;
}
static const struct seq_operations lockstat_ops = {
.start = ls_start,
.next = ls_next,
.stop = ls_stop,
.show = ls_show,
};
static int lock_stat_open(struct inode *inode, struct file *file)
{
int res;
struct lock_class *class;
struct lock_stat_seq *data = vmalloc(sizeof(struct lock_stat_seq));
if (!data)
return -ENOMEM;
res = seq_open(file, &lockstat_ops);
if (!res) {
struct lock_stat_data *iter = data->stats;
struct seq_file *m = file->private_data;
list_for_each_entry(class, &all_lock_classes, lock_entry) {
iter->class = class;
iter->stats = lock_stats(class);
iter++;
}
data->iter_end = iter;
sort(data->stats, data->iter_end - data->stats,
sizeof(struct lock_stat_data),
lock_stat_cmp, NULL);
m->private = data;
} else
vfree(data);
return res;
}
static ssize_t lock_stat_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct lock_class *class;
char c;
if (count) {
if (get_user(c, buf))
return -EFAULT;
if (c != '0')
return count;
list_for_each_entry(class, &all_lock_classes, lock_entry)
clear_lock_stats(class);
}
return count;
}
static int lock_stat_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
vfree(seq->private);
return seq_release(inode, file);
}
static const struct file_operations proc_lock_stat_operations = {
.open = lock_stat_open,
.write = lock_stat_write,
.read = seq_read,
.llseek = seq_lseek,
.release = lock_stat_release,
};
#endif /* CONFIG_LOCK_STAT */
static int __init lockdep_proc_init(void)
{
proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations);
#ifdef CONFIG_PROVE_LOCKING
proc_create("lockdep_chains", S_IRUSR, NULL,
&proc_lockdep_chains_operations);
#endif
proc_create("lockdep_stats", S_IRUSR, NULL,
&proc_lockdep_stats_operations);
#ifdef CONFIG_LOCK_STAT
proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
&proc_lock_stat_operations);
#endif
return 0;
}
__initcall(lockdep_proc_init);
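The write handler above (lock_stat_write) only acts on a leading '0'; writing that character resets every lock class's statistics. A minimal userspace sketch of the reset-then-dump cycle, assuming CONFIG_LOCK_STAT is enabled and procfs is mounted at /proc:

/* Editorial sketch: clear and re-read /proc/lock_stat from userspace. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        char line[512];
        FILE *f = fopen("/proc/lock_stat", "w");

        if (!f) {
                perror("/proc/lock_stat (write)");
                return EXIT_FAILURE;
        }
        fputc('0', f);                  /* any other character is ignored */
        fclose(f);

        f = fopen("/proc/lock_stat", "r");
        if (!f) {
                perror("/proc/lock_stat (read)");
                return EXIT_FAILURE;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* freshly reset statistics */
        fclose(f);
        return EXIT_SUCCESS;
}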

View File

@@ -0,0 +1,9 @@
/*
* Lockdep states,
*
* please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever
* you add one, or come up with a nice dynamic solution.
*/
LOCKDEP_STATE(HARDIRQ)
LOCKDEP_STATE(SOFTIRQ)
LOCKDEP_STATE(RECLAIM_FS)

3423
kernel/kernel/module.c Normal file

File diff suppressed because it is too large


110
kernel/kernel/mutex-debug.c Normal file
View File

@@ -0,0 +1,110 @@
/*
* kernel/mutex-debug.c
*
* Debugging code for mutexes
*
* Started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* lock debugging, locking tree, deadlock detection started by:
*
* Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
* Released under the General Public License (GPL).
*/
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/poison.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include "mutex-debug.h"
/*
* Must be called with lock->wait_lock held.
*/
void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
waiter->magic = waiter;
INIT_LIST_HEAD(&waiter->list);
}
void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
{
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}
void debug_mutex_free_waiter(struct mutex_waiter *waiter)
{
DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
}
void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
/* Mark the current thread as blocked on the lock: */
ti->task->blocked_on = waiter;
}
void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
ti->task->blocked_on = NULL;
list_del_init(&waiter->list);
waiter->task = NULL;
}
void debug_mutex_unlock(struct mutex *lock)
{
if (unlikely(!debug_locks))
return;
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
mutex_clear_owner(lock);
}
void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
lock->magic = lock;
}
/***
* mutex_destroy - mark a mutex unusable
* @lock: the mutex to be destroyed
*
* This function marks the mutex uninitialized, and any subsequent
* use of the mutex is forbidden. The mutex must not be locked when
* this function is called.
*/
void mutex_destroy(struct mutex *lock)
{
DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
lock->magic = NULL;
}
EXPORT_SYMBOL_GPL(mutex_destroy);
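A brief, hedged sketch of the lifecycle mutex_destroy() is meant to close out, for a dynamically allocated object; the structure and function names below are illustrative, not part of this file:

/* Illustrative only: pair mutex_init() with mutex_destroy(). */
#include <linux/mutex.h>
#include <linux/slab.h>

struct demo_dev {                       /* hypothetical object */
        struct mutex lock;
        int value;
};

static struct demo_dev *demo_dev_create(void)
{
        struct demo_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

        if (dev)
                mutex_init(&dev->lock); /* before first use */
        return dev;
}

static void demo_dev_destroy(struct demo_dev *dev)
{
        mutex_destroy(&dev->lock);      /* must not be held here */
        kfree(dev);
}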

View File

@@ -0,0 +1,55 @@
/*
* Mutexes: blocking mutual exclusion locks
*
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* This file contains mutex debugging related internal declarations,
* prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
* More details are in kernel/mutex-debug.c.
*/
/*
* This must be called with lock->wait_lock held.
*/
extern void debug_mutex_lock_common(struct mutex *lock,
struct mutex_waiter *waiter);
extern void debug_mutex_wake_waiter(struct mutex *lock,
struct mutex_waiter *waiter);
extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
extern void debug_mutex_add_waiter(struct mutex *lock,
struct mutex_waiter *waiter,
struct thread_info *ti);
extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti);
extern void debug_mutex_unlock(struct mutex *lock);
extern void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current_thread_info();
}
static inline void mutex_clear_owner(struct mutex *lock)
{
lock->owner = NULL;
}
#define spin_lock_mutex(lock, flags) \
do { \
struct mutex *l = container_of(lock, struct mutex, wait_lock); \
\
DEBUG_LOCKS_WARN_ON(in_interrupt()); \
local_irq_save(flags); \
__raw_spin_lock(&(lock)->raw_lock); \
DEBUG_LOCKS_WARN_ON(l->magic != l); \
} while (0)
#define spin_unlock_mutex(lock, flags) \
do { \
__raw_spin_unlock(&(lock)->raw_lock); \
local_irq_restore(flags); \
preempt_check_resched(); \
} while (0)

507
kernel/kernel/mutex.c Normal file
View File

@@ -0,0 +1,507 @@
/*
* kernel/mutex.c
*
* Mutexes: blocking mutual exclusion locks
*
* Started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
* David Howells for suggestions and improvements.
*
* - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
* from the -rt tree, where it was originally implemented for rtmutexes
* by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
* and Sven Dietrich.
*
* Also see Documentation/mutex-design.txt.
*/
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
/*
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
* which forces all calls into the slowpath:
*/
#ifdef CONFIG_DEBUG_MUTEXES
# include "mutex-debug.h"
# include <asm-generic/mutex-null.h>
#else
# include "mutex.h"
# include <asm/mutex.h>
#endif
/***
* mutex_init - initialize the mutex
* @lock: the mutex to be initialized
* @key: the lock_class_key for the class; used by mutex lock debugging
*
* Initialize the mutex to unlocked state.
*
* It is not allowed to initialize an already locked mutex.
*/
void
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
atomic_set(&lock->count, 1);
spin_lock_init(&lock->wait_lock);
INIT_LIST_HEAD(&lock->wait_list);
mutex_clear_owner(lock);
debug_mutex_init(lock, name, key);
}
EXPORT_SYMBOL(__mutex_init);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* We split the mutex lock/unlock logic into separate fastpath and
* slowpath functions, to reduce the register pressure on the fastpath.
* We also put the fastpath first in the kernel image, to make sure the
* branch is predicted by the CPU as default-untaken.
*/
static __used noinline void __sched
__mutex_lock_slowpath(atomic_t *lock_count);
/***
* mutex_lock - acquire the mutex
* @lock: the mutex to be acquired
*
* Lock the mutex exclusively for this task. If the mutex is not
* available right now, it will sleep until it can get it.
*
* The mutex must later on be released by the same task that
* acquired it. Recursive locking is not allowed. The task
* may not exit without first unlocking the mutex. Also, kernel
* memory where the mutex resides must not be freed with
* the mutex still locked. The mutex must first be initialized
* (or statically defined) before it can be locked. memset()-ing
* the mutex to 0 is not allowed.
*
* ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
* checks that will enforce the restrictions and will also do
* deadlock debugging. )
*
* This function is similar to (but not equivalent to) down().
*/
void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*
* The locking fastpath is the 1->0 transition from
* 'unlocked' into 'locked' state.
*/
__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
mutex_set_owner(lock);
}
EXPORT_SYMBOL(mutex_lock);
#endif
static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
/***
* mutex_unlock - release the mutex
* @lock: the mutex to be released
*
* Unlock a mutex that has been locked by this task previously.
*
* This function must not be used in interrupt context. Unlocking
* of a not locked mutex is not allowed.
*
* This function is similar to (but not equivalent to) up().
*/
void __sched mutex_unlock(struct mutex *lock)
{
/*
* The unlocking fastpath is the 0->1 transition from 'locked'
* into 'unlocked' state:
*/
#ifndef CONFIG_DEBUG_MUTEXES
/*
* When debugging is enabled we must not clear the owner before time,
* the slow path will always be taken, and that clears the owner field
* after verifying that it was indeed current.
*/
mutex_clear_owner(lock);
#endif
__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
}
EXPORT_SYMBOL(mutex_unlock);
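/*
 * Editorial sketch, not part of the original file: the plain
 * lock/unlock pairing described in the comments above. The mutex and
 * the function below use made-up names.
 */
#if 0
static DEFINE_MUTEX(demo_lock);
static int demo_counter;

static void demo_bump(void)
{
        mutex_lock(&demo_lock);         /* may sleep; never from IRQ context */
        demo_counter++;                 /* critical section */
        mutex_unlock(&demo_lock);       /* must be released by the same task */
}
#endif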
/*
* Lock a mutex (possibly interruptible), slowpath:
*/
static inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
unsigned long ip)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned long flags;
preempt_disable();
mutex_acquire(&lock->dep_map, subclass, 0, ip);
#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \
!defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES)
/*
* Optimistic spinning.
*
* We try to spin for acquisition when we find that there are no
* pending waiters and the lock owner is currently running on a
* (different) CPU.
*
* The rationale is that if the lock owner is running, it is likely to
* release the lock soon.
*
* Since this needs the lock owner, and this mutex implementation
* doesn't track the owner atomically in the lock field, we need to
* track it non-atomically.
*
* We can't do this for DEBUG_MUTEXES because that relies on wait_lock
* to serialize everything.
*/
for (;;) {
struct thread_info *owner;
/*
* If we own the BKL, then don't spin. The owner of
* the mutex might be waiting on us to release the BKL.
*/
if (unlikely(current->lock_depth >= 0))
break;
/*
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
owner = ACCESS_ONCE(lock->owner);
if (owner && !mutex_spin_on_owner(lock, owner))
break;
if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
lock_acquired(&lock->dep_map, ip);
mutex_set_owner(lock);
preempt_enable();
return 0;
}
/*
* When there's no owner, we might have preempted between the
* owner acquiring the lock and setting the owner field. If
* we're an RT task, that will live-lock because we won't let
* the owner complete.
*/
if (!owner && (need_resched() || rt_task(task)))
break;
/*
* The cpu_relax() call is a compiler barrier which forces
* everything in this loop to be re-loaded. We don't need
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
cpu_relax();
}
#endif
spin_lock_mutex(&lock->wait_lock, flags);
debug_mutex_lock_common(lock, &waiter);
debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;
if (atomic_xchg(&lock->count, -1) == 1)
goto done;
lock_contended(&lock->dep_map, ip);
for (;;) {
/*
* Let's try to take the lock again - this is needed even if
* we get here for the first time (shortly after failing to
* acquire the lock), to make sure that we get a wakeup once
* it's unlocked. Later on, if we sleep, this is the
* operation that gives us the lock. We xchg it to -1, so
* that when we release the lock, we properly wake up the
* other waiters:
*/
if (atomic_xchg(&lock->count, -1) == 1)
break;
/*
* got a signal? (This code gets eliminated in the
* TASK_UNINTERRUPTIBLE case.)
*/
if (unlikely(signal_pending_state(state, task))) {
mutex_remove_waiter(lock, &waiter,
task_thread_info(task));
mutex_release(&lock->dep_map, 1, ip);
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
preempt_enable();
return -EINTR;
}
__set_task_state(task, state);
/* didn't get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
preempt_enable_no_resched();
schedule();
preempt_disable();
spin_lock_mutex(&lock->wait_lock, flags);
}
done:
lock_acquired(&lock->dep_map, ip);
/* got the lock - rejoice! */
mutex_remove_waiter(lock, &waiter, current_thread_info());
mutex_set_owner(lock);
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
preempt_enable();
return 0;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
int __sched
mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
subclass, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
#endif
/*
* Release the lock, slowpath:
*/
static inline void
__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
unsigned long flags;
spin_lock_mutex(&lock->wait_lock, flags);
mutex_release(&lock->dep_map, nested, _RET_IP_);
debug_mutex_unlock(lock);
/*
* some architectures leave the lock unlocked in the fastpath failure
* case, others need to leave it locked. In the latter case we have to
* unlock it here
*/
if (__mutex_slowpath_needs_to_unlock())
atomic_set(&lock->count, 1);
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
struct mutex_waiter *waiter =
list_entry(lock->wait_list.next,
struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
wake_up_process(waiter->task);
}
spin_unlock_mutex(&lock->wait_lock, flags);
}
/*
* Release the lock, slowpath:
*/
static __used noinline void
__mutex_unlock_slowpath(atomic_t *lock_count)
{
__mutex_unlock_common_slowpath(lock_count, 1);
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* Here come the less common (and hence less performance-critical) APIs:
* mutex_lock_interruptible() and mutex_trylock().
*/
static noinline int __sched
__mutex_lock_killable_slowpath(atomic_t *lock_count);
static noinline int __sched
__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
/***
* mutex_lock_interruptible - acquire the mutex, interruptible
* @lock: the mutex to be acquired
*
* Lock the mutex like mutex_lock(), and return 0 if the mutex has
* been acquired or sleep until the mutex becomes available. If a
* signal arrives while waiting for the lock then this function
* returns -EINTR.
*
* This function is similar to (but not equivalent to) down_interruptible().
*/
int __sched mutex_lock_interruptible(struct mutex *lock)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval
(&lock->count, __mutex_lock_interruptible_slowpath);
if (!ret)
mutex_set_owner(lock);
return ret;
}
EXPORT_SYMBOL(mutex_lock_interruptible);
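/*
 * Editorial sketch, not part of the original file: the usual way a
 * syscall path handles the -EINTR that mutex_lock_interruptible() can
 * return when a signal arrives while sleeping. Names are illustrative.
 */
#if 0
static int demo_op(struct mutex *lock)
{
        if (mutex_lock_interruptible(lock))
                return -ERESTARTSYS;    /* back out and let the signal run */
        /* ... protected work ... */
        mutex_unlock(lock);
        return 0;
}
#endif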
int __sched mutex_lock_killable(struct mutex *lock)
{
int ret;
might_sleep();
ret = __mutex_fastpath_lock_retval
(&lock->count, __mutex_lock_killable_slowpath);
if (!ret)
mutex_set_owner(lock);
return ret;
}
EXPORT_SYMBOL(mutex_lock_killable);
static __used noinline void __sched
__mutex_lock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
}
static noinline int __sched
__mutex_lock_killable_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_);
}
static noinline int __sched
__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
}
#endif
/*
* Spinlock based trylock, we take the spinlock and check whether we
* can get the lock:
*/
static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
unsigned long flags;
int prev;
spin_lock_mutex(&lock->wait_lock, flags);
prev = atomic_xchg(&lock->count, -1);
if (likely(prev == 1)) {
mutex_set_owner(lock);
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
}
/* Set it back to 0 if there are no waiters: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
spin_unlock_mutex(&lock->wait_lock, flags);
return prev == 1;
}
/***
* mutex_trylock - try acquire the mutex, without waiting
* @lock: the mutex to be acquired
*
* Try to acquire the mutex atomically. Returns 1 if the mutex
* has been acquired successfully, and 0 on contention.
*
* NOTE: this function follows the spin_trylock() convention, so
* its return value is the inverse of down_trylock()'s! Be careful
* about this when converting semaphore users to mutexes.
*
* This function must not be used in interrupt context. The
* mutex must be released by the same task that acquired it.
*/
int __sched mutex_trylock(struct mutex *lock)
{
int ret;
ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
if (ret)
mutex_set_owner(lock);
return ret;
}
EXPORT_SYMBOL(mutex_trylock);
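/*
 * Editorial sketch, not part of the original file: mutex_trylock()
 * returns 1 on success and 0 on contention (the opposite sense of
 * down_trylock()), so the success path is the non-zero branch.
 */
#if 0
static int demo_try_fast_path(struct mutex *lock)
{
        if (!mutex_trylock(lock))
                return 0;               /* contended: take a slow path instead */
        /* ... short, non-blocking work ... */
        mutex_unlock(lock);
        return 1;
}
#endif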
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
* @cnt: the atomic counter which we are to decrement
* @lock: the mutex to return holding if we dec to 0
*
* return true and hold the lock if we decremented to 0, return false otherwise
*/
int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
{
/* dec if we can't possibly hit 0 */
if (atomic_add_unless(cnt, -1, 1))
return 0;
/* we might hit 0, so take the lock */
mutex_lock(lock);
if (!atomic_dec_and_test(cnt)) {
/* when we actually did the dec, we didn't hit 0 */
mutex_unlock(lock);
return 0;
}
/* we hit 0, and we hold the lock */
return 1;
}
EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
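A typical caller of atomic_dec_and_mutex_lock() is dropping the last reference to an object whose teardown must run under a list mutex; a hedged sketch with made-up names:

/* Illustrative only: free a refcounted object on its final put. */
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct demo_obj {                       /* hypothetical object */
        atomic_t refcount;
        struct list_head node;
};

static DEFINE_MUTEX(demo_list_lock);

static void demo_put(struct demo_obj *obj)
{
        /* Returns 1 only when the count hits 0; the mutex is then held. */
        if (!atomic_dec_and_mutex_lock(&obj->refcount, &demo_list_lock))
                return;
        list_del(&obj->node);
        mutex_unlock(&demo_list_lock);
        kfree(obj);
}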

48
kernel/kernel/mutex.h Normal file
View File

@@ -0,0 +1,48 @@
/*
* Mutexes: blocking mutual exclusion locks
*
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* This file contains mutex debugging related internal prototypes, for the
* !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
*/
#define spin_lock_mutex(lock, flags) \
do { spin_lock(lock); (void)(flags); } while (0)
#define spin_unlock_mutex(lock, flags) \
do { spin_unlock(lock); (void)(flags); } while (0)
#define mutex_remove_waiter(lock, waiter, ti) \
__list_del((waiter)->list.prev, (waiter)->list.next)
#ifdef CONFIG_SMP
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current_thread_info();
}
static inline void mutex_clear_owner(struct mutex *lock)
{
lock->owner = NULL;
}
#else
static inline void mutex_set_owner(struct mutex *lock)
{
}
static inline void mutex_clear_owner(struct mutex *lock)
{
}
#endif
#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
#define debug_mutex_free_waiter(waiter) do { } while (0)
#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
#define debug_mutex_unlock(lock) do { } while (0)
#define debug_mutex_init(lock, name, key) do { } while (0)
static inline void
debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
}

586
kernel/kernel/notifier.c Normal file
View File

@@ -0,0 +1,586 @@
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
/*
* Notifier list for kernel code which wants to be called
* at shutdown. This is used to stop any idling DMA operations
* and the like.
*/
BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
/*
* Notifier chain core routines. The exported routines below
* are layered on top of these, with appropriate locking added.
*/
static int notifier_chain_register(struct notifier_block **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
if (n->priority > (*nl)->priority)
break;
nl = &((*nl)->next);
}
n->next = *nl;
rcu_assign_pointer(*nl, n);
return 0;
}
static int notifier_chain_cond_register(struct notifier_block **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
if ((*nl) == n)
return 0;
if (n->priority > (*nl)->priority)
break;
nl = &((*nl)->next);
}
n->next = *nl;
rcu_assign_pointer(*nl, n);
return 0;
}
static int notifier_chain_unregister(struct notifier_block **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
if ((*nl) == n) {
rcu_assign_pointer(*nl, n->next);
return 0;
}
nl = &((*nl)->next);
}
return -ENOENT;
}
/**
* notifier_call_chain - Informs the registered notifiers about an event.
* @nl: Pointer to head of the blocking notifier chain
* @val: Value passed unmodified to notifier function
* @v: Pointer passed unmodified to notifier function
* @nr_to_call: Number of notifier functions to be called; pass -1
* to call every function on the chain.
* @nr_calls: Records the number of notifications sent; pass NULL
* if the count is not needed.
* @returns: notifier_call_chain returns the value returned by the
* last notifier function called.
*/
static int __kprobes notifier_call_chain(struct notifier_block **nl,
unsigned long val, void *v,
int nr_to_call, int *nr_calls)
{
int ret = NOTIFY_DONE;
struct notifier_block *nb, *next_nb;
nb = rcu_dereference(*nl);
while (nb && nr_to_call) {
next_nb = rcu_dereference(nb->next);
#ifdef CONFIG_DEBUG_NOTIFIERS
if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
WARN(1, "Invalid notifier called!");
nb = next_nb;
continue;
}
#endif
ret = nb->notifier_call(nb, val, v);
if (nr_calls)
(*nr_calls)++;
if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
break;
nb = next_nb;
nr_to_call--;
}
return ret;
}
/*
* Atomic notifier chain routines. Registration and unregistration
* use a spinlock, and call_chain is synchronized by RCU (no locks).
*/
/**
* atomic_notifier_chain_register - Add notifier to an atomic notifier chain
* @nh: Pointer to head of the atomic notifier chain
* @n: New entry in notifier chain
*
* Adds a notifier to an atomic notifier chain.
*
* Currently always returns zero.
*/
int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
struct notifier_block *n)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&nh->lock, flags);
ret = notifier_chain_register(&nh->head, n);
spin_unlock_irqrestore(&nh->lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
/**
* atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
* @nh: Pointer to head of the atomic notifier chain
* @n: Entry to remove from notifier chain
*
* Removes a notifier from an atomic notifier chain.
*
* Returns zero on success or %-ENOENT on failure.
*/
int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
struct notifier_block *n)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&nh->lock, flags);
ret = notifier_chain_unregister(&nh->head, n);
spin_unlock_irqrestore(&nh->lock, flags);
synchronize_rcu();
return ret;
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
/**
* __atomic_notifier_call_chain - Call functions in an atomic notifier chain
* @nh: Pointer to head of the atomic notifier chain
* @val: Value passed unmodified to notifier function
* @v: Pointer passed unmodified to notifier function
* @nr_to_call: See the comment for notifier_call_chain.
* @nr_calls: See the comment for notifier_call_chain.
*
* Calls each function in a notifier chain in turn. The functions
* run in an atomic context, so they must not block.
* This routine uses RCU to synchronize with changes to the chain.
*
* If the return value of the notifier can be and'ed
* with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
* will return immediately, with the return value of
* the notifier function which halted execution.
* Otherwise the return value is the return value
* of the last notifier function called.
*/
int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
unsigned long val, void *v,
int nr_to_call, int *nr_calls)
{
int ret;
rcu_read_lock();
ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
unsigned long val, void *v)
{
return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
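/*
 * Editorial sketch, not part of the original file: one head, one
 * callback, one event on an atomic chain. All names are made up.
 */
#if 0
static int demo_event(struct notifier_block *nb, unsigned long val, void *data)
{
        pr_info("demo notifier: event %lu\n", val);
        return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
        .notifier_call  = demo_event,
        .priority       = 0,    /* higher priorities are called earlier */
};

static ATOMIC_NOTIFIER_HEAD(demo_chain);

static void demo_fire(void)
{
        atomic_notifier_chain_register(&demo_chain, &demo_nb);
        atomic_notifier_call_chain(&demo_chain, 1UL, NULL);
        atomic_notifier_chain_unregister(&demo_chain, &demo_nb);
}
#endif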
/*
* Blocking notifier chain routines. All access to the chain is
* synchronized by an rwsem.
*/
/**
* blocking_notifier_chain_register - Add notifier to a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @n: New entry in notifier chain
*
* Adds a notifier to a blocking notifier chain.
* Must be called in process context.
*
* Currently always returns zero.
*/
int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
struct notifier_block *n)
{
int ret;
/*
* This code gets used during boot-up, when task switching is
* not yet working and interrupts must remain disabled. At
* such times we must not call down_write().
*/
if (unlikely(system_state == SYSTEM_BOOTING))
return notifier_chain_register(&nh->head, n);
down_write(&nh->rwsem);
ret = notifier_chain_register(&nh->head, n);
up_write(&nh->rwsem);
return ret;
}
EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
/**
* blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @n: New entry in notifier chain
*
* Adds a notifier to a blocking notifier chain, only if not already
* present in the chain.
* Must be called in process context.
*
* Currently always returns zero.
*/
int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh,
struct notifier_block *n)
{
int ret;
down_write(&nh->rwsem);
ret = notifier_chain_cond_register(&nh->head, n);
up_write(&nh->rwsem);
return ret;
}
EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register);
/**
* blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @n: Entry to remove from notifier chain
*
* Removes a notifier from a blocking notifier chain.
* Must be called from process context.
*
* Returns zero on success or %-ENOENT on failure.
*/
int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
struct notifier_block *n)
{
int ret;
/*
* This code gets used during boot-up, when task switching is
* not yet working and interrupts must remain disabled. At
* such times we must not call down_write().
*/
if (unlikely(system_state == SYSTEM_BOOTING))
return notifier_chain_unregister(&nh->head, n);
down_write(&nh->rwsem);
ret = notifier_chain_unregister(&nh->head, n);
up_write(&nh->rwsem);
return ret;
}
EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
/**
* __blocking_notifier_call_chain - Call functions in a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @val: Value passed unmodified to notifier function
* @v: Pointer passed unmodified to notifier function
* @nr_to_call: See comment for notifier_call_chain.
* @nr_calls: See comment for notifier_call_chain.
*
* Calls each function in a notifier chain in turn. The functions
* run in a process context, so they are allowed to block.
*
* If the return value of the notifier can be and'ed
* with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
* will return immediately, with the return value of
* the notifier function which halted execution.
* Otherwise the return value is the return value
* of the last notifier function called.
*/
int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
unsigned long val, void *v,
int nr_to_call, int *nr_calls)
{
int ret = NOTIFY_DONE;
/*
* We check the head outside the lock, but if this access is
* racy then it does not matter what the result of the test
* is, we re-check the list after having taken the lock anyway:
*/
if (rcu_dereference(nh->head)) {
down_read(&nh->rwsem);
ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
nr_calls);
up_read(&nh->rwsem);
}
return ret;
}
EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
unsigned long val, void *v)
{
return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
/*
* Raw notifier chain routines. There is no protection;
* the caller must provide it. Use at your own risk!
*/
/**
* raw_notifier_chain_register - Add notifier to a raw notifier chain
* @nh: Pointer to head of the raw notifier chain
* @n: New entry in notifier chain
*
* Adds a notifier to a raw notifier chain.
* All locking must be provided by the caller.
*
* Currently always returns zero.
*/
int raw_notifier_chain_register(struct raw_notifier_head *nh,
struct notifier_block *n)
{
return notifier_chain_register(&nh->head, n);
}
EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
/**
* raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
* @nh: Pointer to head of the raw notifier chain
* @n: Entry to remove from notifier chain
*
* Removes a notifier from a raw notifier chain.
* All locking must be provided by the caller.
*
* Returns zero on success or %-ENOENT on failure.
*/
int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
struct notifier_block *n)
{
return notifier_chain_unregister(&nh->head, n);
}
EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
/**
* __raw_notifier_call_chain - Call functions in a raw notifier chain
* @nh: Pointer to head of the raw notifier chain
* @val: Value passed unmodified to notifier function
* @v: Pointer passed unmodified to notifier function
* @nr_to_call: See comment for notifier_call_chain.
* @nr_calls: See comment for notifier_call_chain
*
* Calls each function in a notifier chain in turn. The functions
* run in an undefined context.
* All locking must be provided by the caller.
*
* If the return value of the notifier can be and'ed
* with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
* will return immediately, with the return value of
* the notifier function which halted execution.
* Otherwise the return value is the return value
* of the last notifier function called.
*/
int __raw_notifier_call_chain(struct raw_notifier_head *nh,
unsigned long val, void *v,
int nr_to_call, int *nr_calls)
{
return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
}
EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
int raw_notifier_call_chain(struct raw_notifier_head *nh,
unsigned long val, void *v)
{
return __raw_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
/*
* SRCU notifier chain routines. Registration and unregistration
* use a mutex, and call_chain is synchronized by SRCU (no locks).
*/
/**
* srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
* @nh: Pointer to head of the SRCU notifier chain
* @n: New entry in notifier chain
*
* Adds a notifier to an SRCU notifier chain.
* Must be called in process context.
*
* Currently always returns zero.
*/
int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
struct notifier_block *n)
{
int ret;
/*
* This code gets used during boot-up, when task switching is
* not yet working and interrupts must remain disabled. At
* such times we must not call mutex_lock().
*/
if (unlikely(system_state == SYSTEM_BOOTING))
return notifier_chain_register(&nh->head, n);
mutex_lock(&nh->mutex);
ret = notifier_chain_register(&nh->head, n);
mutex_unlock(&nh->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
/**
* srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
* @nh: Pointer to head of the SRCU notifier chain
* @n: Entry to remove from notifier chain
*
* Removes a notifier from an SRCU notifier chain.
* Must be called from process context.
*
* Returns zero on success or %-ENOENT on failure.
*/
int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
struct notifier_block *n)
{
int ret;
/*
* This code gets used during boot-up, when task switching is
* not yet working and interrupts must remain disabled. At
* such times we must not call mutex_lock().
*/
if (unlikely(system_state == SYSTEM_BOOTING))
return notifier_chain_unregister(&nh->head, n);
mutex_lock(&nh->mutex);
ret = notifier_chain_unregister(&nh->head, n);
mutex_unlock(&nh->mutex);
synchronize_srcu(&nh->srcu);
return ret;
}
EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
/**
* __srcu_notifier_call_chain - Call functions in an SRCU notifier chain
* @nh: Pointer to head of the SRCU notifier chain
* @val: Value passed unmodified to notifier function
* @v: Pointer passed unmodified to notifier function
* @nr_to_call: See comment for notifier_call_chain.
* @nr_calls: See comment for notifier_call_chain
*
* Calls each function in a notifier chain in turn. The functions
* run in a process context, so they are allowed to block.
*
* If the return value of the notifier can be and'ed
* with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
* will return immediately, with the return value of
* the notifier function which halted execution.
* Otherwise the return value is the return value
* of the last notifier function called.
*/
int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
unsigned long val, void *v,
int nr_to_call, int *nr_calls)
{
int ret;
int idx;
idx = srcu_read_lock(&nh->srcu);
ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
srcu_read_unlock(&nh->srcu, idx);
return ret;
}
EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
unsigned long val, void *v)
{
return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
}
EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
/**
* srcu_init_notifier_head - Initialize an SRCU notifier head
* @nh: Pointer to head of the srcu notifier chain
*
* Unlike other sorts of notifier heads, SRCU notifier heads require
* dynamic initialization. Be sure to call this routine before
* calling any of the other SRCU notifier routines for this head.
*
* If an SRCU notifier head is deallocated, it must first be cleaned
* up by calling srcu_cleanup_notifier_head(). Otherwise the head's
* per-cpu data (used by the SRCU mechanism) will leak.
*/
void srcu_init_notifier_head(struct srcu_notifier_head *nh)
{
mutex_init(&nh->mutex);
if (init_srcu_struct(&nh->srcu) < 0)
BUG();
nh->head = NULL;
}
EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
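/*
 * Editorial sketch, not part of the original file: the dynamic
 * init/cleanup pairing the comment above requires for SRCU heads.
 * The names are made up.
 */
#if 0
static struct srcu_notifier_head demo_srcu_chain;

static int __init demo_srcu_setup(void)
{
        srcu_init_notifier_head(&demo_srcu_chain);
        return 0;
}

static void __exit demo_srcu_teardown(void)
{
        /* Required before the head goes away, or per-cpu SRCU data leaks. */
        srcu_cleanup_notifier_head(&demo_srcu_chain);
}
#endif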
/**
* register_reboot_notifier - Register function to be called at reboot time
* @nb: Info about notifier function to be called
*
* Registers a function with the list of functions
* to be called at reboot time.
*
* Currently always returns zero, as blocking_notifier_chain_register()
* always returns zero.
*/
int register_reboot_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(register_reboot_notifier);
/**
* unregister_reboot_notifier - Unregister previously registered reboot notifier
* @nb: Hook to be unregistered
*
* Unregisters a previously registered reboot
* notifier function.
*
* Returns zero on success, or %-ENOENT on failure.
*/
int unregister_reboot_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(unregister_reboot_notifier);
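/*
 * Editorial sketch, not part of the original file: a module-style
 * reboot hook on the blocking chain above. Names are made up.
 */
#if 0
static int demo_reboot_notify(struct notifier_block *nb,
                              unsigned long action, void *data)
{
        pr_info("demo: system going down (action=%lu)\n", action);
        return NOTIFY_DONE;
}

static struct notifier_block demo_reboot_nb = {
        .notifier_call = demo_reboot_notify,
};

static int __init demo_reboot_init(void)
{
        return register_reboot_notifier(&demo_reboot_nb);
}

static void __exit demo_reboot_exit(void)
{
        unregister_reboot_notifier(&demo_reboot_nb);
}
#endif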
static ATOMIC_NOTIFIER_HEAD(die_chain);
int notrace notify_die(enum die_val val, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
{
struct die_args args = {
.regs = regs,
.str = str,
.err = err,
.trapnr = trap,
.signr = sig,
};
return atomic_notifier_call_chain(&die_chain, val, &args);
}
int register_die_notifier(struct notifier_block *nb)
{
vmalloc_sync_all();
return atomic_notifier_chain_register(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
int unregister_die_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_die_notifier);

110
kernel/kernel/ns_cgroup.c Normal file
View File

@@ -0,0 +1,110 @@
/*
* ns_cgroup.c - namespace cgroup subsystem
*
* Copyright 2006, 2007 IBM Corp
*/
#include <linux/module.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
struct ns_cgroup {
struct cgroup_subsys_state css;
};
struct cgroup_subsys ns_subsys;
static inline struct ns_cgroup *cgroup_to_ns(
struct cgroup *cgroup)
{
return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
struct ns_cgroup, css);
}
int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
{
char name[PROC_NUMBUF];
snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
return cgroup_clone(task, &ns_subsys, name);
}
/*
* Rules:
* 1. you can only enter a cgroup which is a descendant of your current
* cgroup
* 2. you can only place another process into a cgroup if
* a. you have CAP_SYS_ADMIN
* b. your cgroup is an ancestor of task's destination cgroup
* (hence either you are in the same cgroup as task, or in an
* ancestor cgroup thereof)
*/
static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
struct task_struct *task, bool threadgroup)
{
if (current != task) {
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!cgroup_is_descendant(new_cgroup, current))
return -EPERM;
}
if (!cgroup_is_descendant(new_cgroup, task))
return -EPERM;
if (threadgroup) {
struct task_struct *c;
rcu_read_lock();
list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
if (!cgroup_is_descendant(new_cgroup, c)) {
rcu_read_unlock();
return -EPERM;
}
}
rcu_read_unlock();
}
return 0;
}
/*
* Rules: you can only create a cgroup if
* 1. you are capable(CAP_SYS_ADMIN)
* 2. the target cgroup is a descendant of your own cgroup
*/
static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
struct cgroup *cgroup)
{
struct ns_cgroup *ns_cgroup;
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
if (!cgroup_is_descendant(cgroup, current))
return ERR_PTR(-EPERM);
ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
if (!ns_cgroup)
return ERR_PTR(-ENOMEM);
return &ns_cgroup->css;
}
static void ns_destroy(struct cgroup_subsys *ss,
struct cgroup *cgroup)
{
struct ns_cgroup *ns_cgroup;
ns_cgroup = cgroup_to_ns(cgroup);
kfree(ns_cgroup);
}
struct cgroup_subsys ns_subsys = {
.name = "ns",
.can_attach = ns_can_attach,
.create = ns_create,
.destroy = ns_destroy,
.subsys_id = ns_subsys_id,
};

230
kernel/kernel/nsproxy.c Normal file
View File

@@ -0,0 +1,230 @@
/*
* Copyright (C) 2006 IBM Corporation
*
* Author: Serge Hallyn <serue@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2 of the
* License.
*
* Jun 2006 - namespaces support
* OpenVZ, SWsoft Inc.
* Pavel Emelianov <xemul@openvz.org>
*/
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <linux/init_task.h>
#include <linux/mnt_namespace.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
static struct kmem_cache *nsproxy_cachep;
struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
static inline struct nsproxy *create_nsproxy(void)
{
struct nsproxy *nsproxy;
nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
if (nsproxy)
atomic_set(&nsproxy->count, 1);
return nsproxy;
}
/*
* Create a new nsproxy and all of its associated namespaces.
* Return the newly created nsproxy. Do not attach this to the task,
* leave it to the caller to do proper locking and attach it to task.
*/
static struct nsproxy *create_new_namespaces(unsigned long flags,
struct task_struct *tsk, struct fs_struct *new_fs)
{
struct nsproxy *new_nsp;
int err;
new_nsp = create_nsproxy();
if (!new_nsp)
return ERR_PTR(-ENOMEM);
new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
if (IS_ERR(new_nsp->mnt_ns)) {
err = PTR_ERR(new_nsp->mnt_ns);
goto out_ns;
}
new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
if (IS_ERR(new_nsp->uts_ns)) {
err = PTR_ERR(new_nsp->uts_ns);
goto out_uts;
}
new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
if (IS_ERR(new_nsp->ipc_ns)) {
err = PTR_ERR(new_nsp->ipc_ns);
goto out_ipc;
}
new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
if (IS_ERR(new_nsp->pid_ns)) {
err = PTR_ERR(new_nsp->pid_ns);
goto out_pid;
}
new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
if (IS_ERR(new_nsp->net_ns)) {
err = PTR_ERR(new_nsp->net_ns);
goto out_net;
}
return new_nsp;
out_net:
if (new_nsp->pid_ns)
put_pid_ns(new_nsp->pid_ns);
out_pid:
if (new_nsp->ipc_ns)
put_ipc_ns(new_nsp->ipc_ns);
out_ipc:
if (new_nsp->uts_ns)
put_uts_ns(new_nsp->uts_ns);
out_uts:
if (new_nsp->mnt_ns)
put_mnt_ns(new_nsp->mnt_ns);
out_ns:
kmem_cache_free(nsproxy_cachep, new_nsp);
return ERR_PTR(err);
}
/*
* Called from clone. This now handles copy for nsproxy and all
* namespaces therein.
*/
int copy_namespaces(unsigned long flags, struct task_struct *tsk)
{
struct nsproxy *old_ns = tsk->nsproxy;
struct nsproxy *new_ns;
int err = 0;
if (!old_ns)
return 0;
get_nsproxy(old_ns);
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
err = -EPERM;
goto out;
}
/*
* CLONE_NEWIPC must detach from the undolist: after switching
* to a new ipc namespace, the semaphore arrays from the old
* namespace are unreachable. In clone parlance, CLONE_SYSVSEM
* means share undolist with parent, so we must forbid using
* it along with CLONE_NEWIPC.
*/
if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) {
err = -EINVAL;
goto out;
}
new_ns = create_new_namespaces(flags, tsk, tsk->fs);
if (IS_ERR(new_ns)) {
err = PTR_ERR(new_ns);
goto out;
}
tsk->nsproxy = new_ns;
out:
put_nsproxy(old_ns);
return err;
}
void free_nsproxy(struct nsproxy *ns)
{
if (ns->mnt_ns)
put_mnt_ns(ns->mnt_ns);
if (ns->uts_ns)
put_uts_ns(ns->uts_ns);
if (ns->ipc_ns)
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
put_pid_ns(ns->pid_ns);
put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
}
/*
* Called from unshare. Unshare all the namespaces part of nsproxy.
* On success, returns the new nsproxy.
*/
int unshare_nsproxy_namespaces(unsigned long unshare_flags,
struct nsproxy **new_nsp, struct fs_struct *new_fs)
{
int err = 0;
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
*new_nsp = create_new_namespaces(unshare_flags, current,
new_fs ? new_fs : current->fs);
if (IS_ERR(*new_nsp)) {
err = PTR_ERR(*new_nsp);
goto out;
}
err = ns_cgroup_clone(current, task_pid(current));
if (err)
put_nsproxy(*new_nsp);
out:
return err;
}
void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
{
struct nsproxy *ns;
might_sleep();
ns = p->nsproxy;
rcu_assign_pointer(p->nsproxy, new);
if (ns && atomic_dec_and_test(&ns->count)) {
/*
* wait for others to get what they want from this nsproxy.
*
* cannot release this nsproxy via the call_rcu() since
* put_mnt_ns() will want to sleep
*/
synchronize_rcu();
free_nsproxy(ns);
}
}
void exit_task_namespaces(struct task_struct *p)
{
switch_task_namespaces(p, NULL);
}
static int __init nsproxy_cache_init(void)
{
nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
return 0;
}
module_init(nsproxy_cache_init);
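The CLONE_NEW* flags handled in copy_namespaces() and unshare_nsproxy_namespaces() are the same ones userspace passes to clone(2) and unshare(2). A hedged userspace sketch, assuming root (CAP_SYS_ADMIN) and a libc that exposes unshare():

/* Illustrative only: detach into a private UTS namespace. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        if (unshare(CLONE_NEWUTS) != 0) {
                perror("unshare(CLONE_NEWUTS)");
                return 1;
        }
        /* Only this process and its future children see the new name. */
        if (sethostname("sandbox", strlen("sandbox")) != 0) {
                perror("sethostname");
                return 1;
        }
        printf("hostname changed inside a private UTS namespace\n");
        return 0;
}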

403
kernel/kernel/panic.c Normal file
View File

@@ -0,0 +1,403 @@
/*
* linux/kernel/panic.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
/*
* This function is used through-out the kernel (including mm and fs)
* to indicate a major problem.
*/
#include <linux/debug_locks.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/kexec.h>
#include <linux/sched.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/nmi.h>
#include <linux/dmi.h>
int panic_on_oops;
static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
int panic_timeout;
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
EXPORT_SYMBOL(panic_notifier_list);
static long no_blink(long time)
{
return 0;
}
/* Returns how long it waited in ms */
long (*panic_blink)(long time);
EXPORT_SYMBOL(panic_blink);
/**
* panic - halt the system
* @fmt: The text string to print
*
* Display a message, then perform cleanups.
*
* This function never returns.
*/
NORET_TYPE void panic(const char * fmt, ...)
{
static char buf[1024];
va_list args;
long i;
/*
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
*/
preempt_disable();
bust_spinlocks(1);
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
dump_stack();
#endif
/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
* Do we want to call this before we try to display a message?
*/
crash_kexec(NULL);
/*
* Note smp_send_stop is the usual smp shutdown function, which
* unfortunately means it may not be hardened to work in a panic
* situation.
*/
smp_send_stop();
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
bust_spinlocks(0);
if (!panic_blink)
panic_blink = no_blink;
if (panic_timeout > 0) {
/*
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked.
*/
printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout);
for (i = 0; i < panic_timeout*1000; ) {
touch_nmi_watchdog();
i += panic_blink(i);
mdelay(1);
i++;
}
/*
* This will not be a clean reboot, with everything
* shutting down. But if there is a chance of
* rebooting the system it will be rebooted.
*/
emergency_restart();
}
#ifdef __sparc__
{
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
}
#endif
#if defined(CONFIG_S390)
{
unsigned long caller;
caller = (unsigned long)__builtin_return_address(0);
disabled_wait(caller);
}
#endif
local_irq_enable();
for (i = 0; ; ) {
touch_softlockup_watchdog();
i += panic_blink(i);
mdelay(1);
i++;
}
}
EXPORT_SYMBOL(panic);
struct tnt {
u8 bit;
char true;
char false;
};
static const struct tnt tnts[] = {
{ TAINT_PROPRIETARY_MODULE, 'P', 'G' },
{ TAINT_FORCED_MODULE, 'F', ' ' },
{ TAINT_UNSAFE_SMP, 'S', ' ' },
{ TAINT_FORCED_RMMOD, 'R', ' ' },
{ TAINT_MACHINE_CHECK, 'M', ' ' },
{ TAINT_BAD_PAGE, 'B', ' ' },
{ TAINT_USER, 'U', ' ' },
{ TAINT_DIE, 'D', ' ' },
{ TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' },
{ TAINT_WARN, 'W', ' ' },
{ TAINT_CRAP, 'C', ' ' },
};
/**
* print_tainted - return a string to represent the kernel taint state.
*
* 'P' - Proprietary module has been loaded.
* 'F' - Module has been forcibly loaded.
* 'S' - SMP with CPUs not designed for SMP.
* 'R' - User forced a module unload.
* 'M' - System experienced a machine check exception.
* 'B' - System has hit bad_page.
* 'U' - Userspace-defined naughtiness.
* 'D' - Kernel has oopsed before.
* 'A' - ACPI table overridden.
* 'W' - Taint on warning.
* 'C' - Modules from drivers/staging are loaded.
*
* The string is overwritten by the next call to print_tainted().
*/
const char *print_tainted(void)
{
static char buf[ARRAY_SIZE(tnts) + sizeof("Tainted: ") + 1];
if (tainted_mask) {
char *s;
int i;
s = buf + sprintf(buf, "Tainted: ");
for (i = 0; i < ARRAY_SIZE(tnts); i++) {
const struct tnt *t = &tnts[i];
*s++ = test_bit(t->bit, &tainted_mask) ?
t->true : t->false;
}
*s = 0;
} else
snprintf(buf, sizeof(buf), "Not tainted");
return buf;
}
int test_taint(unsigned flag)
{
return test_bit(flag, &tainted_mask);
}
EXPORT_SYMBOL(test_taint);
unsigned long get_taint(void)
{
return tainted_mask;
}
void add_taint(unsigned flag)
{
/*
* Can't trust the integrity of the kernel anymore.
* We don't call directly debug_locks_off() because the issue
* is not necessarily serious enough to set oops_in_progress to 1
* Also we want to keep up lockdep for staging development and
* post-warning case.
*/
if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off())
printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
set_bit(flag, &tainted_mask);
}
EXPORT_SYMBOL(add_taint);
static void spin_msec(int msecs)
{
int i;
for (i = 0; i < msecs; i++) {
touch_nmi_watchdog();
mdelay(1);
}
}
/*
* It just happens that oops_enter() and oops_exit() are identically
* implemented...
*/
static void do_oops_enter_exit(void)
{
unsigned long flags;
static int spin_counter;
if (!pause_on_oops)
return;
spin_lock_irqsave(&pause_on_oops_lock, flags);
if (pause_on_oops_flag == 0) {
/* This CPU may now print the oops message */
pause_on_oops_flag = 1;
} else {
/* We need to stall this CPU */
if (!spin_counter) {
/* This CPU gets to do the counting */
spin_counter = pause_on_oops;
do {
spin_unlock(&pause_on_oops_lock);
spin_msec(MSEC_PER_SEC);
spin_lock(&pause_on_oops_lock);
} while (--spin_counter);
pause_on_oops_flag = 0;
} else {
/* This CPU waits for a different one */
while (spin_counter) {
spin_unlock(&pause_on_oops_lock);
spin_msec(1);
spin_lock(&pause_on_oops_lock);
}
}
}
spin_unlock_irqrestore(&pause_on_oops_lock, flags);
}
/*
* Return true if the calling CPU is allowed to print oops-related info.
* This is a bit racy..
*/
int oops_may_print(void)
{
return pause_on_oops_flag == 0;
}
/*
* Called when the architecture enters its oops handler, before it prints
* anything. If this is the first CPU to oops, and it's oopsing the first
* time then let it proceed.
*
* This is all enabled by the pause_on_oops kernel boot option. We do all
* this to ensure that oopses don't scroll off the screen. It has the
* side-effect of preventing later-oopsing CPUs from mucking up the display,
* too.
*
* It turns out that the CPU which is allowed to print ends up pausing for
* the right duration, whereas all the other CPUs pause for twice as long:
* once in oops_enter(), once in oops_exit().
*/
void oops_enter(void)
{
tracing_off();
/* can't trust the integrity of the kernel anymore: */
debug_locks_off();
do_oops_enter_exit();
}
/*
* 64-bit random ID for oopses:
*/
static u64 oops_id;
static int init_oops_id(void)
{
if (!oops_id)
get_random_bytes(&oops_id, sizeof(oops_id));
else
oops_id++;
return 0;
}
late_initcall(init_oops_id);
static void print_oops_end_marker(void)
{
init_oops_id();
printk(KERN_WARNING "---[ end trace %016llx ]---\n",
(unsigned long long)oops_id);
}
/*
* Called when the architecture exits its oops handler, after printing
* everything.
*/
void oops_exit(void)
{
do_oops_enter_exit();
print_oops_end_marker();
}
#ifdef WANT_WARN_ON_SLOWPATH
struct slowpath_args {
const char *fmt;
va_list args;
};
static void warn_slowpath_common(const char *file, int line, void *caller, struct slowpath_args *args)
{
const char *board;
printk(KERN_WARNING "------------[ cut here ]------------\n");
printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller);
board = dmi_get_system_info(DMI_PRODUCT_NAME);
if (board)
printk(KERN_WARNING "Hardware name: %s\n", board);
if (args)
vprintk(args->fmt, args->args);
print_modules();
dump_stack();
print_oops_end_marker();
add_taint(TAINT_WARN);
}
void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
{
struct slowpath_args args;
args.fmt = fmt;
va_start(args.args, fmt);
warn_slowpath_common(file, line, __builtin_return_address(0), &args);
va_end(args.args);
}
EXPORT_SYMBOL(warn_slowpath_fmt);
void warn_slowpath_null(const char *file, int line)
{
warn_slowpath_common(file, line, __builtin_return_address(0), NULL);
}
EXPORT_SYMBOL(warn_slowpath_null);
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* Called when gcc's -fstack-protector feature is used, and
* gcc detects corruption of the on-stack canary value
*/
void __stack_chk_fail(void)
{
panic("stack-protector: Kernel stack is corrupted in: %p\n",
__builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);
#endif
core_param(panic, panic_timeout, int, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);

797
kernel/kernel/params.c Normal file
View File

@@ -0,0 +1,797 @@
/* Helpers for initial module or kernel cmdline parsing
Copyright (C) 2001 Rusty Russell.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(fmt, a...)
#endif
static inline char dash2underscore(char c)
{
if (c == '-')
return '_';
return c;
}
static inline int parameq(const char *input, const char *paramname)
{
unsigned int i;
for (i = 0; dash2underscore(input[i]) == paramname[i]; i++)
if (input[i] == '\0')
return 1;
return 0;
}
static int parse_one(char *param,
char *val,
struct kernel_param *params,
unsigned num_params,
int (*handle_unknown)(char *param, char *val))
{
unsigned int i;
/* Find parameter */
for (i = 0; i < num_params; i++) {
if (parameq(param, params[i].name)) {
DEBUGP("They are equal! Calling %p\n",
params[i].set);
return params[i].set(val, &params[i]);
}
}
if (handle_unknown) {
DEBUGP("Unknown argument: calling %p\n", handle_unknown);
return handle_unknown(param, val);
}
DEBUGP("Unknown argument `%s'\n", param);
return -ENOENT;
}
/* You can use " around spaces, but can't escape ". */
/* Hyphens and underscores equivalent in parameter names. */
static char *next_arg(char *args, char **param, char **val)
{
unsigned int i, equals = 0;
int in_quote = 0, quoted = 0;
char *next;
if (*args == '"') {
args++;
in_quote = 1;
quoted = 1;
}
for (i = 0; args[i]; i++) {
if (isspace(args[i]) && !in_quote)
break;
if (equals == 0) {
if (args[i] == '=')
equals = i;
}
if (args[i] == '"')
in_quote = !in_quote;
}
*param = args;
if (!equals)
*val = NULL;
else {
args[equals] = '\0';
*val = args + equals + 1;
/* Don't include quotes in value. */
if (**val == '"') {
(*val)++;
if (args[i-1] == '"')
args[i-1] = '\0';
}
if (quoted && args[i-1] == '"')
args[i-1] = '\0';
}
if (args[i]) {
args[i] = '\0';
next = args + i + 1;
} else
next = args + i;
/* Chew up trailing spaces. */
while (isspace(*next))
next++;
return next;
}
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
int parse_args(const char *name,
char *args,
struct kernel_param *params,
unsigned num,
int (*unknown)(char *param, char *val))
{
char *param, *val;
DEBUGP("Parsing ARGS: %s\n", args);
/* Chew leading spaces */
while (isspace(*args))
args++;
while (*args) {
int ret;
int irq_was_disabled;
args = next_arg(args, &param, &val);
irq_was_disabled = irqs_disabled();
ret = parse_one(param, val, params, num, unknown);
if (irq_was_disabled && !irqs_disabled()) {
printk(KERN_WARNING "parse_args(): option '%s' enabled "
"irq's!\n", param);
}
switch (ret) {
case -ENOENT:
printk(KERN_ERR "%s: Unknown parameter `%s'\n",
name, param);
return ret;
case -ENOSPC:
printk(KERN_ERR
"%s: `%s' too large for parameter `%s'\n",
name, val ?: "", param);
return ret;
case 0:
break;
default:
printk(KERN_ERR
"%s: `%s' invalid for parameter `%s'\n",
name, val ?: "", param);
return ret;
}
}
/* All parsed OK. */
return 0;
}
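/*
 * Illustrative sketch, not part of the original file: feeding a writable
 * command line such as   foo=bar,bar2 baz="fuz wiz"   through parse_args()
 * with a small kernel_param table. All "example" names are hypothetical;
 * quoting and hyphen/underscore handling follow next_arg()/parameq() above.
 */
#if 0
static int example_show_param(const char *val, struct kernel_param *kp)
{
	printk(KERN_INFO "%s = %s\n", kp->name, val ? val : "(set)");
	return 0;
}

static struct kernel_param example_params[] = {
	{ .name = "foo", .set = example_show_param },
	{ .name = "baz", .set = example_show_param },
};

static void example_parse(char *cmdline)	/* cmdline is modified in place */
{
	parse_args("example", cmdline, example_params,
		   ARRAY_SIZE(example_params), NULL);
}
#endif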
/* Lazy bastard, eh? */
#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \
int param_set_##name(const char *val, struct kernel_param *kp) \
{ \
tmptype l; \
int ret; \
\
if (!val) return -EINVAL; \
ret = strtolfn(val, 0, &l); \
if (ret == -EINVAL || ((type)l != l)) \
return -EINVAL; \
*((type *)kp->arg) = l; \
return 0; \
} \
int param_get_##name(char *buffer, struct kernel_param *kp) \
{ \
return sprintf(buffer, format, *((type *)kp->arg)); \
}
STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol);
STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol);
STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol);
STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul);
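/*
 * Illustrative sketch, not part of the original file: a module-level int
 * parameter reaches param_set_int()/param_get_int() above through the
 * module_param() macro. "example_threshold" is a hypothetical name.
 */
#if 0
static int example_threshold = 10;
module_param(example_threshold, int, 0644);
MODULE_PARM_DESC(example_threshold, "Hypothetical threshold handled by param_set_int()");
#endif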
int param_set_charp(const char *val, struct kernel_param *kp)
{
if (!val) {
printk(KERN_ERR "%s: string parameter expected\n",
kp->name);
return -EINVAL;
}
if (strlen(val) > 1024) {
printk(KERN_ERR "%s: string parameter too long\n",
kp->name);
return -ENOSPC;
}
/* This is a hack. We can't kstrdup() in early boot, and we
* don't need to; this mangled commandline is preserved. */
if (slab_is_available()) {
*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
if (!*(char **)kp->arg)
return -ENOMEM;
} else
*(const char **)kp->arg = val;
return 0;
}
int param_get_charp(char *buffer, struct kernel_param *kp)
{
return sprintf(buffer, "%s", *((char **)kp->arg));
}
/* Actually could be a bool or an int, for historical reasons. */
int param_set_bool(const char *val, struct kernel_param *kp)
{
bool v;
/* No equals means "set"... */
if (!val) val = "1";
/* One of =[yYnN01] */
switch (val[0]) {
case 'y': case 'Y': case '1':
v = true;
break;
case 'n': case 'N': case '0':
v = false;
break;
default:
return -EINVAL;
}
if (kp->flags & KPARAM_ISBOOL)
*(bool *)kp->arg = v;
else
*(int *)kp->arg = v;
return 0;
}
int param_get_bool(char *buffer, struct kernel_param *kp)
{
bool val;
if (kp->flags & KPARAM_ISBOOL)
val = *(bool *)kp->arg;
else
val = *(int *)kp->arg;
/* Y and N chosen as being relatively non-controversial */
return sprintf(buffer, "%c", val ? 'Y' : 'N');
}
/* This one must be bool. */
int param_set_invbool(const char *val, struct kernel_param *kp)
{
int ret;
bool boolval;
struct kernel_param dummy;
dummy.arg = &boolval;
dummy.flags = KPARAM_ISBOOL;
ret = param_set_bool(val, &dummy);
if (ret == 0)
*(bool *)kp->arg = !boolval;
return ret;
}
int param_get_invbool(char *buffer, struct kernel_param *kp)
{
return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y');
}
/* We break the rule and mangle the string. */
static int param_array(const char *name,
const char *val,
unsigned int min, unsigned int max,
void *elem, int elemsize,
int (*set)(const char *, struct kernel_param *kp),
u16 flags,
unsigned int *num)
{
int ret;
struct kernel_param kp;
char save;
/* Get the name right for errors. */
kp.name = name;
kp.arg = elem;
kp.flags = flags;
/* No equals sign? */
if (!val) {
printk(KERN_ERR "%s: expects arguments\n", name);
return -EINVAL;
}
*num = 0;
/* We expect a comma-separated list of values. */
do {
int len;
if (*num == max) {
printk(KERN_ERR "%s: can only take %i arguments\n",
name, max);
return -EINVAL;
}
len = strcspn(val, ",");
/* nul-terminate and parse */
save = val[len];
((char *)val)[len] = '\0';
ret = set(val, &kp);
if (ret != 0)
return ret;
kp.arg += elemsize;
val += len+1;
(*num)++;
} while (save == ',');
if (*num < min) {
printk(KERN_ERR "%s: needs at least %i arguments\n",
name, min);
return -EINVAL;
}
return 0;
}
int param_array_set(const char *val, struct kernel_param *kp)
{
const struct kparam_array *arr = kp->arr;
unsigned int temp_num;
return param_array(kp->name, val, 1, arr->max, arr->elem,
arr->elemsize, arr->set, kp->flags,
arr->num ?: &temp_num);
}
int param_array_get(char *buffer, struct kernel_param *kp)
{
int i, off, ret;
const struct kparam_array *arr = kp->arr;
struct kernel_param p;
p = *kp;
for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
if (i)
buffer[off++] = ',';
p.arg = arr->elem + arr->elemsize * i;
ret = arr->get(buffer + off, &p);
if (ret < 0)
return ret;
off += ret;
}
buffer[off] = '\0';
return off;
}
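/*
 * Illustrative sketch, not part of the original file: a comma-separated
 * value list (e.g. example_ports=80,443,8080) is routed through
 * param_array_set()/param_array_get() by the module_param_array() macro.
 * The "example" names are hypothetical.
 */
#if 0
static int example_ports[4];
static int example_nports;
module_param_array(example_ports, int, &example_nports, 0444);
MODULE_PARM_DESC(example_ports, "Hypothetical list of up to 4 ports");
#endif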
int param_set_copystring(const char *val, struct kernel_param *kp)
{
const struct kparam_string *kps = kp->str;
if (!val) {
printk(KERN_ERR "%s: missing param set value\n", kp->name);
return -EINVAL;
}
if (strlen(val)+1 > kps->maxlen) {
printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
kp->name, kps->maxlen-1);
return -ENOSPC;
}
strcpy(kps->string, val);
return 0;
}
int param_get_string(char *buffer, struct kernel_param *kp)
{
const struct kparam_string *kps = kp->str;
return strlcpy(buffer, kps->string, kps->maxlen);
}
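/*
 * Illustrative sketch, not part of the original file: a fixed-size string
 * parameter backed by param_set_copystring()/param_get_string() via the
 * module_param_string() macro. The "example" names are hypothetical.
 */
#if 0
static char example_label[16] = "default";
module_param_string(example_label, example_label, sizeof(example_label), 0644);
MODULE_PARM_DESC(example_label, "Hypothetical label copied by param_set_copystring()");
#endif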
/* sysfs output in /sys/modules/XYZ/parameters/ */
#define to_module_attr(n) container_of(n, struct module_attribute, attr);
#define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
extern struct kernel_param __start___param[], __stop___param[];
struct param_attribute
{
struct module_attribute mattr;
struct kernel_param *param;
};
struct module_param_attrs
{
unsigned int num;
struct attribute_group grp;
struct param_attribute attrs[0];
};
#ifdef CONFIG_SYSFS
#define to_param_attr(n) container_of(n, struct param_attribute, mattr);
static ssize_t param_attr_show(struct module_attribute *mattr,
struct module *mod, char *buf)
{
int count;
struct param_attribute *attribute = to_param_attr(mattr);
if (!attribute->param->get)
return -EPERM;
count = attribute->param->get(buf, attribute->param);
if (count > 0) {
strcat(buf, "\n");
++count;
}
return count;
}
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
static ssize_t param_attr_store(struct module_attribute *mattr,
struct module *owner,
const char *buf, size_t len)
{
int err;
struct param_attribute *attribute = to_param_attr(mattr);
if (!attribute->param->set)
return -EPERM;
err = attribute->param->set(buf, attribute->param);
if (!err)
return len;
return err;
}
#endif
#ifdef CONFIG_MODULES
#define __modinit
#else
#define __modinit __init
#endif
#ifdef CONFIG_SYSFS
/*
* add_sysfs_param - add a parameter to sysfs
* @mk: struct module_kobject
* @kparam: the actual parameter definition to add to sysfs
* @name: name of parameter
*
* Create a kobject for a (per-module) parameter if mk->mp is NULL, and
* create the file in sysfs. Returns an error on out of memory. Always cleans up
* if there's an error.
*/
static __modinit int add_sysfs_param(struct module_kobject *mk,
struct kernel_param *kp,
const char *name)
{
struct module_param_attrs *new;
struct attribute **attrs;
int err, num;
/* We don't bother calling this with invisible parameters. */
BUG_ON(!kp->perm);
if (!mk->mp) {
num = 0;
attrs = NULL;
} else {
num = mk->mp->num;
attrs = mk->mp->grp.attrs;
}
/* Enlarge. */
new = krealloc(mk->mp,
sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
GFP_KERNEL);
if (!new) {
kfree(mk->mp);
err = -ENOMEM;
goto fail;
}
attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
if (!attrs) {
err = -ENOMEM;
goto fail_free_new;
}
/* Sysfs wants everything zeroed. */
memset(new, 0, sizeof(*new));
memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
memset(&attrs[num], 0, sizeof(attrs[num]));
new->grp.name = "parameters";
new->grp.attrs = attrs;
/* Tack new one on the end. */
new->attrs[num].param = kp;
new->attrs[num].mattr.show = param_attr_show;
new->attrs[num].mattr.store = param_attr_store;
new->attrs[num].mattr.attr.name = (char *)name;
new->attrs[num].mattr.attr.mode = kp->perm;
new->num = num+1;
/* Fix up all the pointers, since krealloc can move us */
for (num = 0; num < new->num; num++)
new->grp.attrs[num] = &new->attrs[num].mattr.attr;
new->grp.attrs[num] = NULL;
mk->mp = new;
return 0;
fail_free_new:
kfree(new);
fail:
mk->mp = NULL;
return err;
}
#ifdef CONFIG_MODULES
static void free_module_param_attrs(struct module_kobject *mk)
{
kfree(mk->mp->grp.attrs);
kfree(mk->mp);
mk->mp = NULL;
}
/*
* module_param_sysfs_setup - setup sysfs support for one module
* @mod: module
* @kparam: module parameters (array)
* @num_params: number of module parameters
*
* Adds sysfs entries for module parameters under
* /sys/module/[mod->name]/parameters/
*/
int module_param_sysfs_setup(struct module *mod,
struct kernel_param *kparam,
unsigned int num_params)
{
int i, err;
bool params = false;
for (i = 0; i < num_params; i++) {
if (kparam[i].perm == 0)
continue;
err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
if (err)
return err;
params = true;
}
if (!params)
return 0;
/* Create the param group. */
err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
if (err)
free_module_param_attrs(&mod->mkobj);
return err;
}
/*
* module_param_sysfs_remove - remove sysfs support for one module
* @mod: module
*
* Remove sysfs entries for module parameters and the corresponding
* kobject.
*/
void module_param_sysfs_remove(struct module *mod)
{
if (mod->mkobj.mp) {
sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
/* We are positive that no one is using any param
* attrs at this point. Deallocate immediately. */
free_module_param_attrs(&mod->mkobj);
}
}
#endif
void destroy_params(const struct kernel_param *params, unsigned num)
{
/* FIXME: This should free kmalloced charp parameters. It doesn't. */
}
static void __init kernel_add_sysfs_param(const char *name,
struct kernel_param *kparam,
unsigned int name_skip)
{
struct module_kobject *mk;
struct kobject *kobj;
int err;
kobj = kset_find_obj(module_kset, name);
if (kobj) {
/* We already have one. Remove params so we can add more. */
mk = to_module_kobject(kobj);
/* We need to remove it before adding parameters. */
sysfs_remove_group(&mk->kobj, &mk->mp->grp);
} else {
mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
BUG_ON(!mk);
mk->mod = THIS_MODULE;
mk->kobj.kset = module_kset;
err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL,
"%s", name);
if (err) {
kobject_put(&mk->kobj);
printk(KERN_ERR "Module '%s' failed add to sysfs, "
"error number %d\n", name, err);
printk(KERN_ERR "The system will be unstable now.\n");
return;
}
/* Grab an extra reference so the final kobject_put() below is balanced in both cases. */
kobject_get(&mk->kobj);
}
/* These should not fail at boot. */
err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
BUG_ON(err);
err = sysfs_create_group(&mk->kobj, &mk->mp->grp);
BUG_ON(err);
kobject_uevent(&mk->kobj, KOBJ_ADD);
kobject_put(&mk->kobj);
}
/*
* param_sysfs_builtin - add contents in /sys/parameters for built-in modules
*
* Add module_parameters to sysfs for "modules" built into the kernel.
*
* The "module" name (KBUILD_MODNAME) is stored before a dot, the
* "parameter" name is stored behind a dot in kernel_param->name. So,
* extract the "module" name for all built-in kernel_param-eters,
* and for each such parameter call kernel_add_sysfs_param() with that name.
*/
static void __init param_sysfs_builtin(void)
{
struct kernel_param *kp;
unsigned int name_len;
char modname[MODULE_NAME_LEN];
for (kp = __start___param; kp < __stop___param; kp++) {
char *dot;
if (kp->perm == 0)
continue;
dot = strchr(kp->name, '.');
if (!dot) {
/* This happens for core_param() */
strcpy(modname, "kernel");
name_len = 0;
} else {
name_len = dot - kp->name + 1;
strlcpy(modname, kp->name, name_len);
}
kernel_add_sysfs_param(modname, kp, name_len);
}
}
/* module-related sysfs stuff */
static ssize_t module_attr_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
{
struct module_attribute *attribute;
struct module_kobject *mk;
int ret;
attribute = to_module_attr(attr);
mk = to_module_kobject(kobj);
if (!attribute->show)
return -EIO;
ret = attribute->show(attribute, mk->mod, buf);
return ret;
}
static ssize_t module_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
struct module_attribute *attribute;
struct module_kobject *mk;
int ret;
attribute = to_module_attr(attr);
mk = to_module_kobject(kobj);
if (!attribute->store)
return -EIO;
ret = attribute->store(attribute, mk->mod, buf, len);
return ret;
}
static struct sysfs_ops module_sysfs_ops = {
.show = module_attr_show,
.store = module_attr_store,
};
static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
struct kobj_type *ktype = get_ktype(kobj);
if (ktype == &module_ktype)
return 1;
return 0;
}
static struct kset_uevent_ops module_uevent_ops = {
.filter = uevent_filter,
};
struct kset *module_kset;
int module_sysfs_initialized;
struct kobj_type module_ktype = {
.sysfs_ops = &module_sysfs_ops,
};
/*
* param_sysfs_init - wrapper for built-in params support
*/
static int __init param_sysfs_init(void)
{
module_kset = kset_create_and_add("module", &module_uevent_ops, NULL);
if (!module_kset) {
printk(KERN_WARNING "%s (%d): error creating kset\n",
__FILE__, __LINE__);
return -ENOMEM;
}
module_sysfs_initialized = 1;
param_sysfs_builtin();
return 0;
}
subsys_initcall(param_sysfs_init);
#endif /* CONFIG_SYSFS */
EXPORT_SYMBOL(param_set_byte);
EXPORT_SYMBOL(param_get_byte);
EXPORT_SYMBOL(param_set_short);
EXPORT_SYMBOL(param_get_short);
EXPORT_SYMBOL(param_set_ushort);
EXPORT_SYMBOL(param_get_ushort);
EXPORT_SYMBOL(param_set_int);
EXPORT_SYMBOL(param_get_int);
EXPORT_SYMBOL(param_set_uint);
EXPORT_SYMBOL(param_get_uint);
EXPORT_SYMBOL(param_set_long);
EXPORT_SYMBOL(param_get_long);
EXPORT_SYMBOL(param_set_ulong);
EXPORT_SYMBOL(param_get_ulong);
EXPORT_SYMBOL(param_set_charp);
EXPORT_SYMBOL(param_get_charp);
EXPORT_SYMBOL(param_set_bool);
EXPORT_SYMBOL(param_get_bool);
EXPORT_SYMBOL(param_set_invbool);
EXPORT_SYMBOL(param_get_invbool);
EXPORT_SYMBOL(param_array_set);
EXPORT_SYMBOL(param_array_get);
EXPORT_SYMBOL(param_set_copystring);
EXPORT_SYMBOL(param_get_string);

5172
kernel/kernel/perf_event.c Normal file

File diff suppressed because it is too large

524
kernel/kernel/pid.c Normal file
View File

@@ -0,0 +1,524 @@
/*
* Generic pidhash and scalable, time-bounded PID allocator
*
* (C) 2002-2003 William Irwin, IBM
* (C) 2004 William Irwin, Oracle
* (C) 2002-2004 Ingo Molnar, Red Hat
*
* pid-structures are backing objects for tasks sharing a given ID to chain
* against. There is very little to them aside from hashing them and
* parking tasks using given ID's on a list.
*
* The hash is always changed with the tasklist_lock write-acquired,
* and the hash is only accessed with the tasklist_lock at least
* read-acquired, so there's no additional SMP locking needed here.
*
* We have a list of bitmap pages, which bitmaps represent the PID space.
* Allocating and freeing PIDs is completely lockless. The worst-case
* allocation scenario when all but one out of 1 million PIDs possible are
* allocated already: the scanning of 32 list entries and at most PAGE_SIZE
* bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
*
* Pid namespaces:
* (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
* (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
* Many thanks to Oleg Nesterov for comments and help
*
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
static struct hlist_head *pid_hash;
static unsigned int pidhash_shift = 4;
struct pid init_struct_pid = INIT_STRUCT_PID;
int pid_max = PID_MAX_DEFAULT;
#define RESERVED_PIDS 300
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
#define BITS_PER_PAGE (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
static inline int mk_pid(struct pid_namespace *pid_ns,
struct pidmap *map, int off)
{
return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
}
#define find_next_offset(map, off) \
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
/*
* PID-map pages start out as NULL, they get allocated upon
* first use and are never deallocated. This way a low pid_max
* value does not cause lots of bitmaps to be allocated, but
* the scheme scales up to 4 million PIDs at runtime.
*/
struct pid_namespace init_pid_ns = {
.kref = {
.refcount = ATOMIC_INIT(2),
},
.pidmap = {
[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
},
.last_pid = 0,
.level = 0,
.child_reaper = &init_task,
};
EXPORT_SYMBOL_GPL(init_pid_ns);
int is_container_init(struct task_struct *tsk)
{
int ret = 0;
struct pid *pid;
rcu_read_lock();
pid = task_pid(tsk);
if (pid != NULL && pid->numbers[pid->level].nr == 1)
ret = 1;
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL(is_container_init);
/*
* Note: disable interrupts while the pidmap_lock is held as an
* interrupt might come in and do read_lock(&tasklist_lock).
*
* If we don't disable interrupts there is a nasty deadlock between
* detach_pid()->free_pid() and another cpu that does
* spin_lock(&pidmap_lock) followed by an interrupt routine that does
* read_lock(&tasklist_lock);
*
* After we clean up the tasklist_lock and know there are no
* irq handlers that take it we can leave the interrupts enabled.
* For now it is easier to be safe than to prove it can't happen.
*/
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
static void free_pidmap(struct upid *upid)
{
int nr = upid->nr;
struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
int offset = nr & BITS_PER_PAGE_MASK;
clear_bit(offset, map->page);
atomic_inc(&map->nr_free);
}
static int alloc_pidmap(struct pid_namespace *pid_ns)
{
int i, offset, max_scan, pid, last = pid_ns->last_pid;
struct pidmap *map;
pid = last + 1;
if (pid >= pid_max)
pid = RESERVED_PIDS;
offset = pid & BITS_PER_PAGE_MASK;
map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
for (i = 0; i <= max_scan; ++i) {
if (unlikely(!map->page)) {
void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/*
* Free the page if someone raced with us
* installing it:
*/
spin_lock_irq(&pidmap_lock);
if (map->page)
kfree(page);
else
map->page = page;
spin_unlock_irq(&pidmap_lock);
if (unlikely(!map->page))
break;
}
if (likely(atomic_read(&map->nr_free))) {
do {
if (!test_and_set_bit(offset, map->page)) {
atomic_dec(&map->nr_free);
pid_ns->last_pid = pid;
return pid;
}
offset = find_next_offset(map, offset);
pid = mk_pid(pid_ns, map, offset);
/*
* find_next_offset() found a bit, the pid from it
* is in-bounds, and if we fell back to the last
* bitmap block and the final block was the same
* as the starting point, pid is before last_pid.
*/
} while (offset < BITS_PER_PAGE && pid < pid_max &&
(i != max_scan || pid < last ||
!((last+1) & BITS_PER_PAGE_MASK)));
}
if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
++map;
offset = 0;
} else {
map = &pid_ns->pidmap[0];
offset = RESERVED_PIDS;
if (unlikely(last == offset))
break;
}
pid = mk_pid(pid_ns, map, offset);
}
return -1;
}
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
{
int offset;
struct pidmap *map, *end;
if (last >= PID_MAX_LIMIT)
return -1;
offset = (last + 1) & BITS_PER_PAGE_MASK;
map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
end = &pid_ns->pidmap[PIDMAP_ENTRIES];
for (; map < end; map++, offset = 0) {
if (unlikely(!map->page))
continue;
offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
if (offset < BITS_PER_PAGE)
return mk_pid(pid_ns, map, offset);
}
return -1;
}
void put_pid(struct pid *pid)
{
struct pid_namespace *ns;
if (!pid)
return;
ns = pid->numbers[pid->level].ns;
if ((atomic_read(&pid->count) == 1) ||
atomic_dec_and_test(&pid->count)) {
kmem_cache_free(ns->pid_cachep, pid);
put_pid_ns(ns);
}
}
EXPORT_SYMBOL_GPL(put_pid);
static void delayed_put_pid(struct rcu_head *rhp)
{
struct pid *pid = container_of(rhp, struct pid, rcu);
put_pid(pid);
}
void free_pid(struct pid *pid)
{
/* We can be called with write_lock_irq(&tasklist_lock) held */
int i;
unsigned long flags;
spin_lock_irqsave(&pidmap_lock, flags);
for (i = 0; i <= pid->level; i++)
hlist_del_rcu(&pid->numbers[i].pid_chain);
spin_unlock_irqrestore(&pidmap_lock, flags);
for (i = 0; i <= pid->level; i++)
free_pidmap(pid->numbers + i);
call_rcu(&pid->rcu, delayed_put_pid);
}
struct pid *alloc_pid(struct pid_namespace *ns)
{
struct pid *pid;
enum pid_type type;
int i, nr;
struct pid_namespace *tmp;
struct upid *upid;
pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
if (!pid)
goto out;
tmp = ns;
for (i = ns->level; i >= 0; i--) {
nr = alloc_pidmap(tmp);
if (nr < 0)
goto out_free;
pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
tmp = tmp->parent;
}
get_pid_ns(ns);
pid->level = ns->level;
atomic_set(&pid->count, 1);
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
spin_lock_irq(&pidmap_lock);
for (i = ns->level; i >= 0; i--) {
upid = &pid->numbers[i];
hlist_add_head_rcu(&upid->pid_chain,
&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
}
spin_unlock_irq(&pidmap_lock);
out:
return pid;
out_free:
while (++i <= ns->level)
free_pidmap(pid->numbers + i);
kmem_cache_free(ns->pid_cachep, pid);
pid = NULL;
goto out;
}
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
struct hlist_node *elem;
struct upid *pnr;
hlist_for_each_entry_rcu(pnr, elem,
&pid_hash[pid_hashfn(nr, ns)], pid_chain)
if (pnr->nr == nr && pnr->ns == ns)
return container_of(pnr, struct pid,
numbers[ns->level]);
return NULL;
}
EXPORT_SYMBOL_GPL(find_pid_ns);
struct pid *find_vpid(int nr)
{
return find_pid_ns(nr, current->nsproxy->pid_ns);
}
EXPORT_SYMBOL_GPL(find_vpid);
/*
* attach_pid() must be called with the tasklist_lock write-held.
*/
void attach_pid(struct task_struct *task, enum pid_type type,
struct pid *pid)
{
struct pid_link *link;
link = &task->pids[type];
link->pid = pid;
hlist_add_head_rcu(&link->node, &pid->tasks[type]);
}
static void __change_pid(struct task_struct *task, enum pid_type type,
struct pid *new)
{
struct pid_link *link;
struct pid *pid;
int tmp;
link = &task->pids[type];
pid = link->pid;
hlist_del_rcu(&link->node);
link->pid = new;
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
if (!hlist_empty(&pid->tasks[tmp]))
return;
free_pid(pid);
}
void detach_pid(struct task_struct *task, enum pid_type type)
{
__change_pid(task, type, NULL);
}
void change_pid(struct task_struct *task, enum pid_type type,
struct pid *pid)
{
__change_pid(task, type, pid);
attach_pid(task, type, pid);
}
/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type type)
{
new->pids[type].pid = old->pids[type].pid;
hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
}
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
first = rcu_dereference(pid->tasks[type].first);
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}
return result;
}
EXPORT_SYMBOL(pid_task);
/*
* Must be called under rcu_read_lock() or with tasklist_lock read-held.
*/
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
}
struct task_struct *find_task_by_vpid(pid_t vnr)
{
return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
}
struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{
struct pid *pid;
rcu_read_lock();
if (type != PIDTYPE_PID)
task = task->group_leader;
pid = get_pid(task->pids[type].pid);
rcu_read_unlock();
return pid;
}
struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result;
rcu_read_lock();
result = pid_task(pid, type);
if (result)
get_task_struct(result);
rcu_read_unlock();
return result;
}
struct pid *find_get_pid(pid_t nr)
{
struct pid *pid;
rcu_read_lock();
pid = get_pid(find_vpid(nr));
rcu_read_unlock();
return pid;
}
EXPORT_SYMBOL_GPL(find_get_pid);
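/*
 * Illustrative sketch, not part of the original file: resolving a pid_t as
 * seen from the current namespace into a task_struct with correct reference
 * counting, using only the helpers defined above. example_kill_by_nr() is
 * hypothetical.
 */
#if 0
static int example_kill_by_nr(pid_t nr)
{
	struct pid *pid = find_get_pid(nr);
	struct task_struct *task;
	int ret = -ESRCH;

	if (!pid)
		return ret;
	task = get_pid_task(pid, PIDTYPE_PID);
	if (task) {
		ret = send_sig(SIGKILL, task, 1);
		put_task_struct(task);		/* drop get_pid_task() reference */
	}
	put_pid(pid);				/* drop find_get_pid() reference */
	return ret;
}
#endif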
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
struct upid *upid;
pid_t nr = 0;
if (pid && ns->level <= pid->level) {
upid = &pid->numbers[ns->level];
if (upid->ns == ns)
nr = upid->nr;
}
return nr;
}
pid_t pid_vnr(struct pid *pid)
{
return pid_nr_ns(pid, current->nsproxy->pid_ns);
}
EXPORT_SYMBOL_GPL(pid_vnr);
pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
struct pid_namespace *ns)
{
pid_t nr = 0;
rcu_read_lock();
if (!ns)
ns = current->nsproxy->pid_ns;
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
task = task->group_leader;
nr = pid_nr_ns(task->pids[type].pid, ns);
}
rcu_read_unlock();
return nr;
}
EXPORT_SYMBOL(__task_pid_nr_ns);
pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
return pid_nr_ns(task_tgid(tsk), ns);
}
EXPORT_SYMBOL(task_tgid_nr_ns);
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
return ns_of_pid(task_pid(tsk));
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);
/*
* Used by proc to find the first pid that is greater than or equal to nr.
*
* If there is a pid at nr this function is exactly the same as find_pid_ns.
*/
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
struct pid *pid;
do {
pid = find_pid_ns(nr, ns);
if (pid)
break;
nr = next_pidmap(ns, nr);
} while (nr > 0);
return pid;
}
/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
* more.
*/
void __init pidhash_init(void)
{
int i, pidhash_size;
pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
HASH_EARLY | HASH_SMALL,
&pidhash_shift, NULL, 4096);
pidhash_size = 1 << pidhash_shift;
for (i = 0; i < pidhash_size; i++)
INIT_HLIST_HEAD(&pid_hash[i]);
}
void __init pidmap_init(void)
{
init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}

193
kernel/kernel/pid_namespace.c Normal file
View File

@@ -0,0 +1,193 @@
/*
* Pid namespaces
*
* Authors:
* (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
* (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
* Many thanks to Oleg Nesterov for comments and help
*
*/
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/syscalls.h>
#include <linux/err.h>
#include <linux/acct.h>
#define BITS_PER_PAGE (PAGE_SIZE*8)
struct pid_cache {
int nr_ids;
char name[16];
struct kmem_cache *cachep;
struct list_head list;
};
static LIST_HEAD(pid_caches_lh);
static DEFINE_MUTEX(pid_caches_mutex);
static struct kmem_cache *pid_ns_cachep;
/*
* creates the kmem cache to allocate pids from.
* @nr_ids: the number of numerical ids this pid will have to carry
*/
static struct kmem_cache *create_pid_cachep(int nr_ids)
{
struct pid_cache *pcache;
struct kmem_cache *cachep;
mutex_lock(&pid_caches_mutex);
list_for_each_entry(pcache, &pid_caches_lh, list)
if (pcache->nr_ids == nr_ids)
goto out;
pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
if (pcache == NULL)
goto err_alloc;
snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
cachep = kmem_cache_create(pcache->name,
sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
0, SLAB_HWCACHE_ALIGN, NULL);
if (cachep == NULL)
goto err_cachep;
pcache->nr_ids = nr_ids;
pcache->cachep = cachep;
list_add(&pcache->list, &pid_caches_lh);
out:
mutex_unlock(&pid_caches_mutex);
return pcache->cachep;
err_cachep:
kfree(pcache);
err_alloc:
mutex_unlock(&pid_caches_mutex);
return NULL;
}
static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
{
struct pid_namespace *ns;
unsigned int level = parent_pid_ns->level + 1;
int i;
ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
if (ns == NULL)
goto out;
ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!ns->pidmap[0].page)
goto out_free;
ns->pid_cachep = create_pid_cachep(level + 1);
if (ns->pid_cachep == NULL)
goto out_free_map;
kref_init(&ns->kref);
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
set_bit(0, ns->pidmap[0].page);
atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
for (i = 1; i < PIDMAP_ENTRIES; i++)
atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
return ns;
out_free_map:
kfree(ns->pidmap[0].page);
out_free:
kmem_cache_free(pid_ns_cachep, ns);
out:
return ERR_PTR(-ENOMEM);
}
static void destroy_pid_namespace(struct pid_namespace *ns)
{
int i;
for (i = 0; i < PIDMAP_ENTRIES; i++)
kfree(ns->pidmap[i].page);
kmem_cache_free(pid_ns_cachep, ns);
}
struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
{
if (!(flags & CLONE_NEWPID))
return get_pid_ns(old_ns);
if (flags & (CLONE_THREAD|CLONE_PARENT))
return ERR_PTR(-EINVAL);
return create_pid_namespace(old_ns);
}
void free_pid_ns(struct kref *kref)
{
struct pid_namespace *ns, *parent;
ns = container_of(kref, struct pid_namespace, kref);
parent = ns->parent;
destroy_pid_namespace(ns);
if (parent != NULL)
put_pid_ns(parent);
}
void zap_pid_ns_processes(struct pid_namespace *pid_ns)
{
int nr;
int rc;
struct task_struct *task;
/*
* The last thread in the cgroup-init thread group is terminating.
* Find remaining pid_ts in the namespace, signal and wait for them
* to exit.
*
* Note: This signals each thread in the namespace - even those that
* belong to the same thread group. To avoid this, we would have
* to walk the entire tasklist looking for processes in this
* namespace, but that could be unnecessarily expensive if the
* pid namespace has just a few processes. Or we need to
* maintain a tasklist for each pid namespace.
*
*/
read_lock(&tasklist_lock);
nr = next_pidmap(pid_ns, 1);
while (nr > 0) {
rcu_read_lock();
/*
* Use force_sig() since it clears SIGNAL_UNKILLABLE, ensuring
* that any nested container's init processes don't ignore the
* signal.
*/
task = pid_task(find_vpid(nr), PIDTYPE_PID);
if (task)
force_sig(SIGKILL, task);
rcu_read_unlock();
nr = next_pidmap(pid_ns, nr);
}
read_unlock(&tasklist_lock);
do {
clear_thread_flag(TIF_SIGPENDING);
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
acct_exit_ns(pid_ns);
return;
}
static __init int pid_namespaces_init(void)
{
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
return 0;
}
__initcall(pid_namespaces_init);

423
kernel/kernel/pm_qos_params.c Normal file
View File

@@ -0,0 +1,423 @@
/*
* This module exposes the interface to kernel space for specifying
* QoS dependencies. It provides infrastructure for registration of:
*
* Dependents on a QoS value : register requirements
* Watchers of QoS value : get notified when target QoS value changes
*
* This QoS design is best effort based. Dependents register their QoS needs.
* Watchers register to keep track of the current QoS needs of the system.
*
* There are 3 basic classes of QoS parameter: latency, timeout, throughput
* each have defined units:
* latency: usec
* timeout: usec <-- currently not used.
* throughput: kbs (kilo byte / sec)
*
* There are lists of pm_qos_objects each one wrapping requirements, notifiers
*
* User mode requirements on a QoS parameter register themselves with the
* subsystem by opening the device node /dev/... and writing their request to
* the node. As long as the process holds a file handle open to the node the
* client continues to be accounted for. Upon file release the usermode
* requirement is removed and a new QoS target is computed. This way the
* requirement held by the application is cleaned up when it closes the file
* descriptor or exits, and the pm_qos_object gets an opportunity to clean up.
*
* Mark Gross <mgross@linux.intel.com>
*/
#include <linux/pm_qos_params.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/string.h>
#include <linux/platform_device.h>
#include <linux/init.h>
#include <linux/uaccess.h>
/*
* locking rule: all changes to requirements or notifiers lists
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
* held, taken with _irqsave. One lock to rule them all
*/
struct requirement_list {
struct list_head list;
union {
s32 value;
s32 usec;
s32 kbps;
};
char *name;
};
static s32 max_compare(s32 v1, s32 v2);
static s32 min_compare(s32 v1, s32 v2);
struct pm_qos_object {
struct requirement_list requirements;
struct blocking_notifier_head *notifiers;
struct miscdevice pm_qos_power_miscdev;
char *name;
s32 default_value;
atomic_t target_value;
s32 (*comparitor)(s32, s32);
};
static struct pm_qos_object null_pm_qos;
static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
static struct pm_qos_object cpu_dma_pm_qos = {
.requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)},
.notifiers = &cpu_dma_lat_notifier,
.name = "cpu_dma_latency",
.default_value = 2000 * USEC_PER_SEC,
.target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
.comparitor = min_compare
};
static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
static struct pm_qos_object network_lat_pm_qos = {
.requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)},
.notifiers = &network_lat_notifier,
.name = "network_latency",
.default_value = 2000 * USEC_PER_SEC,
.target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
.comparitor = min_compare
};
static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
static struct pm_qos_object network_throughput_pm_qos = {
.requirements =
{LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)},
.notifiers = &network_throughput_notifier,
.name = "network_throughput",
.default_value = 0,
.target_value = ATOMIC_INIT(0),
.comparitor = max_compare
};
static struct pm_qos_object *pm_qos_array[] = {
&null_pm_qos,
&cpu_dma_pm_qos,
&network_lat_pm_qos,
&network_throughput_pm_qos
};
static DEFINE_SPINLOCK(pm_qos_lock);
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos);
static int pm_qos_power_open(struct inode *inode, struct file *filp);
static int pm_qos_power_release(struct inode *inode, struct file *filp);
static const struct file_operations pm_qos_power_fops = {
.write = pm_qos_power_write,
.open = pm_qos_power_open,
.release = pm_qos_power_release,
};
/* static helper functions */
static s32 max_compare(s32 v1, s32 v2)
{
return max(v1, v2);
}
static s32 min_compare(s32 v1, s32 v2)
{
return min(v1, v2);
}
static void update_target(int target)
{
s32 extreme_value;
struct requirement_list *node;
unsigned long flags;
int call_notifier = 0;
spin_lock_irqsave(&pm_qos_lock, flags);
extreme_value = pm_qos_array[target]->default_value;
list_for_each_entry(node,
&pm_qos_array[target]->requirements.list, list) {
extreme_value = pm_qos_array[target]->comparitor(
extreme_value, node->value);
}
if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) {
call_notifier = 1;
atomic_set(&pm_qos_array[target]->target_value, extreme_value);
pr_debug("new target for qos %d is %d\n", target,
atomic_read(&pm_qos_array[target]->target_value));
}
spin_unlock_irqrestore(&pm_qos_lock, flags);
if (call_notifier)
blocking_notifier_call_chain(pm_qos_array[target]->notifiers,
(unsigned long) extreme_value, NULL);
}
static int register_pm_qos_misc(struct pm_qos_object *qos)
{
qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
qos->pm_qos_power_miscdev.name = qos->name;
qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops;
return misc_register(&qos->pm_qos_power_miscdev);
}
static int find_pm_qos_object_by_minor(int minor)
{
int pm_qos_class;
for (pm_qos_class = 0;
pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) {
if (minor ==
pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor)
return pm_qos_class;
}
return -1;
}
/**
* pm_qos_requirement - returns current system wide qos expectation
* @pm_qos_class: identification of which qos value is requested
*
* This function returns the current target value in an atomic manner.
*/
int pm_qos_requirement(int pm_qos_class)
{
return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
}
EXPORT_SYMBOL_GPL(pm_qos_requirement);
/**
* pm_qos_add_requirement - inserts new qos request into the list
* @pm_qos_class: identifies which list of qos requests to use
* @name: identifies the request
* @value: defines the qos request
*
* This function inserts a new entry in the pm_qos_class list of requested qos
* performance characteristics. It recomputes the aggregate QoS expectations
* for the pm_qos_class of parameters.
*/
int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
{
struct requirement_list *dep;
unsigned long flags;
dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
if (dep) {
if (value == PM_QOS_DEFAULT_VALUE)
dep->value = pm_qos_array[pm_qos_class]->default_value;
else
dep->value = value;
dep->name = kstrdup(name, GFP_KERNEL);
if (!dep->name)
goto cleanup;
spin_lock_irqsave(&pm_qos_lock, flags);
list_add(&dep->list,
&pm_qos_array[pm_qos_class]->requirements.list);
spin_unlock_irqrestore(&pm_qos_lock, flags);
update_target(pm_qos_class);
return 0;
}
cleanup:
kfree(dep);
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
/**
* pm_qos_update_requirement - modifies an existing qos request
* @pm_qos_class: identifies which list of qos requests to use
* @name: identifies the request
* @value: defines the qos request
*
* Updates an existing qos requirement for the pm_qos_class of parameters along
* with updating the target pm_qos_class value.
*
* If the named request isn't in the list then no change is made.
*/
int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
{
unsigned long flags;
struct requirement_list *node;
int pending_update = 0;
spin_lock_irqsave(&pm_qos_lock, flags);
list_for_each_entry(node,
&pm_qos_array[pm_qos_class]->requirements.list, list) {
if (strcmp(node->name, name) == 0) {
if (new_value == PM_QOS_DEFAULT_VALUE)
node->value =
pm_qos_array[pm_qos_class]->default_value;
else
node->value = new_value;
pending_update = 1;
break;
}
}
spin_unlock_irqrestore(&pm_qos_lock, flags);
if (pending_update)
update_target(pm_qos_class);
return 0;
}
EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
/**
* pm_qos_remove_requirement - modifies an existing qos request
* @pm_qos_class: identifies which list of qos requests to use
* @name: identifies the request
*
* Will remove named qos request from pm_qos_class list of parameters and
* recompute the current target value for the pm_qos_class.
*/
void pm_qos_remove_requirement(int pm_qos_class, char *name)
{
unsigned long flags;
struct requirement_list *node;
int pending_update = 0;
spin_lock_irqsave(&pm_qos_lock, flags);
list_for_each_entry(node,
&pm_qos_array[pm_qos_class]->requirements.list, list) {
if (strcmp(node->name, name) == 0) {
kfree(node->name);
list_del(&node->list);
kfree(node);
pending_update = 1;
break;
}
}
spin_unlock_irqrestore(&pm_qos_lock, flags);
if (pending_update)
update_target(pm_qos_class);
}
EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
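/*
 * Illustrative sketch, not part of the original file: a driver keeping the
 * aggregated CPU DMA latency target at or below 50 usec for the duration of
 * an I/O burst. The "example" names are hypothetical; error handling of
 * pm_qos_add_requirement() is omitted for brevity.
 */
#if 0
static void example_start_burst(void)
{
	pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "example_driver", 50);
}

static void example_end_burst(void)
{
	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "example_driver");
}
#endif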
/**
* pm_qos_add_notifier - sets notification entry for changes to target value
* @pm_qos_class: identifies which qos target changes should be notified.
* @notifier: notifier block managed by caller.
*
* will register the notifier into a notification chain that gets called
* upon changes to the pm_qos_class target value.
*/
int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
{
int retval;
retval = blocking_notifier_chain_register(
pm_qos_array[pm_qos_class]->notifiers, notifier);
return retval;
}
EXPORT_SYMBOL_GPL(pm_qos_add_notifier);
/**
* pm_qos_remove_notifier - deletes notification entry from chain.
* @pm_qos_class: identifies which qos target changes are notified.
* @notifier: notifier block to be removed.
*
* will remove the notifier from the notification chain that gets called
* upon changes to the pm_qos_class target value.
*/
int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
{
int retval;
retval = blocking_notifier_chain_unregister(
pm_qos_array[pm_qos_class]->notifiers, notifier);
return retval;
}
EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
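/*
 * Illustrative sketch, not part of the original file: being notified when
 * the aggregated network latency target changes. The callback and names are
 * hypothetical; the new target value arrives as the unsigned long argument
 * passed by update_target() above.
 */
#if 0
static int example_qos_notify(struct notifier_block *nb,
			      unsigned long new_target, void *unused)
{
	printk(KERN_DEBUG "network latency target is now %lu usec\n",
	       new_target);
	return NOTIFY_OK;
}

static struct notifier_block example_qos_nb = {
	.notifier_call = example_qos_notify,
};

static void example_register_notifier(void)
{
	pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &example_qos_nb);
}
#endif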
#define PID_NAME_LEN sizeof("process_1234567890")
static char name[PID_NAME_LEN];
static int pm_qos_power_open(struct inode *inode, struct file *filp)
{
int ret;
long pm_qos_class;
lock_kernel();
pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
if (pm_qos_class >= 0) {
filp->private_data = (void *)pm_qos_class;
sprintf(name, "process_%d", current->pid);
ret = pm_qos_add_requirement(pm_qos_class, name,
PM_QOS_DEFAULT_VALUE);
if (ret >= 0) {
unlock_kernel();
return 0;
}
}
unlock_kernel();
return -EPERM;
}
static int pm_qos_power_release(struct inode *inode, struct file *filp)
{
int pm_qos_class;
pm_qos_class = (long)filp->private_data;
sprintf(name, "process_%d", current->pid);
pm_qos_remove_requirement(pm_qos_class, name);
return 0;
}
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos)
{
s32 value;
int pm_qos_class;
pm_qos_class = (long)filp->private_data;
if (count != sizeof(s32))
return -EINVAL;
if (copy_from_user(&value, buf, sizeof(s32)))
return -EFAULT;
sprintf(name, "process_%d", current->pid);
pm_qos_update_requirement(pm_qos_class, name, value);
return sizeof(s32);
}
static int __init pm_qos_power_init(void)
{
int ret = 0;
ret = register_pm_qos_misc(&cpu_dma_pm_qos);
if (ret < 0) {
printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n");
return ret;
}
ret = register_pm_qos_misc(&network_lat_pm_qos);
if (ret < 0) {
printk(KERN_ERR "pm_qos_param: network_latency setup failed\n");
return ret;
}
ret = register_pm_qos_misc(&network_throughput_pm_qos);
if (ret < 0)
printk(KERN_ERR
"pm_qos_param: network_throughput setup failed\n");
return ret;
}
late_initcall(pm_qos_power_init);

File diff suppressed because it is too large

1039
kernel/kernel/posix-timers.c Normal file

File diff suppressed because it is too large

251
kernel/kernel/power/Kconfig Normal file
View File

@@ -0,0 +1,251 @@
config PM
bool "Power Management support"
depends on !IA64_HP_SIM
---help---
"Power Management" means that parts of your computer are shut
off or put into a power conserving "sleep" mode if they are not
being used. There are two competing standards for doing this: APM
and ACPI. If you want to use either one, say Y here and then also
to the requisite support below.
Power Management is most important for battery powered laptop
computers; if you have a laptop, check out the Linux Laptop home
page on the WWW at <http://www.linux-on-laptops.com/> or
Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
and the Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
Note that, even if you say N here, Linux on the x86 architecture
will issue the hlt instruction if nothing is to be done, thereby
sending the processor to sleep and saving power.
config PM_DEBUG
bool "Power Management Debug Support"
depends on PM
---help---
This option enables various debugging support in the Power Management
code. This is helpful when debugging and reporting PM bugs, like
suspend support.
config PM_VERBOSE
bool "Verbose Power Management debugging"
depends on PM_DEBUG
default n
---help---
This option enables verbose messages from the Power Management code.
config CAN_PM_TRACE
def_bool y
depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
config PM_TRACE
bool
help
This enables code to save the last PM event point across
reboot. The architecture needs to support this, x86 for
example does by saving things in the RTC, see below.
The architecture specific code must provide the extern
functions from <linux/resume-trace.h> as well as the
<asm/resume-trace.h> header with a TRACE_RESUME() macro.
The way the information is presented is architecture-
dependent; x86 will print the information during a
late_initcall.
config PM_TRACE_RTC
bool "Suspend/resume event tracing"
depends on CAN_PM_TRACE
depends on X86
select PM_TRACE
default n
---help---
This enables some cheesy code to save the last PM event point in the
RTC across reboots, so that you can debug a machine that just hangs
during suspend (or more commonly, during resume).
To use this debugging feature you should attempt to suspend the
machine, reboot it and then run
dmesg -s 1000000 | grep 'hash matches'
CAUTION: this option will cause your machine's real-time clock to be
set to an invalid time after a resume.
config PM_SLEEP_SMP
bool
depends on SMP
depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
depends on PM_SLEEP
select HOTPLUG_CPU
default y
config PM_SLEEP
bool
depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE || HIBERNATION_ON_MEMORY
default y
config SUSPEND
bool "Suspend to RAM and standby"
depends on PM && ARCH_SUSPEND_POSSIBLE
default y
---help---
Allow the system to enter sleep states in which main memory is
powered and thus its contents are preserved, such as the
suspend-to-RAM state (e.g. the ACPI S3 state).
config PM_TEST_SUSPEND
bool "Test suspend/resume and wakealarm during bootup"
depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
---help---
This option will let you suspend your machine during bootup, and
make it wake up a few seconds later using an RTC wakeup alarm.
Enable this with a kernel parameter like "test_suspend=mem".
You probably want to have your system's RTC driver statically
linked, ensuring that it's available when this test runs.
config SUSPEND_FREEZER
bool "Enable freezer for suspend to RAM/standby" \
if ARCH_WANTS_FREEZER_CONTROL || BROKEN
depends on SUSPEND
default y
help
This allows you to turn off the freezer for suspend. If this is
done, no tasks are frozen for suspend to RAM/standby.
Turning OFF this setting is NOT recommended! If in doubt, say Y.
config HIBERNATION_NVS
bool
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
select HIBERNATION_NVS if HAS_IOMEM
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
system and powers it off, and restores that checkpoint on reboot.
You can suspend your machine with 'echo disk > /sys/power/state'
after placing resume=/dev/swappartition on the kernel command line
in your bootloader's configuration file.
Alternatively, you can use the additional userland tools available
from <http://suspend.sf.net>.
In principle it does not require ACPI or APM, although for example
ACPI will be used for the final steps when it is available. One
of the reasons to use software suspend is that the firmware hooks
for suspend states like suspend-to-RAM (STR) often don't work very
well with Linux.
It creates an image which is saved in your active swap. Upon the next
boot, pass the 'resume=/dev/swappartition' argument to the kernel to
have it detect the saved image, restore memory state from it, and
continue to run as before. If you do not want the previous state to
be reloaded, then use the 'noresume' kernel command line argument.
Note, however, that fsck will be run on your filesystems and you will
need to run mkswap against the swap partition used for the suspend.
It also works with swap files to a limited extent (for details see
<file:Documentation/power/swsusp-and-swap-files.txt>).
Right now you may boot without resuming and resume later but in the
meantime you cannot use the swap partition(s)/file(s) involved in
suspending. Also in this case you must not use the filesystems
that were mounted before the suspend. In particular, you MUST NOT
MOUNT any journaled filesystems mounted before the suspend or they
will get corrupted in a nasty way.
For more information take a look at <file:Documentation/power/swsusp.txt>.
config PM_STD_PARTITION
string "Default resume partition"
depends on HIBERNATION
default ""
---help---
The default resume partition is the partition that the suspend-
to-disk implementation will look in for a suspended disk image.
The partition specified here will be different for almost every user.
It should be a valid swap partition (at least for now) that is turned
on before suspending.
The partition specified can be overridden by specifying:
resume=/dev/<other device>
which will set the resume partition to the device specified.
Note there is currently not a way to specify which device to save the
suspended image to. It will simply pick the first available swap
device.
config HIBERNATION_ON_MEMORY
bool "Hibernation on memory"
depends on PM
select HIBERNATION
---help---
Enable the hibernation on memory (HoM) functionality. This technique
will put the memory in self-refresh and after that it will turn off
the chip.
config HOM_TAG_VIRTUAL_ADDRESS
hex "Hibernation on memory - Tag virtual addess"
depends on HIBERNATION_ON_MEMORY
depends on SUPERH32
default "0x80001800"
---help---
Set the absolute virtual address where the Hibernation on Memory tag is placed.
config HOM_DEBUG
bool "Hibernation on memory - Debug"
depends on HIBERNATION_ON_MEMORY
depends on SUPERH32
default n
---help---
This option enables the hom_printk function. This works
like a standard printk on the empty_zero_page.
It's used as a rudimentary printk for debugging.
config APM_EMULATION
tristate "Advanced Power Management Emulation"
depends on PM && SYS_SUPPORTS_APM_EMULATION
help
APM is a BIOS specification for saving power using several different
techniques. This is mostly useful for battery powered laptops with
APM compliant BIOSes. If you say Y here, the system time will be
reset after a RESUME operation, the /proc/apm device will provide
battery status information, and user-space programs will receive
notification of APM "events" (e.g. battery status change).
In order to use APM, you will need supporting software. For location
and more information, read <file:Documentation/power/pm.txt> and the
Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
This driver does not spin down disk drives (see the hdparm(8)
manpage ("man 8 hdparm") for that), and it doesn't turn off
VESA-compliant "green" monitors.
Generally, if you don't have a battery in your machine, there isn't
much point in using this driver and you should say N. If you get
random kernel OOPSes or reboots that don't seem to be related to
anything, try disabling/enabling this option (or disabling/enabling
APM in your BIOS).
config PM_RUNTIME
bool "Run-time PM core functionality"
depends on PM
---help---
Enable functionality allowing I/O devices to be put into energy-saving
(low power) states at run time (or autosuspended) after a specified
period of inactivity and woken up in response to a hardware-generated
wake-up event or a driver's request.
Hardware support is generally required for this functionality to work
and the bus type drivers of the buses the devices are on are
responsible for the actual handling of the autosuspend requests and
wake-up events.

15
kernel/kernel/power/Makefile Normal file
View File

@@ -0,0 +1,15 @@
ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
obj-$(CONFIG_PM) += main.o
obj-$(CONFIG_PM_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o
obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
obj-$(CONFIG_HIBERNATION_ON_MEMORY) += hom.o

36
kernel/kernel/power/console.c Normal file
View File

@@ -0,0 +1,36 @@
/*
* kernel/power/console.c - Functions for saving/restoring console.
*
* Originally from swsusp.
*/
#include <linux/vt_kern.h>
#include <linux/kbd_kern.h>
#include <linux/console.h>
#include <linux/module.h>
#include "power.h"
#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
static int orig_fgconsole, orig_kmsg;
int pm_prepare_console(void)
{
orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1);
if (orig_fgconsole < 0)
return 1;
orig_kmsg = kmsg_redirect;
kmsg_redirect = SUSPEND_CONSOLE;
return 0;
}
void pm_restore_console(void)
{
if (orig_fgconsole >= 0) {
vt_move_to_console(orig_fgconsole, 0);
kmsg_redirect = orig_kmsg;
}
}
#endif

View File

@@ -0,0 +1,967 @@
/*
* kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
* Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
* Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
*
* This file is released under the GPLv2.
*/
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/kmod.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pm.h>
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <scsi/scsi_scan.h>
#include <asm/suspend.h>
#include "power.h"
static int noresume = 0;
static char resume_file[256] = CONFIG_PM_STD_PARTITION;
dev_t swsusp_resume_device;
sector_t swsusp_resume_block;
enum {
HIBERNATION_INVALID,
HIBERNATION_PLATFORM,
HIBERNATION_TEST,
HIBERNATION_TESTPROC,
HIBERNATION_SHUTDOWN,
HIBERNATION_REBOOT,
/* keep last */
__HIBERNATION_AFTER_LAST
};
#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1)
#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1)
static int hibernation_mode = HIBERNATION_SHUTDOWN;
static struct platform_hibernation_ops *hibernation_ops;
/**
* hibernation_set_ops - set the global hibernate operations
* @ops: the hibernation operations to use in subsequent hibernation transitions
*/
void hibernation_set_ops(struct platform_hibernation_ops *ops)
{
if (ops && !(ops->begin && ops->end && ops->pre_snapshot
&& ops->prepare && ops->finish && ops->enter && ops->pre_restore
&& ops->restore_cleanup)) {
WARN_ON(1);
return;
}
mutex_lock(&pm_mutex);
hibernation_ops = ops;
if (ops)
hibernation_mode = HIBERNATION_PLATFORM;
else if (hibernation_mode == HIBERNATION_PLATFORM)
hibernation_mode = HIBERNATION_SHUTDOWN;
mutex_unlock(&pm_mutex);
}
static bool entering_platform_hibernation;
bool system_entering_hibernation(void)
{
return entering_platform_hibernation;
}
EXPORT_SYMBOL(system_entering_hibernation);
#ifdef CONFIG_PM_DEBUG
static void hibernation_debug_sleep(void)
{
printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n");
mdelay(5000);
}
static int hibernation_testmode(int mode)
{
if (hibernation_mode == mode) {
hibernation_debug_sleep();
return 1;
}
return 0;
}
static int hibernation_test(int level)
{
if (pm_test_level == level) {
hibernation_debug_sleep();
return 1;
}
return 0;
}
#else /* !CONFIG_PM_DEBUG */
static int hibernation_testmode(int mode) { return 0; }
static int hibernation_test(int level) { return 0; }
#endif /* !CONFIG_PM_DEBUG */
/**
* platform_begin - tell the platform driver that we're starting
* hibernation
*/
static int platform_begin(int platform_mode)
{
return (platform_mode && hibernation_ops) ?
hibernation_ops->begin() : 0;
}
/**
* platform_end - tell the platform driver that we've entered the
* working state
*/
static void platform_end(int platform_mode)
{
if (platform_mode && hibernation_ops)
hibernation_ops->end();
}
/**
* platform_pre_snapshot - prepare the machine for hibernation using the
* platform driver if so configured and return an error code if it fails
*/
static int platform_pre_snapshot(int platform_mode)
{
return (platform_mode && hibernation_ops) ?
hibernation_ops->pre_snapshot() : 0;
}
/**
* platform_leave - prepare the machine for switching to the normal mode
* of operation using the platform driver (called with interrupts disabled)
*/
static void platform_leave(int platform_mode)
{
if (platform_mode && hibernation_ops)
hibernation_ops->leave();
}
/**
* platform_finish - switch the machine to the normal mode of operation
* using the platform driver (must be called after platform_prepare())
*/
static void platform_finish(int platform_mode)
{
if (platform_mode && hibernation_ops)
hibernation_ops->finish();
}
/**
* platform_pre_restore - prepare the platform for the restoration from a
* hibernation image. If the restore fails after this function has been
* called, platform_restore_cleanup() must be called.
*/
static int platform_pre_restore(int platform_mode)
{
return (platform_mode && hibernation_ops) ?
hibernation_ops->pre_restore() : 0;
}
/**
* platform_restore_cleanup - switch the platform to the normal mode of
* operation after a failing restore. If platform_pre_restore() has been
* called before the failing restore, this function must be called too,
* regardless of the result of platform_pre_restore().
*/
static void platform_restore_cleanup(int platform_mode)
{
if (platform_mode && hibernation_ops)
hibernation_ops->restore_cleanup();
}
/**
* platform_recover - recover the platform from a failure to suspend
* devices.
*/
static void platform_recover(int platform_mode)
{
if (platform_mode && hibernation_ops && hibernation_ops->recover)
hibernation_ops->recover();
}
/**
* create_image - freeze devices that need to be frozen with interrupts
* off, create the hibernation image and thaw those devices. Control
* reappears in this routine after a restore.
*/
static int create_image(int platform_mode)
{
int error;
error = arch_prepare_suspend();
if (error)
return error;
/* At this point, dpm_suspend_start() has been called, but *not*
* dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
* Otherwise, drivers for some devices (e.g. interrupt controllers)
* become desynchronized with the actual state of the hardware
* at resume time, and evil weirdness ensues.
*/
error = dpm_suspend_noirq(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting hibernation\n");
return error;
}
error = platform_pre_snapshot(platform_mode);
if (error || hibernation_test(TEST_PLATFORM))
goto Platform_finish;
error = disable_nonboot_cpus();
if (error || hibernation_test(TEST_CPUS)
|| hibernation_testmode(HIBERNATION_TEST))
goto Enable_cpus;
local_irq_disable();
error = sysdev_suspend(PMSG_FREEZE);
if (error) {
printk(KERN_ERR "PM: Some system devices failed to power down, "
"aborting hibernation\n");
goto Enable_irqs;
}
if (hibernation_test(TEST_CORE))
goto Power_up;
in_suspend = 1;
save_processor_state();
error = swsusp_arch_suspend();
if (error)
printk(KERN_ERR "PM: Error %d creating hibernation image\n",
error);
/* Restore control flow magically appears here */
restore_processor_state();
if (!in_suspend)
platform_leave(platform_mode);
Power_up:
sysdev_resume();
/* NOTE: dpm_resume_noirq() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
Enable_irqs:
local_irq_enable();
Enable_cpus:
enable_nonboot_cpus();
Platform_finish:
platform_finish(platform_mode);
dpm_resume_noirq(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
return error;
}
/**
* hibernation_snapshot - quiesce devices and create the hibernation
* snapshot image.
* @platform_mode - if set, use the platform driver, if available, to
* prepare the platform firmware for the power transition.
*
* Must be called with pm_mutex held
*/
int hibernation_snapshot(int platform_mode)
{
int error;
error = platform_begin(platform_mode);
if (error)
return error;
/* Preallocate image memory before shutting down devices. */
error = hibernate_preallocate_memory();
if (error)
goto Close;
suspend_console();
error = dpm_suspend_start(PMSG_FREEZE);
if (error)
goto Recover_platform;
if (hibernation_test(TEST_DEVICES))
goto Recover_platform;
error = create_image(platform_mode);
/* Control returns here after successful restore */
Resume_devices:
/* We may need to release the preallocated image pages here. */
if (error || !in_suspend)
swsusp_free();
dpm_resume_end(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
resume_console();
Close:
platform_end(platform_mode);
return error;
Recover_platform:
platform_recover(platform_mode);
goto Resume_devices;
}
/**
* resume_target_kernel - prepare devices that need to be suspended with
* interrupts off, restore the contents of highmem that have not been
* restored yet from the image and run the low level code that will restore
* the remaining contents of memory and switch to the just restored target
* kernel.
*/
static int resume_target_kernel(bool platform_mode)
{
int error;
error = dpm_suspend_noirq(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
return error;
}
error = platform_pre_restore(platform_mode);
if (error)
goto Cleanup;
error = disable_nonboot_cpus();
if (error)
goto Enable_cpus;
local_irq_disable();
error = sysdev_suspend(PMSG_QUIESCE);
if (error)
goto Enable_irqs;
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
error = restore_highmem();
if (!error) {
error = swsusp_arch_resume();
/*
* The code below is only ever reached in case of a failure.
* Otherwise execution continues at place where
* swsusp_arch_suspend() was called
*/
BUG_ON(!error);
/* This call to restore_highmem() undos the previous one */
restore_highmem();
}
/*
* The only reason why swsusp_arch_resume() can fail is memory being
* very tight, so we have to free it as soon as we can to avoid
* subsequent failures
*/
swsusp_free();
restore_processor_state();
touch_softlockup_watchdog();
sysdev_resume();
Enable_irqs:
local_irq_enable();
Enable_cpus:
enable_nonboot_cpus();
Cleanup:
platform_restore_cleanup(platform_mode);
dpm_resume_noirq(PMSG_RECOVER);
return error;
}
/**
* hibernation_restore - quiesce devices and restore the hibernation
* snapshot image. If successful, control returns in hibernation_snaphot()
* @platform_mode - if set, use the platform driver, if available, to
* prepare the platform firmware for the transition.
*
* Must be called with pm_mutex held
*/
int hibernation_restore(int platform_mode)
{
int error;
pm_prepare_console();
suspend_console();
error = dpm_suspend_start(PMSG_QUIESCE);
if (!error) {
error = resume_target_kernel(platform_mode);
dpm_resume_end(PMSG_RECOVER);
}
resume_console();
pm_restore_console();
return error;
}
/**
* hibernation_platform_enter - enter the hibernation state using the
* platform driver (if available)
*/
int hibernation_platform_enter(void)
{
int error;
if (!hibernation_ops)
return -ENOSYS;
/*
* We have cancelled the power transition by running
* hibernation_ops->finish() before saving the image, so we should let
* the firmware know that we're going to enter the sleep state after all
*/
error = hibernation_ops->begin();
if (error)
goto Close;
entering_platform_hibernation = true;
suspend_console();
error = dpm_suspend_start(PMSG_HIBERNATE);
if (error) {
if (hibernation_ops->recover)
hibernation_ops->recover();
goto Resume_devices;
}
error = dpm_suspend_noirq(PMSG_HIBERNATE);
if (error)
goto Resume_devices;
error = hibernation_ops->prepare();
if (error)
goto Platform_finish;
error = disable_nonboot_cpus();
if (error)
goto Platform_finish;
local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
hibernation_ops->enter();
/* We should never get here */
while (1);
/*
* We don't need to reenable the nonboot CPUs or resume consoles, since
* the system is going to be halted anyway.
*/
Platform_finish:
hibernation_ops->finish();
dpm_suspend_noirq(PMSG_RESTORE);
Resume_devices:
entering_platform_hibernation = false;
dpm_resume_end(PMSG_RESTORE);
resume_console();
Close:
hibernation_ops->end();
return error;
}
/**
* power_down - Shut the machine down for hibernation.
*
* Use the platform driver, if configured so; otherwise try
* to power off or reboot.
*/
static void power_down(void)
{
switch (hibernation_mode) {
case HIBERNATION_TEST:
case HIBERNATION_TESTPROC:
break;
case HIBERNATION_REBOOT:
kernel_restart(NULL);
break;
case HIBERNATION_PLATFORM:
hibernation_platform_enter();
case HIBERNATION_SHUTDOWN:
kernel_power_off();
break;
}
kernel_halt();
/*
* Valid image is on the disk, if we continue we risk serious data
* corruption after resume.
*/
printk(KERN_CRIT "PM: Please power down manually\n");
while(1);
}
static int prepare_processes(void)
{
int error = 0;
if (freeze_processes()) {
error = -EBUSY;
thaw_processes();
}
return error;
}
/**
* hibernate - The grandpappy of the built-in hibernation management
*/
int hibernate(void)
{
int error;
mutex_lock(&pm_mutex);
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
goto Unlock;
}
pm_prepare_console();
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
goto Exit;
error = usermodehelper_disable();
if (error)
goto Exit;
/* Allocate memory management structures */
error = create_basic_memory_bitmaps();
if (error)
goto Exit;
printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
error = prepare_processes();
if (error)
goto Finish;
if (hibernation_test(TEST_FREEZER))
goto Thaw;
if (hibernation_testmode(HIBERNATION_TESTPROC))
goto Thaw;
error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
if (error)
goto Thaw;
if (in_suspend) {
unsigned int flags = 0;
if (hibernation_mode == HIBERNATION_PLATFORM)
flags |= SF_PLATFORM_MODE;
pr_debug("PM: writing image.\n");
error = swsusp_write(flags);
swsusp_free();
if (!error)
power_down();
} else {
pr_debug("PM: Image restored successfully.\n");
}
Thaw:
thaw_processes();
Finish:
free_basic_memory_bitmaps();
usermodehelper_enable();
Exit:
pm_notifier_call_chain(PM_POST_HIBERNATION);
pm_restore_console();
atomic_inc(&snapshot_device_available);
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
/**
* software_resume - Resume from a saved image.
*
* Called as a late_initcall (so all devices are discovered and
* initialized), we call swsusp to see if we have a saved image or not.
* If so, we quiesce devices, then restore the saved image. We will
* return above (in hibernate() ) if everything goes well.
* Otherwise, we fail gracefully and return to the normally
* scheduled program.
*
*/
static int software_resume(void)
{
int error;
unsigned int flags;
/*
* If the user said "noresume".. bail out early.
*/
if (noresume)
return 0;
/*
* name_to_dev_t() below takes a sysfs buffer mutex when sysfs
* is configured into the kernel. Since the regular hibernate
* trigger path is via sysfs which takes a buffer mutex before
* calling hibernate functions (which take pm_mutex) this can
* cause lockdep to complain about a possible ABBA deadlock
* which cannot happen since we're in the boot code here and
* sysfs can't be invoked yet. Therefore, we use a subclass
* here to avoid lockdep complaining.
*/
mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
if (swsusp_resume_device)
goto Check_image;
if (!strlen(resume_file)) {
error = -ENOENT;
goto Unlock;
}
pr_debug("PM: Checking image partition %s\n", resume_file);
/* Check if the device is there */
swsusp_resume_device = name_to_dev_t(resume_file);
if (!swsusp_resume_device) {
/*
* Some device discovery might still be in progress; we need
* to wait for this to finish.
*/
wait_for_device_probe();
/*
* We can't depend on SCSI devices being available after loading
* one of their modules until scsi_complete_async_scans() is
* called and the resume device usually is a SCSI one.
*/
scsi_complete_async_scans();
swsusp_resume_device = name_to_dev_t(resume_file);
if (!swsusp_resume_device) {
error = -ENODEV;
goto Unlock;
}
}
Check_image:
pr_debug("PM: Resume from partition %d:%d\n",
MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
pr_debug("PM: Checking hibernation image.\n");
error = swsusp_check();
if (error)
goto Unlock;
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
swsusp_close(FMODE_READ);
goto Unlock;
}
pm_prepare_console();
error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
if (error)
goto close_finish;
error = usermodehelper_disable();
if (error)
goto close_finish;
error = create_basic_memory_bitmaps();
if (error)
goto close_finish;
pr_debug("PM: Preparing processes for restore.\n");
error = prepare_processes();
if (error) {
swsusp_close(FMODE_READ);
goto Done;
}
pr_debug("PM: Reading hibernation image.\n");
error = swsusp_read(&flags);
swsusp_close(FMODE_READ);
if (!error)
hibernation_restore(flags & SF_PLATFORM_MODE);
printk(KERN_ERR "PM: Restore failed, recovering.\n");
swsusp_free();
thaw_processes();
Done:
free_basic_memory_bitmaps();
usermodehelper_enable();
Finish:
pm_notifier_call_chain(PM_POST_RESTORE);
pm_restore_console();
atomic_inc(&snapshot_device_available);
/* For success case, the suspend path will release the lock */
Unlock:
mutex_unlock(&pm_mutex);
pr_debug("PM: Resume from disk failed.\n");
return error;
close_finish:
swsusp_close(FMODE_READ);
goto Finish;
}
late_initcall(software_resume);
static const char * const hibernation_modes[] = {
[HIBERNATION_PLATFORM] = "platform",
[HIBERNATION_SHUTDOWN] = "shutdown",
[HIBERNATION_REBOOT] = "reboot",
[HIBERNATION_TEST] = "test",
[HIBERNATION_TESTPROC] = "testproc",
};
/**
* disk - Control hibernation mode
*
* Suspend-to-disk can be handled in several ways. We have a few options
* for putting the system to sleep - using the platform driver (e.g. ACPI
* or other hibernation_ops), powering off the system or rebooting the
* system (for testing) as well as the two test modes.
*
* The system can support 'platform', and that is known a priori (and
* encoded by the presence of hibernation_ops). However, the user may
* choose 'shutdown' or 'reboot' as alternatives, as well as one of the
* test modes, 'test' or 'testproc'.
*
* show() will display what the mode is currently set to.
* store() will accept one of
*
* 'platform'
* 'shutdown'
* 'reboot'
* 'test'
* 'testproc'
*
* It will only change to 'platform' if the system
* supports it (as determined by having hibernation_ops).
*/
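/*
 * Illustrative user-space view (not part of the original file), assuming
 * a platform that has registered hibernation_ops:
 *
 *   # cat /sys/power/disk
 *   [platform] test testproc shutdown reboot
 *   # echo shutdown > /sys/power/disk
 *   # echo disk > /sys/power/state
 *
 * The bracketed word marks the current mode as printed by disk_show()
 * below; disk_store() accepts any of the listed words, and writing
 * "disk" to /sys/power/state (see main.c) then starts hibernate().
 */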
static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
int i;
char *start = buf;
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
if (!hibernation_modes[i])
continue;
switch (i) {
case HIBERNATION_SHUTDOWN:
case HIBERNATION_REBOOT:
case HIBERNATION_TEST:
case HIBERNATION_TESTPROC:
break;
case HIBERNATION_PLATFORM:
if (hibernation_ops)
break;
/* not a valid mode, continue with loop */
continue;
}
if (i == hibernation_mode)
buf += sprintf(buf, "[%s] ", hibernation_modes[i]);
else
buf += sprintf(buf, "%s ", hibernation_modes[i]);
}
buf += sprintf(buf, "\n");
return buf-start;
}
static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
int error = 0;
int i;
int len;
char *p;
int mode = HIBERNATION_INVALID;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
mutex_lock(&pm_mutex);
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
if (len == strlen(hibernation_modes[i])
&& !strncmp(buf, hibernation_modes[i], len)) {
mode = i;
break;
}
}
if (mode != HIBERNATION_INVALID) {
switch (mode) {
case HIBERNATION_SHUTDOWN:
case HIBERNATION_REBOOT:
case HIBERNATION_TEST:
case HIBERNATION_TESTPROC:
hibernation_mode = mode;
break;
case HIBERNATION_PLATFORM:
if (hibernation_ops)
hibernation_mode = mode;
else
error = -EINVAL;
}
} else
error = -EINVAL;
if (!error)
pr_debug("PM: Hibernation mode set to '%s'\n",
hibernation_modes[mode]);
mutex_unlock(&pm_mutex);
return error ? error : n;
}
power_attr(disk);
static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
MINOR(swsusp_resume_device));
}
static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
unsigned int maj, min;
dev_t res;
int ret = -EINVAL;
if (sscanf(buf, "%u:%u", &maj, &min) != 2)
goto out;
res = MKDEV(maj,min);
if (maj != MAJOR(res) || min != MINOR(res))
goto out;
mutex_lock(&pm_mutex);
swsusp_resume_device = res;
mutex_unlock(&pm_mutex);
printk(KERN_INFO "PM: Starting manual resume from disk\n");
noresume = 0;
software_resume();
ret = n;
out:
return ret;
}
power_attr(resume);
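/*
 * Illustrative example (not part of the original file): if the image is
 * on the block device with major:minor 8:3 (typically /dev/sda3), an
 * initramfs can request a manual resume with
 *
 *   echo 8:3 > /sys/power/resume
 *
 * which lands in resume_store() above and kicks off software_resume().
 */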
static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
return sprintf(buf, "%lu\n", image_size);
}
static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
unsigned long size;
if (sscanf(buf, "%lu", &size) == 1) {
image_size = size;
return n;
}
return -EINVAL;
}
power_attr(image_size);
static struct attribute * g[] = {
&disk_attr.attr,
&resume_attr.attr,
&image_size_attr.attr,
NULL,
};
static struct attribute_group attr_group = {
.attrs = g,
};
static int __init pm_disk_init(void)
{
return sysfs_create_group(power_kobj, &attr_group);
}
core_initcall(pm_disk_init);
static int __init resume_setup(char *str)
{
if (noresume)
return 1;
strncpy( resume_file, str, 255 );
return 1;
}
static int __init resume_offset_setup(char *str)
{
unsigned long long offset;
if (noresume)
return 1;
if (sscanf(str, "%llu", &offset) == 1)
swsusp_resume_block = offset;
return 1;
}
static int __init noresume_setup(char *str)
{
noresume = 1;
return 1;
}
__setup("noresume", noresume_setup);
__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);

View File

@@ -0,0 +1,135 @@
/*
* linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
*
* Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
*
* This file is released under the GPLv2.
*/
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/suspend.h>
/*
* Platforms, like ACPI, may want us to save some memory used by them during
* hibernation and to restore the contents of this memory during the subsequent
* resume. The code below implements a mechanism allowing us to do that.
*/
struct nvs_page {
unsigned long phys_start;
unsigned int size;
void *kaddr;
void *data;
struct list_head node;
};
static LIST_HEAD(nvs_list);
/**
* hibernate_nvs_register - register platform NVS memory region to save
* @start - physical address of the region
* @size - size of the region
*
* The NVS region need not be page-aligned (both ends) and we arrange
* things so that the data from page-aligned addresses in this region will
* be copied into separate RAM pages.
*/
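/*
 * Worked example (illustrative, not in the original source): with 4 KiB
 * pages, a call such as
 *
 *   hibernate_nvs_register(0x1f80, 0x100);
 *
 * crosses one page boundary, so the loop below creates two nvs_page
 * entries, 0x1f80..0x1fff (0x80 bytes) and 0x2000..0x207f (0x80 bytes),
 * each later backed by its own RAM page from hibernate_nvs_alloc().
 */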
int hibernate_nvs_register(unsigned long start, unsigned long size)
{
struct nvs_page *entry, *next;
while (size > 0) {
unsigned int nr_bytes;
entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
if (!entry)
goto Error;
list_add_tail(&entry->node, &nvs_list);
entry->phys_start = start;
nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
entry->size = (size < nr_bytes) ? size : nr_bytes;
start += entry->size;
size -= entry->size;
}
return 0;
Error:
list_for_each_entry_safe(entry, next, &nvs_list, node) {
list_del(&entry->node);
kfree(entry);
}
return -ENOMEM;
}
/**
* hibernate_nvs_free - free data pages allocated for saving NVS regions
*/
void hibernate_nvs_free(void)
{
struct nvs_page *entry;
list_for_each_entry(entry, &nvs_list, node)
if (entry->data) {
free_page((unsigned long)entry->data);
entry->data = NULL;
if (entry->kaddr) {
iounmap(entry->kaddr);
entry->kaddr = NULL;
}
}
}
/**
* hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
*/
int hibernate_nvs_alloc(void)
{
struct nvs_page *entry;
list_for_each_entry(entry, &nvs_list, node) {
entry->data = (void *)__get_free_page(GFP_KERNEL);
if (!entry->data) {
hibernate_nvs_free();
return -ENOMEM;
}
}
return 0;
}
/**
* hibernate_nvs_save - save NVS memory regions
*/
void hibernate_nvs_save(void)
{
struct nvs_page *entry;
printk(KERN_INFO "PM: Saving platform NVS memory\n");
list_for_each_entry(entry, &nvs_list, node)
if (entry->data) {
entry->kaddr = ioremap(entry->phys_start, entry->size);
memcpy(entry->data, entry->kaddr, entry->size);
}
}
/**
* hibernate_nvs_restore - restore NVS memory regions
*
* This function is going to be called with interrupts disabled, so it
* cannot iounmap the virtual addresses used to access the NVS region.
*/
void hibernate_nvs_restore(void)
{
struct nvs_page *entry;
printk(KERN_INFO "PM: Restoring platform NVS memory\n");
list_for_each_entry(entry, &nvs_list, node)
if (entry->data)
memcpy(entry->kaddr, entry->data, entry->size);
}

271
kernel/kernel/power/hom.c Normal file
View File

@@ -0,0 +1,271 @@
/*
* kernel/power/hom.c - Hibernation on Memory core functionality.
*
* Copyright (c) 2010 STMicroelectronics
* Author: Francesco M. Virlinzi <francesco.virlinzi@st.com>
*
* This file is released under the GPLv2
*
* This file reuses most of the code used to manage
* system wide suspend and system wide hibernation
* to support a new kind of system wide power operation:
*
* Hibernation on Memory where the
* - Chip is off
* - DRAM is in self-refresh mode
*
*/
#include <linux/hom.h>
#include <linux/freezer.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/syscalls.h>
#include <asm-generic/sections.h>
#include "power.h"
static struct platform_hom_ops *hom_ops;
/**
* hom_set_ops - Set the global power method table.
* @ops: Pointer to ops structure.
*/
int hom_set_ops(struct platform_hom_ops *ops)
{
pr_debug("%s\n", __func__);
if (!ops)
return -EINVAL;
mutex_lock(&pm_mutex);
hom_ops = ops;
mutex_unlock(&pm_mutex);
return 0;
}
EXPORT_SYMBOL(hom_set_ops);
static inline int platform_begin(void)
{
if (hom_ops->begin)
return hom_ops->begin();
return 0;
}
static inline int platform_prepare(void)
{
if (hom_ops->prepare)
return hom_ops->prepare();
return 0;
}
#ifndef CONFIG_HMEM_MODE_TEST
static inline int platform_enter(void)
{
if (hom_ops->enter)
return hom_ops->enter();
return 0;
}
#else
static inline int platform_enter(void) { return 0; }
#endif
static inline int platform_complete(void)
{
if (hom_ops->complete)
return hom_ops->complete();
return 0;
}
static inline int platform_end(void)
{
if (hom_ops->end)
return hom_ops->end();
return 0;
}
static int prepare_processes(void)
{
int error = 0;
if (freeze_processes()) {
error = -EBUSY;
thaw_processes();
}
return error;
}
/**
* hom_prepare - Do prep work before entering low-power state.
*
* This is common code that is called for each state that we're entering.
* Run suspend notifiers, allocate a console and stop all processes.
*/
static int hom_prepare(void)
{
int error;
pm_prepare_console();
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
goto Exit;
error = usermodehelper_disable();
if (error)
goto Exit;
pr_info("PM: Syncing filesystems ... ");
sys_sync();
pr_info("done.\n");
error = prepare_processes();
if (!error)
return 0;
usermodehelper_enable();
Exit:
pm_notifier_call_chain(PM_POST_HIBERNATION);
pm_restore_console();
return error;
}
/**
* hibernation_on_memory_enter -
* freeze devices and enter hibernation on memory
*/
static int hibernation_on_memory_enter(void)
{
int error = 0;
unsigned long pe_counter;
if (!hom_ops)
return -ENOSYS;
pr_debug("[STM]:[PM]: platform_begin\n");
error = platform_begin();
if (error)
goto Close;
suspend_console();
pr_debug("[STM]:[PM]: Suspend devices\n");
error = dpm_suspend_start(PMSG_FREEZE);
if (error)
goto Resume_devices;
pr_debug("[STM]:[PM]: Suspend devices (noirq)\n");
error = dpm_suspend_noirq(PMSG_FREEZE);
if (error)
goto Resume_devices_noirq;
error = disable_nonboot_cpus();
if (error)
goto Enable_cpus;
local_irq_disable();
pr_debug("[STM]:[PM]: Suspend sysdevices\n");
error = sysdev_suspend(PMSG_FREEZE);
if (error)
goto Enable_irqs;
pr_debug("[STM]:[PM]: platform_prepare\n");
error = platform_prepare();
if (error) {
pr_err("[STM]:[PM]: platform_prepare has refused the HoM\n");
goto Skip_enter;
}
pr_debug("[STM]:[PM]: platform_enter\n");
pe_counter = preempt_count();
platform_enter();
BUG_ON(pe_counter != preempt_count());
Skip_enter:
pr_debug("[STM]:[PM]: platform_complete\n");
platform_complete();
pr_debug("[STM]:[PM]: Resumed sysdevices\n");
sysdev_resume();
Enable_irqs:
local_irq_enable();
Enable_cpus:
enable_nonboot_cpus();
Resume_devices_noirq:
pr_debug("[STM]:[PM]: Resume devices (noirq)\n");
dpm_resume_noirq(PMSG_RESTORE);
Resume_devices:
pr_debug("[STM]:[PM]: Resume devices\n");
dpm_resume_end(PMSG_RESTORE);
resume_console();
Close:
pr_debug("[STM]:[PM]: platform_end\n");
platform_end();
pr_debug("[STM]:[PM]: exit\n");
return error;
}
/**
* hom_finish - Do final work after leaving hibernation on memory.
*
* Thaw processes, re-enable the usermode helper, run the post-hibernation
* notifiers and restore the console allocated in hom_prepare().
*/
static void hom_finish(void)
{
pr_debug("%s\n", __func__);
thaw_processes();
usermodehelper_enable();
pm_notifier_call_chain(PM_POST_HIBERNATION);
pm_restore_console();
}
static int hom_enter(void)
{
int err;
mutex_lock(&pm_mutex);
err = hom_prepare();
if (err)
goto Finish;
err = hibernation_on_memory_enter();
hom_finish();
Finish:
mutex_unlock(&pm_mutex);
pr_info("[STM]:[PM]: HoM End...\n");
return err;
}
int hibernate_on_memory(void)
{
return hom_enter();
}
EXPORT_SYMBOL(hibernate_on_memory);

256
kernel/kernel/power/main.c Normal file
View File

@@ -0,0 +1,256 @@
/*
* kernel/power/main.c - PM subsystem core functionality.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
*
* This file is released under the GPLv2
*
*/
#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/resume-trace.h>
#include <linux/workqueue.h>
#include <linux/hom.h>
#include "power.h"
DEFINE_MUTEX(pm_mutex);
unsigned int pm_flags;
EXPORT_SYMBOL(pm_flags);
#ifdef CONFIG_PM_SLEEP
/* Routines for PM-transition notifications */
static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
int register_pm_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&pm_chain_head, nb);
}
EXPORT_SYMBOL_GPL(register_pm_notifier);
int unregister_pm_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&pm_chain_head, nb);
}
EXPORT_SYMBOL_GPL(unregister_pm_notifier);
int pm_notifier_call_chain(unsigned long val)
{
return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
== NOTIFY_BAD) ? -EINVAL : 0;
}
#ifdef CONFIG_PM_DEBUG
int pm_test_level = TEST_NONE;
static const char * const pm_tests[__TEST_AFTER_LAST] = {
[TEST_NONE] = "none",
[TEST_CORE] = "core",
[TEST_CPUS] = "processors",
[TEST_PLATFORM] = "platform",
[TEST_DEVICES] = "devices",
[TEST_FREEZER] = "freezer",
};
static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
char *s = buf;
int level;
for (level = TEST_FIRST; level <= TEST_MAX; level++)
if (pm_tests[level]) {
if (level == pm_test_level)
s += sprintf(s, "[%s] ", pm_tests[level]);
else
s += sprintf(s, "%s ", pm_tests[level]);
}
if (s != buf)
/* convert the last space to a newline */
*(s-1) = '\n';
return (s - buf);
}
static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
const char * const *s;
int level;
char *p;
int len;
int error = -EINVAL;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
mutex_lock(&pm_mutex);
level = TEST_FIRST;
for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) {
pm_test_level = level;
error = 0;
break;
}
mutex_unlock(&pm_mutex);
return error ? error : n;
}
power_attr(pm_test);
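/*
 * Illustrative usage (not part of the original file), with
 * CONFIG_PM_DEBUG enabled:
 *
 *   # cat /sys/power/pm_test
 *   [none] core processors platform devices freezer
 *   # echo devices > /sys/power/pm_test
 *   # echo mem > /sys/power/state
 *
 * suspend_test()/hibernation_test() then stop the transition at the
 * chosen stage, wait five seconds and resume, which helps narrow down
 * where a broken suspend path fails.
 */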
#endif /* CONFIG_PM_DEBUG */
#endif /* CONFIG_PM_SLEEP */
struct kobject *power_kobj;
/**
* state - control system power state.
*
* show() returns what states are supported, which is hard-coded to
* 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
* 'disk' (Suspend-to-Disk).
*
* store() accepts one of those strings, translates it into the
* proper enumerated value, and initiates a suspend transition.
*/
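/*
 * Illustrative usage (not part of the original file), assuming a kernel
 * built with CONFIG_SUSPEND, CONFIG_HIBERNATION and
 * CONFIG_HIBERNATION_ON_MEMORY on a platform whose suspend_ops report
 * both suspend states as valid:
 *
 *   # cat /sys/power/state
 *   standby mem hom disk
 *   # echo mem > /sys/power/state    (suspend to RAM)
 *   # echo hom > /sys/power/state    (hibernation on memory)
 *   # echo disk > /sys/power/state   (hibernate to swap)
 */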
static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
char *s = buf;
#ifdef CONFIG_SUSPEND
int i;
for (i = 0; i < PM_SUSPEND_MAX; i++) {
if (pm_states[i] && valid_state(i))
s += sprintf(s,"%s ", pm_states[i]);
}
#endif
#ifdef CONFIG_HIBERNATION_ON_MEMORY
s += sprintf(s, "%s ", "hom");
#endif
#ifdef CONFIG_HIBERNATION
s += sprintf(s, "%s\n", "disk");
#else
if (s != buf)
/* convert the last space to a newline */
*(s-1) = '\n';
#endif
return (s - buf);
}
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
#ifdef CONFIG_SUSPEND
suspend_state_t state = PM_SUSPEND_STANDBY;
const char * const *s;
#endif
char *p;
int len;
int error = -EINVAL;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
/* First, check if we are requested to hibernate */
if (len == 4 && !strncmp(buf, "disk", len)) {
error = hibernate();
goto Exit;
}
#ifdef CONFIG_HIBERNATION_ON_MEMORY
/* Second, check if we are requesting the hibernate on memory */
if (len == 3 && !strncmp(buf, "hom", len)) {
error = hibernate_on_memory();
goto Exit;
}
#endif
#ifdef CONFIG_SUSPEND
for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
break;
}
if (state < PM_SUSPEND_MAX && *s)
error = enter_state(state);
#endif
Exit:
return error ? error : n;
}
power_attr(state);
#ifdef CONFIG_PM_TRACE
int pm_trace_enabled;
static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
return sprintf(buf, "%d\n", pm_trace_enabled);
}
static ssize_t
pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
int val;
if (sscanf(buf, "%d", &val) == 1) {
pm_trace_enabled = !!val;
return n;
}
return -EINVAL;
}
power_attr(pm_trace);
#endif /* CONFIG_PM_TRACE */
static struct attribute * g[] = {
&state_attr.attr,
#ifdef CONFIG_PM_TRACE
&pm_trace_attr.attr,
#endif
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG)
&pm_test_attr.attr,
#endif
NULL,
};
static struct attribute_group attr_group = {
.attrs = g,
};
#ifdef CONFIG_PM_RUNTIME
struct workqueue_struct *pm_wq;
static int __init pm_start_workqueue(void)
{
pm_wq = create_freezeable_workqueue("pm");
return pm_wq ? 0 : -ENOMEM;
}
#else
static inline int pm_start_workqueue(void) { return 0; }
#endif
static int __init pm_init(void)
{
int error = pm_start_workqueue();
if (error)
return error;
power_kobj = kobject_create_and_add("power", NULL);
if (!power_kobj)
return -ENOMEM;
return sysfs_create_group(power_kobj, &attr_group);
}
core_initcall(pm_init);

238
kernel/kernel/power/power.h Normal file
View File

@@ -0,0 +1,238 @@
#include <linux/suspend.h>
#include <linux/suspend_ioctls.h>
#include <linux/utsname.h>
#include <linux/freezer.h>
struct swsusp_info {
struct new_utsname uts;
u32 version_code;
unsigned long num_physpages;
int cpus;
unsigned long image_pages;
unsigned long pages;
unsigned long size;
} __attribute__((aligned(PAGE_SIZE)));
#ifdef CONFIG_HIBERNATION
#ifdef CONFIG_ARCH_HIBERNATION_HEADER
/* Maximum size of architecture specific data in a hibernation header */
#define MAX_ARCH_HEADER_SIZE (sizeof(struct new_utsname) + 4)
extern int arch_hibernation_header_save(void *addr, unsigned int max_size);
extern int arch_hibernation_header_restore(void *addr);
static inline int init_header_complete(struct swsusp_info *info)
{
return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE);
}
static inline char *check_image_kernel(struct swsusp_info *info)
{
return arch_hibernation_header_restore(info) ?
"architecture specific data" : NULL;
}
#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
/*
* Keep some memory free so that I/O operations can succeed without paging
* [Might this be more than 4 MB?]
*/
#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT)
/*
* Keep 1 MB of memory free so that device drivers can allocate some pages in
* their .suspend() routines without breaking the suspend to disk.
*/
#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT)
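/*
 * Worked example (illustrative): with 4 KiB pages (PAGE_SHIFT == 12)
 * these evaluate to PAGES_FOR_IO == 1024 pages (4 MB) and
 * SPARE_PAGES == 256 pages (1 MB).
 */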
/* kernel/power/hibernate.c */
extern int hibernation_snapshot(int platform_mode);
extern int hibernation_restore(int platform_mode);
extern int hibernation_platform_enter(void);
#endif
extern int pfn_is_nosave(unsigned long);
#define power_attr(_name) \
static struct kobj_attribute _name##_attr = { \
.attr = { \
.name = __stringify(_name), \
.mode = 0644, \
}, \
.show = _name##_show, \
.store = _name##_store, \
}
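/*
 * Example expansion (illustrative): power_attr(disk) declares
 *
 *   static struct kobj_attribute disk_attr = {
 *       .attr = { .name = "disk", .mode = 0644 },
 *       .show = disk_show,
 *       .store = disk_store,
 *   };
 *
 * which hibernate.c and main.c then plug into their attribute groups to
 * create the corresponding /sys/power/<name> files.
 */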
/* Preferred image size in bytes (default 500 MB) */
extern unsigned long image_size;
extern int in_suspend;
extern dev_t swsusp_resume_device;
extern sector_t swsusp_resume_block;
extern asmlinkage int swsusp_arch_suspend(void);
extern asmlinkage int swsusp_arch_resume(void);
extern int create_basic_memory_bitmaps(void);
extern void free_basic_memory_bitmaps(void);
extern int hibernate_preallocate_memory(void);
/**
* Auxiliary structure used for reading the snapshot image data and
* metadata from and writing them to the list of page backup entries
* (PBEs) which is the main data structure of swsusp.
*
* Using struct snapshot_handle we can transfer the image, including its
* metadata, as a continuous sequence of bytes with the help of
* snapshot_read_next() and snapshot_write_next().
*
* The code that writes the image to a storage or transfers it to
* the user land is required to use snapshot_read_next() for this
* purpose and it should not make any assumptions regarding the internal
* structure of the image. Similarly, the code that reads the image from
* a storage or transfers it from the user land is required to use
* snapshot_write_next().
*
* This may allow us to change the internal structure of the image
* in the future with considerably less effort.
*/
struct snapshot_handle {
loff_t offset; /* number of the last byte ready for reading
* or writing in the sequence
*/
unsigned int cur; /* number of the block of PAGE_SIZE bytes the
* next operation will refer to (ie. current)
*/
unsigned int cur_offset; /* offset with respect to the current
* block (for the next operation)
*/
unsigned int prev; /* number of the block of PAGE_SIZE bytes that
* was the current one previously
*/
void *buffer; /* address of the block to read from
* or write to
*/
unsigned int buf_offset; /* location to read from or write to,
* given as a displacement from 'buffer'
*/
int sync_read; /* Set to one to notify the caller of
* snapshot_write_next() that it may
* need to call wait_on_bio_chain()
*/
};
/* This macro returns the address from/to which the caller of
* snapshot_read_next()/snapshot_write_next() is allowed to
* read/write data after the function returns
*/
#define data_of(handle) ((handle).buffer + (handle).buf_offset)
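/*
 * Sketch of the intended calling pattern (illustrative; the real
 * consumers live in swap.c and user.c). It assumes snapshot_read_next()
 * returns the number of bytes made available, zero at the end of the
 * image and a negative value on error:
 *
 *   struct snapshot_handle handle;
 *   int ret;
 *
 *   memset(&handle, 0, sizeof(handle));
 *   while ((ret = snapshot_read_next(&handle, PAGE_SIZE)) > 0)
 *       store_chunk(data_of(handle), ret);   // hypothetical consumer
 *
 * Restoring an image works the same way with snapshot_write_next()
 * followed by snapshot_write_finalize().
 */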
extern unsigned int snapshot_additional_pages(struct zone *zone);
extern unsigned long snapshot_get_image_size(void);
extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
extern void snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
/* If unset, the snapshot device cannot be open. */
extern atomic_t snapshot_device_available;
extern sector_t alloc_swapdev_block(int swap);
extern void free_all_swap_pages(int swap);
extern int swsusp_swap_in_use(void);
/*
* Flags that can be passed from the hibernating kernel to the "boot" kernel in
* the image header.
*/
#define SF_PLATFORM_MODE 1
/* kernel/power/hibernate.c */
extern int swsusp_check(void);
extern void swsusp_free(void);
extern int swsusp_read(unsigned int *flags_p);
extern int swsusp_write(unsigned int flags);
extern void swsusp_close(fmode_t);
struct timeval;
/* kernel/power/swsusp.c */
extern void swsusp_show_speed(struct timeval *, struct timeval *,
unsigned int, char *);
#ifdef CONFIG_SUSPEND
/* kernel/power/suspend.c */
extern const char *const pm_states[];
extern bool valid_state(suspend_state_t state);
extern int suspend_devices_and_enter(suspend_state_t state);
extern int enter_state(suspend_state_t state);
#else /* !CONFIG_SUSPEND */
static inline int suspend_devices_and_enter(suspend_state_t state)
{
return -ENOSYS;
}
static inline int enter_state(suspend_state_t state) { return -ENOSYS; }
static inline bool valid_state(suspend_state_t state) { return false; }
#endif /* !CONFIG_SUSPEND */
#ifdef CONFIG_PM_TEST_SUSPEND
/* kernel/power/suspend_test.c */
extern void suspend_test_start(void);
extern void suspend_test_finish(const char *label);
#else /* !CONFIG_PM_TEST_SUSPEND */
static inline void suspend_test_start(void) {}
static inline void suspend_test_finish(const char *label) {}
#endif /* !CONFIG_PM_TEST_SUSPEND */
#ifdef CONFIG_PM_SLEEP
/* kernel/power/main.c */
extern int pm_notifier_call_chain(unsigned long val);
#endif
#ifdef CONFIG_HIGHMEM
int restore_highmem(void);
#else
static inline unsigned int count_highmem_pages(void) { return 0; }
static inline int restore_highmem(void) { return 0; }
#endif
/*
* Suspend test levels
*/
enum {
/* keep first */
TEST_NONE,
TEST_CORE,
TEST_CPUS,
TEST_PLATFORM,
TEST_DEVICES,
TEST_FREEZER,
/* keep last */
__TEST_AFTER_LAST
};
#define TEST_FIRST TEST_NONE
#define TEST_MAX (__TEST_AFTER_LAST - 1)
extern int pm_test_level;
#ifdef CONFIG_SUSPEND_FREEZER
static inline int suspend_freeze_processes(void)
{
return freeze_processes();
}
static inline void suspend_thaw_processes(void)
{
thaw_processes();
}
#else
static inline int suspend_freeze_processes(void)
{
return 0;
}
static inline void suspend_thaw_processes(void)
{
}
#endif

View File

@@ -0,0 +1,46 @@
/*
* poweroff.c - sysrq handler to gracefully power down machine.
*
* This file is released under the GPL v2
*/
#include <linux/kernel.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
#include <linux/cpumask.h>
/*
* When the user hits Sys-Rq o to power down the machine this is the
* callback we use.
*/
static void do_poweroff(struct work_struct *dummy)
{
kernel_power_off();
}
static DECLARE_WORK(poweroff_work, do_poweroff);
static void handle_poweroff(int key, struct tty_struct *tty)
{
/* run sysrq poweroff on boot cpu */
schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work);
}
static struct sysrq_key_op sysrq_poweroff_op = {
.handler = handle_poweroff,
.help_msg = "powerOff",
.action_msg = "Power Off",
.enable_mask = SYSRQ_ENABLE_BOOT,
};
static int pm_sysrq_init(void)
{
register_sysrq_key('o', &sysrq_poweroff_op);
return 0;
}
subsys_initcall(pm_sysrq_init);
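/*
 * Illustrative trigger paths (not part of the original file): once
 * pm_sysrq_init() has registered the 'o' key above, the handler fires
 * on Alt+SysRq+o from the console, or from user space via
 *
 *   echo o > /proc/sysrq-trigger
 *
 * both of which queue poweroff_work on the first online CPU.
 */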

View File

@@ -0,0 +1,160 @@
/*
* kernel/power/process.c - Functions for starting/stopping processes on
* suspend transitions.
*
* Originally from swsusp.
*/
#undef DEBUG
#include <linux/interrupt.h>
#include <linux/oom.h>
#include <linux/suspend.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
/*
* Timeout for stopping processes
*/
#define TIMEOUT (20 * HZ)
static inline int freezeable(struct task_struct * p)
{
if ((p == current) ||
(p->flags & PF_NOFREEZE) ||
(p->exit_state != 0))
return 0;
return 1;
}
static int try_to_freeze_tasks(bool sig_only)
{
struct task_struct *g, *p;
unsigned long end_time;
unsigned int todo;
struct timeval start, end;
u64 elapsed_csecs64;
unsigned int elapsed_csecs;
do_gettimeofday(&start);
end_time = jiffies + TIMEOUT;
do {
todo = 0;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if (frozen(p) || !freezeable(p))
continue;
if (!freeze_task(p, sig_only))
continue;
/*
* Now that we've done set_freeze_flag, don't
* perturb a task in TASK_STOPPED or TASK_TRACED.
* It is "frozen enough". If the task does wake
* up, it will immediately call try_to_freeze.
*/
if (!task_is_stopped_or_traced(p) &&
!freezer_should_skip(p))
todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
yield(); /* Yield is okay here */
if (time_after(jiffies, end_time))
break;
} while (todo);
do_gettimeofday(&end);
elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
do_div(elapsed_csecs64, NSEC_PER_SEC / 100);
elapsed_csecs = elapsed_csecs64;
if (todo) {
/* This does not unfreeze processes that are already frozen
* (we have slightly ugly calling convention in that respect,
* and caller must call thaw_processes() if something fails),
* but it cleans up leftover PF_FREEZE requests.
*/
printk("\n");
printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
"(%d tasks refusing to freeze):\n",
elapsed_csecs / 100, elapsed_csecs % 100, todo);
show_state();
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
if (freezing(p) && !freezer_should_skip(p))
printk(KERN_ERR " %s\n", p->comm);
cancel_freezing(p);
task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
} else {
printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100,
elapsed_csecs % 100);
}
return todo ? -EBUSY : 0;
}
/**
* freeze_processes - tell processes to enter the refrigerator
*/
int freeze_processes(void)
{
int error;
printk("Freezing user space processes ... ");
error = try_to_freeze_tasks(true);
if (error)
goto Exit;
printk("done.\n");
printk("Freezing remaining freezable tasks ... ");
error = try_to_freeze_tasks(false);
if (error)
goto Exit;
printk("done.");
oom_killer_disable();
Exit:
BUG_ON(in_atomic());
printk("\n");
return error;
}
static void thaw_tasks(bool nosig_only)
{
struct task_struct *g, *p;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if (!freezeable(p))
continue;
if (nosig_only && should_send_signal(p))
continue;
if (cgroup_freezing_or_frozen(p))
continue;
thaw_process(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
void thaw_processes(void)
{
oom_killer_enable();
printk("Restarting tasks ... ");
thaw_tasks(true);
thaw_tasks(false);
schedule();
printk("done.\n");
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,300 @@
/*
* kernel/power/suspend.c - Suspend to RAM and standby functionality.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
* Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
*
* This file is released under the GPLv2.
*/
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include "power.h"
const char *const pm_states[PM_SUSPEND_MAX] = {
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
};
static struct platform_suspend_ops *suspend_ops;
/**
* suspend_set_ops - Set the global suspend method table.
* @ops: Pointer to ops structure.
*/
void suspend_set_ops(struct platform_suspend_ops *ops)
{
mutex_lock(&pm_mutex);
suspend_ops = ops;
mutex_unlock(&pm_mutex);
}
bool valid_state(suspend_state_t state)
{
/*
* All states need lowlevel support and need to be valid to the lowlevel
* implementation, no valid callback implies that none are valid.
*/
return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
}
/**
* suspend_valid_only_mem - generic memory-only valid callback
*
* Platform drivers that implement mem suspend only and only need
* to check for that in their .valid callback can use this instead
* of rolling their own .valid callback.
*/
int suspend_valid_only_mem(suspend_state_t state)
{
return state == PM_SUSPEND_MEM;
}
static int suspend_test(int level)
{
#ifdef CONFIG_PM_DEBUG
if (pm_test_level == level) {
printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
mdelay(5000);
return 1;
}
#endif /* !CONFIG_PM_DEBUG */
return 0;
}
/**
* suspend_prepare - Do prep work before entering low-power state.
*
* This is common code that is called for each state that we're entering.
* Run suspend notifiers, allocate a console and stop all processes.
*/
static int suspend_prepare(void)
{
int error;
if (!suspend_ops || !suspend_ops->enter)
return -EPERM;
pm_prepare_console();
error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
if (error)
goto Finish;
error = usermodehelper_disable();
if (error)
goto Finish;
error = suspend_freeze_processes();
if (!error)
return 0;
suspend_thaw_processes();
usermodehelper_enable();
Finish:
pm_notifier_call_chain(PM_POST_SUSPEND);
pm_restore_console();
return error;
}
/* default implementation */
void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
{
local_irq_disable();
}
/* default implementation */
void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
{
local_irq_enable();
}
/**
* suspend_enter - enter the desired system sleep state.
* @state: state to enter
*
* This function should be called after devices have been suspended.
*/
static int suspend_enter(suspend_state_t state)
{
int error;
if (suspend_ops->prepare) {
error = suspend_ops->prepare();
if (error)
return error;
}
error = dpm_suspend_noirq(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
goto Platform_finish;
}
if (suspend_ops->prepare_late) {
error = suspend_ops->prepare_late();
if (error)
goto Power_up_devices;
}
if (suspend_test(TEST_PLATFORM))
goto Platform_wake;
error = disable_nonboot_cpus();
if (error || suspend_test(TEST_CPUS))
goto Enable_cpus;
arch_suspend_disable_irqs();
BUG_ON(!irqs_disabled());
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
if (!suspend_test(TEST_CORE))
error = suspend_ops->enter(state);
sysdev_resume();
}
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
Enable_cpus:
enable_nonboot_cpus();
Platform_wake:
if (suspend_ops->wake)
suspend_ops->wake();
Power_up_devices:
dpm_resume_noirq(PMSG_RESUME);
Platform_finish:
if (suspend_ops->finish)
suspend_ops->finish();
return error;
}
/**
* suspend_devices_and_enter - suspend devices and enter the desired system
* sleep state.
* @state: state to enter
*/
int suspend_devices_and_enter(suspend_state_t state)
{
int error;
if (!suspend_ops)
return -ENOSYS;
if (suspend_ops->begin) {
error = suspend_ops->begin(state);
if (error)
goto Close;
}
suspend_console();
suspend_test_start();
error = dpm_suspend_start(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to suspend\n");
goto Recover_platform;
}
suspend_test_finish("suspend devices");
if (suspend_test(TEST_DEVICES))
goto Recover_platform;
suspend_enter(state);
Resume_devices:
suspend_test_start();
dpm_resume_end(PMSG_RESUME);
suspend_test_finish("resume devices");
resume_console();
Close:
if (suspend_ops->end)
suspend_ops->end();
return error;
Recover_platform:
if (suspend_ops->recover)
suspend_ops->recover();
goto Resume_devices;
}
/**
* suspend_finish - Do final work before exiting suspend sequence.
*
* Call platform code to clean up, restart processes, and free the
* console that we've allocated. This is not called for suspend-to-disk.
*/
static void suspend_finish(void)
{
suspend_thaw_processes();
usermodehelper_enable();
pm_notifier_call_chain(PM_POST_SUSPEND);
pm_restore_console();
}
/**
* enter_state - Do common work of entering low-power state.
* @state: pm_state structure for state we're entering.
*
* Make sure we're the only ones trying to enter a sleep state. Fail
* if someone has beat us to it, since we don't want anything weird to
* happen when we wake up.
* Then, do the setup for suspend, enter the state, and clean up (after
* we've woken up).
*/
int enter_state(suspend_state_t state)
{
int error;
if (!valid_state(state))
return -ENODEV;
if (!mutex_trylock(&pm_mutex))
return -EBUSY;
printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
error = suspend_prepare();
if (error)
goto Unlock;
if (suspend_test(TEST_FREEZER))
goto Finish;
pr_debug("PM: Entering %s sleep\n", pm_states[state]);
error = suspend_devices_and_enter(state);
Finish:
pr_debug("PM: Finishing wakeup.\n");
suspend_finish();
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
/**
* pm_suspend - Externally visible function for suspending system.
* @state: Enumerated value of state to enter.
*
* Determine whether or not value is within range, get state
* structure, and enter (above).
*/
int pm_suspend(suspend_state_t state)
{
if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
return enter_state(state);
return -EINVAL;
}
EXPORT_SYMBOL(pm_suspend);

View File

@@ -0,0 +1,188 @@
/*
* kernel/power/suspend_test.c - Suspend to RAM and standby test facility.
*
* Copyright (c) 2009 Pavel Machek <pavel@ucw.cz>
*
* This file is released under the GPLv2.
*/
#include <linux/init.h>
#include <linux/rtc.h>
#include "power.h"
/*
* We test the system suspend code by setting an RTC wakealarm a short
* time in the future, then suspending. Suspending the devices won't
* normally take long ... some systems only need a few milliseconds.
*
* The time it takes is system-specific though, so when we test this
* during system bootup we allow a LOT of time.
*/
#define TEST_SUSPEND_SECONDS 10
static unsigned long suspend_test_start_time;
void suspend_test_start(void)
{
/* FIXME Use better timebase than "jiffies", ideally a clocksource.
* What we want is a hardware counter that will work correctly even
* during the irqs-are-off stages of the suspend/resume cycle...
*/
suspend_test_start_time = jiffies;
}
void suspend_test_finish(const char *label)
{
long nj = jiffies - suspend_test_start_time;
unsigned msec;
msec = jiffies_to_msecs(abs(nj));
pr_info("PM: %s took %d.%03d seconds\n", label,
msec / 1000, msec % 1000);
/* Warning on suspend means the RTC alarm period needs to be
* larger -- the system was sooo slooowwww to suspend that the
* alarm (should have) fired before the system went to sleep!
*
* Warning on either suspend or resume also means the system
* has some performance issues. The stack dump of a WARN_ON
* is more likely to get the right attention than a printk...
*/
WARN(msec > (TEST_SUSPEND_SECONDS * 1000),
"Component: %s, time: %u\n", label, msec);
}
/*
* To test system suspend, we need a hands-off mechanism to resume the
* system. RTCs wake alarms are a common self-contained mechanism.
*/
static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
{
static char err_readtime[] __initdata =
KERN_ERR "PM: can't read %s time, err %d\n";
static char err_wakealarm [] __initdata =
KERN_ERR "PM: can't set %s wakealarm, err %d\n";
static char err_suspend[] __initdata =
KERN_ERR "PM: suspend test failed, error %d\n";
static char info_test[] __initdata =
KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
unsigned long now;
struct rtc_wkalrm alm;
int status;
/* this may fail if the RTC hasn't been initialized */
status = rtc_read_time(rtc, &alm.time);
if (status < 0) {
printk(err_readtime, dev_name(&rtc->dev), status);
return;
}
rtc_tm_to_time(&alm.time, &now);
memset(&alm, 0, sizeof alm);
rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
alm.enabled = true;
status = rtc_set_alarm(rtc, &alm);
if (status < 0) {
printk(err_wakealarm, dev_name(&rtc->dev), status);
return;
}
if (state == PM_SUSPEND_MEM) {
printk(info_test, pm_states[state]);
status = pm_suspend(state);
if (status == -ENODEV)
state = PM_SUSPEND_STANDBY;
}
if (state == PM_SUSPEND_STANDBY) {
printk(info_test, pm_states[state]);
status = pm_suspend(state);
}
if (status < 0)
printk(err_suspend, status);
/* Some platforms can't detect that the alarm triggered the
* wakeup, or (accordingly) disable it afterwards.
* It's supposed to give oneshot behavior; cope.
*/
alm.enabled = false;
rtc_set_alarm(rtc, &alm);
}
static int __init has_wakealarm(struct device *dev, void *name_ptr)
{
struct rtc_device *candidate = to_rtc_device(dev);
if (!candidate->ops->set_alarm)
return 0;
if (!device_may_wakeup(candidate->dev.parent))
return 0;
*(const char **)name_ptr = dev_name(dev);
return 1;
}
/*
* Kernel options like "test_suspend=mem" force suspend/resume sanity tests
* at startup time. They're normally disabled, for faster boot and because
* we can't know which states really work on this particular system.
*/
static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
static char warn_bad_state[] __initdata =
KERN_WARNING "PM: can't test '%s' suspend state\n";
static int __init setup_test_suspend(char *value)
{
unsigned i;
/* "=mem" ==> "mem" */
value++;
for (i = 0; i < PM_SUSPEND_MAX; i++) {
if (!pm_states[i])
continue;
if (strcmp(pm_states[i], value) != 0)
continue;
test_state = (__force suspend_state_t) i;
return 0;
}
printk(warn_bad_state, value);
return 0;
}
__setup("test_suspend", setup_test_suspend);
static int __init test_suspend(void)
{
static char warn_no_rtc[] __initdata =
KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
char *pony = NULL;
struct rtc_device *rtc = NULL;
/* PM is initialized by now; is that state testable? */
if (test_state == PM_SUSPEND_ON)
goto done;
if (!valid_state(test_state)) {
printk(warn_bad_state, pm_states[test_state]);
goto done;
}
/* RTCs have initialized by now too ... can we use one? */
class_find_device(rtc_class, NULL, &pony, has_wakealarm);
if (pony)
rtc = rtc_class_open(pony);
if (!rtc) {
printk(warn_no_rtc);
goto done;
}
/* go for it */
test_wakealarm(rtc, test_state);
rtc_class_close(rtc);
done:
return 0;
}
late_initcall(test_suspend);

646
kernel/kernel/power/swap.c Normal file
View File

@@ -0,0 +1,646 @@
/*
* linux/kernel/power/swap.c
*
* This file provides functions for reading the suspend image from
* and writing it to a swap partition.
*
* Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*
* This file is released under the GPLv2.
*
*/
#include <linux/module.h>
#include <linux/file.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/genhd.h>
#include <linux/device.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pm.h>
#include "power.h"
#define SWSUSP_SIG "S1SUSPEND"
struct swsusp_header {
char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
sector_t image;
unsigned int flags; /* Flags to pass to the "boot" kernel */
char orig_sig[10];
char sig[10];
} __attribute__((packed));
static struct swsusp_header *swsusp_header;
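/* Illustrative user-space sketch (not kernel code): the reserved[] padding is
 * sized so the whole header is exactly one page and sig lands in the last 10
 * bytes, on top of the regular "SWAP-SPACE"/"SWAPSPACE2" swap signature.
 * DEMO_PAGE_SIZE and demo_sector_t are mocks for a stand-alone check. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#define DEMO_PAGE_SIZE 4096
typedef uint64_t demo_sector_t;
struct demo_swsusp_header {
	char reserved[DEMO_PAGE_SIZE - 20 - sizeof(demo_sector_t) - sizeof(int)];
	demo_sector_t image;
	unsigned int flags;
	char orig_sig[10];
	char sig[10];
} __attribute__((packed));
int main(void)
{
	assert(sizeof(struct demo_swsusp_header) == DEMO_PAGE_SIZE);
	assert(offsetof(struct demo_swsusp_header, sig) == DEMO_PAGE_SIZE - 10);
	printf("sig occupies the last 10 bytes of the page\n");
	return 0;
}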
/*
* General things
*/
static unsigned short root_swap = 0xffff;
static struct block_device *resume_bdev;
/**
* submit - submit BIO request.
* @rw: READ or WRITE.
* @page_off: physical offset of the page.
* @page: page we're reading or writing.
* @bio_chain: list of pending bios (for async reading)
*
* Straight from the textbook - allocate and initialize the bio.
* If we're reading, make sure the page is marked as dirty.
* Then submit it and, if @bio_chain == NULL, wait.
*/
static int submit(int rw, pgoff_t page_off, struct page *page,
struct bio **bio_chain)
{
const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
struct bio *bio;
bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
bio->bi_sector = page_off * (PAGE_SIZE >> 9);
bio->bi_bdev = resume_bdev;
bio->bi_end_io = end_swap_bio_read;
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
page_off);
bio_put(bio);
return -EFAULT;
}
lock_page(page);
bio_get(bio);
if (bio_chain == NULL) {
submit_bio(bio_rw, bio);
wait_on_page_locked(page);
if (rw == READ)
bio_set_pages_dirty(bio);
bio_put(bio);
} else {
if (rw == READ)
get_page(page); /* These pages are freed later */
bio->bi_private = *bio_chain;
*bio_chain = bio;
submit_bio(bio_rw, bio);
}
return 0;
}
static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
{
return submit(READ, page_off, virt_to_page(addr), bio_chain);
}
static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
{
return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
}
static int wait_on_bio_chain(struct bio **bio_chain)
{
struct bio *bio;
struct bio *next_bio;
int ret = 0;
if (bio_chain == NULL)
return 0;
bio = *bio_chain;
if (bio == NULL)
return 0;
while (bio) {
struct page *page;
next_bio = bio->bi_private;
page = bio->bi_io_vec[0].bv_page;
wait_on_page_locked(page);
if (!PageUptodate(page) || PageError(page))
ret = -EIO;
put_page(page);
bio_put(bio);
bio = next_bio;
}
*bio_chain = NULL;
return ret;
}
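/* Illustrative user-space sketch (not kernel code): the async path links every
 * submitted request into a singly linked chain through a "private" pointer,
 * the way submit() uses bio->bi_private, and drains the whole chain afterwards
 * like wait_on_bio_chain(). struct demo_req stands in for struct bio. */
#include <stdio.h>
#include <stdlib.h>
struct demo_req {
	int id;
	struct demo_req *private;	/* plays the role of bio->bi_private */
};
static void submit_demo(int id, struct demo_req **chain)
{
	struct demo_req *req = malloc(sizeof(*req));
	req->id = id;
	req->private = *chain;		/* push onto the pending chain */
	*chain = req;
}
static void drain_demo(struct demo_req **chain)
{
	struct demo_req *req = *chain;
	while (req) {
		struct demo_req *next = req->private;
		printf("completing request %d\n", req->id);	/* ~wait_on_page_locked */
		free(req);
		req = next;
	}
	*chain = NULL;
}
int main(void)
{
	struct demo_req *chain = NULL;
	int i;
	for (i = 0; i < 3; i++)
		submit_demo(i, &chain);
	drain_demo(&chain);	/* requests complete in LIFO order: 2, 1, 0 */
	return 0;
}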
/*
* Saving part
*/
static int mark_swapfiles(sector_t start, unsigned int flags)
{
int error;
bio_read_page(swsusp_resume_block, swsusp_header, NULL);
if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
swsusp_header->image = start;
swsusp_header->flags = flags;
error = bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
printk(KERN_ERR "PM: Swap header not found!\n");
error = -ENODEV;
}
return error;
}
/**
* swsusp_swap_check - check if the resume device is a swap device
* and get its index (if so)
*/
static int swsusp_swap_check(void) /* This is called before saving image */
{
int res;
res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
&resume_bdev);
if (res < 0)
return res;
root_swap = res;
res = blkdev_get(resume_bdev, FMODE_WRITE);
if (res)
return res;
res = set_blocksize(resume_bdev, PAGE_SIZE);
if (res < 0)
blkdev_put(resume_bdev, FMODE_WRITE);
return res;
}
/**
* write_page - Write one page to given swap location.
* @buf: Address we're writing.
* @offset: Offset of the swap page we're writing to.
* @bio_chain: Link the next write BIO here
*/
static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
void *src;
if (!offset)
return -ENOSPC;
if (bio_chain) {
src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (src) {
memcpy(src, buf, PAGE_SIZE);
} else {
WARN_ON_ONCE(1);
bio_chain = NULL; /* Go synchronous */
src = buf;
}
} else {
src = buf;
}
return bio_write_page(offset, src, bio_chain);
}
/*
* The swap map is a data structure used for keeping track of each page
* written to a swap partition. It consists of many swap_map_page
* structures that each contain an array of MAP_PAGE_ENTRIES swap entries.
* These structures are stored on the swap and linked together with the
* help of the .next_swap member.
*
* The swap map is created during suspend. The swap map pages are
* allocated and populated one at a time, so we only need one memory
* page to set up the entire structure.
*
* During resume we also only need to use one swap_map_page structure
* at a time.
*/
#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
struct swap_map_page {
sector_t entries[MAP_PAGE_ENTRIES];
sector_t next_swap;
};
/**
* The swap_map_handle structure is used for handling swap in
* a file-like way
*/
struct swap_map_handle {
struct swap_map_page *cur;
sector_t cur_swap;
unsigned int k;
};
static void release_swap_writer(struct swap_map_handle *handle)
{
if (handle->cur)
free_page((unsigned long)handle->cur);
handle->cur = NULL;
}
static int get_swap_writer(struct swap_map_handle *handle)
{
handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
if (!handle->cur)
return -ENOMEM;
handle->cur_swap = alloc_swapdev_block(root_swap);
if (!handle->cur_swap) {
release_swap_writer(handle);
return -ENOSPC;
}
handle->k = 0;
return 0;
}
static int swap_write_page(struct swap_map_handle *handle, void *buf,
struct bio **bio_chain)
{
int error = 0;
sector_t offset;
if (!handle->cur)
return -EINVAL;
offset = alloc_swapdev_block(root_swap);
error = write_page(buf, offset, bio_chain);
if (error)
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
error = wait_on_bio_chain(bio_chain);
if (error)
goto out;
offset = alloc_swapdev_block(root_swap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
error = write_page(handle->cur, handle->cur_swap, NULL);
if (error)
goto out;
memset(handle->cur, 0, PAGE_SIZE);
handle->cur_swap = offset;
handle->k = 0;
}
out:
return error;
}
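/* Illustrative user-space sketch (not kernel code): how data-page sectors are
 * collected into swap_map_page entries and chained through next_swap once a
 * map page fills up. Sector numbers are fake and handed out by a counter; the
 * on-disk writes are replaced by printf(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#define DEMO_ENTRIES 4			/* stands in for MAP_PAGE_ENTRIES */
struct demo_map_page {
	uint64_t entries[DEMO_ENTRIES];
	uint64_t next_swap;
};
static uint64_t next_sector = 100;	/* fake alloc_swapdev_block() */
int main(void)
{
	struct demo_map_page map;
	uint64_t map_sector = next_sector++;
	unsigned k = 0, page;
	memset(&map, 0, sizeof(map));
	for (page = 0; page < 10; page++) {
		map.entries[k++] = next_sector++;	/* record where the data page went */
		if (k == DEMO_ENTRIES) {
			map.next_swap = next_sector++;	/* reserve the next map page */
			printf("map page at sector %llu full, next map at %llu\n",
			       (unsigned long long)map_sector,
			       (unsigned long long)map.next_swap);
			map_sector = map.next_swap;
			memset(&map, 0, sizeof(map));
			k = 0;
		}
	}
	printf("last map page at sector %llu holds %u entries\n",
	       (unsigned long long)map_sector, k);
	return 0;
}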
static int flush_swap_writer(struct swap_map_handle *handle)
{
if (handle->cur && handle->cur_swap)
return write_page(handle->cur, handle->cur_swap, NULL);
else
return -EINVAL;
}
/**
* save_image - save the suspend image data
*/
static int save_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
unsigned int nr_to_write)
{
unsigned int m;
int ret;
int nr_pages;
int err2;
struct bio *bio;
struct timeval start;
struct timeval stop;
printk(KERN_INFO "PM: Saving image data pages (%u pages) ... ",
nr_to_write);
m = nr_to_write / 100;
if (!m)
m = 1;
nr_pages = 0;
bio = NULL;
do_gettimeofday(&start);
while (1) {
ret = snapshot_read_next(snapshot, PAGE_SIZE);
if (ret <= 0)
break;
ret = swap_write_page(handle, data_of(*snapshot), &bio);
if (ret)
break;
if (!(nr_pages % m))
printk("\b\b\b\b%3d%%", nr_pages / m);
nr_pages++;
}
err2 = wait_on_bio_chain(&bio);
do_gettimeofday(&stop);
if (!ret)
ret = err2;
if (!ret)
printk("\b\b\b\bdone\n");
else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
return ret;
}
/**
* enough_swap - Make sure we have enough swap to save the image.
*
* Returns TRUE or FALSE after checking the total amount of swap
* space available from the resume partition.
*/
static int enough_swap(unsigned int nr_pages)
{
unsigned int free_swap = count_swap_pages(root_swap, 1);
pr_debug("PM: Free swap pages: %u\n", free_swap);
return free_swap > nr_pages + PAGES_FOR_IO;
}
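/* Illustrative sketch (not kernel code): the headroom rule used by
 * enough_swap() - the image only proceeds if free swap covers the image pages
 * plus a reserve for in-flight I/O. DEMO_PAGES_FOR_IO is a mock value, not
 * the kernel's PAGES_FOR_IO. */
#include <stdbool.h>
#include <stdio.h>
#define DEMO_PAGES_FOR_IO 1024
static bool demo_enough_swap(unsigned free_swap, unsigned nr_pages)
{
	return free_swap > nr_pages + DEMO_PAGES_FOR_IO;
}
int main(void)
{
	printf("%d\n", demo_enough_swap(300000, 250000));	/* 1: plenty of room */
	printf("%d\n", demo_enough_swap(250500, 250000));	/* 0: reserve not met */
	return 0;
}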
/**
* swsusp_write - Write entire image and metadata.
* @flags: flags to pass to the "boot" kernel in the image header
*
* It is important _NOT_ to unmount filesystems at this point. We want
* them synced (in case something goes wrong), but we DO NOT want to mark
* the filesystems clean: they are not. (And it does not matter: if we
* resume correctly, we'll mark the system clean anyway.)
*/
int swsusp_write(unsigned int flags)
{
struct swap_map_handle handle;
struct snapshot_handle snapshot;
struct swsusp_info *header;
int error;
error = swsusp_swap_check();
if (error) {
printk(KERN_ERR "PM: Cannot find swap device, try "
"swapon -a.\n");
return error;
}
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_read_next(&snapshot, PAGE_SIZE);
if (error < PAGE_SIZE) {
if (error >= 0)
error = -EFAULT;
goto out;
}
header = (struct swsusp_info *)data_of(snapshot);
if (!enough_swap(header->pages)) {
printk(KERN_ERR "PM: Not enough free swap\n");
error = -ENOSPC;
goto out;
}
error = get_swap_writer(&handle);
if (!error) {
sector_t start = handle.cur_swap;
error = swap_write_page(&handle, header, NULL);
if (!error)
error = save_image(&handle, &snapshot,
header->pages - 1);
if (!error) {
flush_swap_writer(&handle);
printk(KERN_INFO "PM: S");
error = mark_swapfiles(start, flags);
printk("|\n");
}
}
if (error)
free_all_swap_pages(root_swap);
release_swap_writer(&handle);
out:
swsusp_close(FMODE_WRITE);
return error;
}
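/* Illustrative user-space sketch (not kernel code): swsusp_write() writes
 * every image page first and flips the on-disk signature last (via
 * mark_swapfiles()), so an interrupted write leaves the old signature intact.
 * The same "payload first, magic last" ordering is shown here with a regular
 * file; "demo.img" and the magic string are made up. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
int main(void)
{
	char payload[4096];
	const char magic[10] = "S1DEMOSIG";
	int fd = open("demo.img", O_CREAT | O_WRONLY, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(payload, 0xab, sizeof(payload));
	/* 1. write the payload, leaving the header block alone */
	pwrite(fd, payload, sizeof(payload), 4096);
	fsync(fd);		/* make sure the data is durable first */
	/* 2. only now commit the magic that marks the image as valid */
	pwrite(fd, magic, sizeof(magic), 0);
	fsync(fd);
	close(fd);
	printf("image committed\n");
	return 0;
}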
/**
* The following functions allow us to read data using a swap map
* in a file-like way
*/
static void release_swap_reader(struct swap_map_handle *handle)
{
if (handle->cur)
free_page((unsigned long)handle->cur);
handle->cur = NULL;
}
static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
{
int error;
if (!start)
return -EINVAL;
handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
if (!handle->cur)
return -ENOMEM;
error = bio_read_page(start, handle->cur, NULL);
if (error) {
release_swap_reader(handle);
return error;
}
handle->k = 0;
return 0;
}
static int swap_read_page(struct swap_map_handle *handle, void *buf,
struct bio **bio_chain)
{
sector_t offset;
int error;
if (!handle->cur)
return -EINVAL;
offset = handle->cur->entries[handle->k];
if (!offset)
return -EFAULT;
error = bio_read_page(offset, buf, bio_chain);
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
error = wait_on_bio_chain(bio_chain);
handle->k = 0;
offset = handle->cur->next_swap;
if (!offset)
release_swap_reader(handle);
else if (!error)
error = bio_read_page(offset, handle->cur, NULL);
}
return error;
}
/**
* load_image - load the image using the swap map handle
* @handle and the snapshot handle @snapshot
* (assume there are @nr_to_read pages to load)
*/
static int load_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
unsigned int nr_to_read)
{
unsigned int m;
int error = 0;
struct timeval start;
struct timeval stop;
struct bio *bio;
int err2;
unsigned nr_pages;
printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ",
nr_to_read);
m = nr_to_read / 100;
if (!m)
m = 1;
nr_pages = 0;
bio = NULL;
do_gettimeofday(&start);
for ( ; ; ) {
error = snapshot_write_next(snapshot, PAGE_SIZE);
if (error <= 0)
break;
error = swap_read_page(handle, data_of(*snapshot), &bio);
if (error)
break;
if (snapshot->sync_read)
error = wait_on_bio_chain(&bio);
if (error)
break;
if (!(nr_pages % m))
printk("\b\b\b\b%3d%%", nr_pages / m);
nr_pages++;
}
err2 = wait_on_bio_chain(&bio);
do_gettimeofday(&stop);
if (!error)
error = err2;
if (!error) {
printk("\b\b\b\bdone\n");
snapshot_write_finalize(snapshot);
if (!snapshot_image_loaded(snapshot))
error = -ENODATA;
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
return error;
}
/**
* swsusp_read - read the hibernation image.
* @flags_p: flags passed by the "frozen" kernel in the image header should
* be written into this memory location
*/
int swsusp_read(unsigned int *flags_p)
{
int error;
struct swap_map_handle handle;
struct snapshot_handle snapshot;
struct swsusp_info *header;
*flags_p = swsusp_header->flags;
if (IS_ERR(resume_bdev)) {
pr_debug("PM: Image device not initialised\n");
return PTR_ERR(resume_bdev);
}
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_write_next(&snapshot, PAGE_SIZE);
if (error < PAGE_SIZE)
return error < 0 ? error : -EFAULT;
header = (struct swsusp_info *)data_of(snapshot);
error = get_swap_reader(&handle, swsusp_header->image);
if (!error)
error = swap_read_page(&handle, header, NULL);
if (!error)
error = load_image(&handle, &snapshot, header->pages - 1);
release_swap_reader(&handle);
if (!error)
pr_debug("PM: Image successfully loaded\n");
else
pr_debug("PM: Error %d resuming\n", error);
return error;
}
/**
* swsusp_check - Check for swsusp signature in the resume device
*/
int swsusp_check(void)
{
int error;
resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
memset(swsusp_header, 0, PAGE_SIZE);
error = bio_read_page(swsusp_resume_block,
swsusp_header, NULL);
if (error)
goto put;
if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
/* Reset swap signature now */
error = bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
error = -EINVAL;
}
put:
if (error)
blkdev_put(resume_bdev, FMODE_READ);
else
pr_debug("PM: Signature found, resuming\n");
} else {
error = PTR_ERR(resume_bdev);
}
if (error)
pr_debug("PM: Error %d checking image file\n", error);
return error;
}
/**
* swsusp_close - close swap device.
*/
void swsusp_close(fmode_t mode)
{
if (IS_ERR(resume_bdev)) {
pr_debug("PM: Image device not initialised\n");
return;
}
blkdev_put(resume_bdev, mode);
}
static int swsusp_header_init(void)
{
swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
if (!swsusp_header)
panic("Could not allocate memory for swsusp_header\n");
return 0;
}
core_initcall(swsusp_header_init);

188
kernel/kernel/power/swsusp.c Normal file
View File

@@ -0,0 +1,188 @@
/*
* linux/kernel/power/swsusp.c
*
* This file provides code to write suspend image to swap and read it back.
*
* Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
* Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
*
* This file is released under the GPLv2.
*
* I'd like to thank the following people for their work:
*
* Pavel Machek <pavel@ucw.cz>:
* Modifications, defectiveness pointing, being with me at the very beginning,
* suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
*
* Steve Doddi <dirk@loth.demon.co.uk>:
* Support the possibility of hardware state restoring.
*
* Raph <grey.havens@earthling.net>:
* Support for preserving states of network devices and virtual console
* (including X and svgatextmode)
*
* Kurt Garloff <garloff@suse.de>:
* Straightened the critical function in order to prevent compilers from
* playing tricks with local variables.
*
* Andreas Mohr <a.mohr@mailto.de>
*
* Alex Badea <vampire@go.ro>:
* Fixed runaway init
*
* Rafael J. Wysocki <rjw@sisk.pl>
* Reworked the freeing of memory and the handling of swap
*
* More state savers are welcome. Especially for the scsi layer...
*
* For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
*/
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/swap.h>
#include <linux/pm.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/rbtree.h>
#include <linux/io.h>
#include "power.h"
int in_suspend __nosavedata = 0;
/**
* The following functions are used for tracing the allocated
* swap pages, so that they can be freed in case of an error.
*/
struct swsusp_extent {
struct rb_node node;
unsigned long start;
unsigned long end;
};
static struct rb_root swsusp_extents = RB_ROOT;
static int swsusp_extents_insert(unsigned long swap_offset)
{
struct rb_node **new = &(swsusp_extents.rb_node);
struct rb_node *parent = NULL;
struct swsusp_extent *ext;
/* Figure out where to put the new node */
while (*new) {
ext = container_of(*new, struct swsusp_extent, node);
parent = *new;
if (swap_offset < ext->start) {
/* Try to merge */
if (swap_offset == ext->start - 1) {
ext->start--;
return 0;
}
new = &((*new)->rb_left);
} else if (swap_offset > ext->end) {
/* Try to merge */
if (swap_offset == ext->end + 1) {
ext->end++;
return 0;
}
new = &((*new)->rb_right);
} else {
/* It already is in the tree */
return -EINVAL;
}
}
/* Add the new node and rebalance the tree. */
ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
if (!ext)
return -ENOMEM;
ext->start = swap_offset;
ext->end = swap_offset;
rb_link_node(&ext->node, parent, new);
rb_insert_color(&ext->node, &swsusp_extents);
return 0;
}
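/* Illustrative sketch (not kernel code): the extent merge rule used above.
 * Only a single extent is tracked here instead of an rb-tree, which is enough
 * to show when an offset extends an existing [start, end] range and when it
 * would need a new node (or is rejected as a duplicate). */
#include <stdio.h>
struct demo_extent {
	unsigned long start, end;
};
/* 0 = merged, 1 = needs a new extent, -1 = already covered */
static int demo_insert(struct demo_extent *ext, unsigned long offset)
{
	if (offset == ext->start - 1) {
		ext->start--;
		return 0;
	}
	if (offset == ext->end + 1) {
		ext->end++;
		return 0;
	}
	if (offset >= ext->start && offset <= ext->end)
		return -1;
	return 1;
}
int main(void)
{
	struct demo_extent ext = { .start = 100, .end = 100 };
	unsigned long probes[] = { 101, 102, 99, 102, 200 };
	unsigned i;
	for (i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
		printf("offset %lu -> %d, extent now [%lu, %lu]\n",
		       probes[i], demo_insert(&ext, probes[i]),
		       ext.start, ext.end);
	return 0;
}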
/**
* alloc_swapdev_block - allocate a swap page and register that it has
* been allocated, so that it can be freed in case of an error.
*/
sector_t alloc_swapdev_block(int swap)
{
unsigned long offset;
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
if (swsusp_extents_insert(offset))
swap_free(swp_entry(swap, offset));
else
return swapdev_block(swap, offset);
}
return 0;
}
/**
* free_all_swap_pages - free swap pages allocated for saving image data.
* It also frees the extents used to register which swap entries had been
* allocated.
*/
void free_all_swap_pages(int swap)
{
struct rb_node *node;
while ((node = swsusp_extents.rb_node)) {
struct swsusp_extent *ext;
unsigned long offset;
ext = container_of(node, struct swsusp_extent, node);
rb_erase(node, &swsusp_extents);
for (offset = ext->start; offset <= ext->end; offset++)
swap_free(swp_entry(swap, offset));
kfree(ext);
}
}
int swsusp_swap_in_use(void)
{
return (swsusp_extents.rb_node != NULL);
}
/**
* swsusp_show_speed - print the time elapsed between two events represented by
* @start and @stop
*
* @nr_pages - number of pages processed between @start and @stop
* @msg - introductory message to print
*/
void swsusp_show_speed(struct timeval *start, struct timeval *stop,
unsigned nr_pages, char *msg)
{
s64 elapsed_centisecs64;
int centisecs;
int k;
int kps;
elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
centisecs = elapsed_centisecs64;
if (centisecs == 0)
centisecs = 1; /* avoid div-by-zero */
k = nr_pages * (PAGE_SIZE / 1024);
kps = (k * 100) / centisecs;
printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
msg, k,
centisecs / 100, centisecs % 100,
kps / 1000, (kps % 1000) / 10);
}
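/* Illustrative user-space sketch (not kernel code): the same throughput
 * arithmetic as swsusp_show_speed(), done with plain 64-bit division instead
 * of do_div(). The page count and elapsed time are made-up sample values. */
#include <inttypes.h>
#include <stdio.h>
int main(void)
{
	unsigned nr_pages = 50000;		/* sample: ~195 MB at 4 KiB pages */
	int64_t elapsed_ns = 2500000000LL;	/* sample: 2.5 s */
	int centisecs = (int)(elapsed_ns / (1000000000LL / 100));
	int k, kps;
	if (centisecs == 0)
		centisecs = 1;			/* avoid div-by-zero */
	k = nr_pages * (4096 / 1024);		/* kbytes processed */
	kps = (k * 100) / centisecs;		/* kbytes per second */
	printf("Wrote %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
	       k, centisecs / 100, centisecs % 100,
	       kps / 1000, (kps % 1000) / 10);
	return 0;
}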

451
kernel/kernel/power/user.c Normal file
View File

@@ -0,0 +1,451 @@
/*
* linux/kernel/power/user.c
*
* This file provides the user space interface for software suspend/resume.
*
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*
* This file is released under the GPLv2.
*
*/
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pm.h>
#include <linux/fs.h>
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <scsi/scsi_scan.h>
#include <asm/uaccess.h>
#include "power.h"
/*
* NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and
* will be removed in the future. They are only preserved here for
* compatibility with existing userland utilities.
*/
#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
#define PMOPS_PREPARE 1
#define PMOPS_ENTER 2
#define PMOPS_FINISH 3
/*
* NOTE: The following ioctl definitions are wrong and have been replaced with
* correct ones. They are only preserved here for compatibility with existing
* userland utilities and will be removed in the future.
*/
#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
#define SNAPSHOT_MINOR 231
static struct snapshot_data {
struct snapshot_handle handle;
int swap;
int mode;
char frozen;
char ready;
char platform_support;
} snapshot_state;
atomic_t snapshot_device_available = ATOMIC_INIT(1);
static int snapshot_open(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
int error;
mutex_lock(&pm_mutex);
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
error = -EBUSY;
goto Unlock;
}
if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
atomic_inc(&snapshot_device_available);
error = -ENOSYS;
goto Unlock;
}
if (create_basic_memory_bitmaps()) {
atomic_inc(&snapshot_device_available);
error = -ENOMEM;
goto Unlock;
}
nonseekable_open(inode, filp);
data = &snapshot_state;
filp->private_data = data;
memset(&data->handle, 0, sizeof(struct snapshot_handle));
if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
/* Hibernating. The image device should be accessible. */
data->swap = swsusp_resume_device ?
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
pm_notifier_call_chain(PM_POST_HIBERNATION);
} else {
/*
* Resuming. We may need to wait for the image device to
* appear.
*/
wait_for_device_probe();
scsi_complete_async_scans();
data->swap = -1;
data->mode = O_WRONLY;
error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
if (error)
pm_notifier_call_chain(PM_POST_RESTORE);
}
if (error)
atomic_inc(&snapshot_device_available);
data->frozen = 0;
data->ready = 0;
data->platform_support = 0;
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
static int snapshot_release(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
mutex_lock(&pm_mutex);
swsusp_free();
free_basic_memory_bitmaps();
data = filp->private_data;
free_all_swap_pages(data->swap);
if (data->frozen)
thaw_processes();
pm_notifier_call_chain(data->mode == O_RDONLY ?
PM_POST_HIBERNATION : PM_POST_RESTORE);
atomic_inc(&snapshot_device_available);
mutex_unlock(&pm_mutex);
return 0;
}
static ssize_t snapshot_read(struct file *filp, char __user *buf,
size_t count, loff_t *offp)
{
struct snapshot_data *data;
ssize_t res;
mutex_lock(&pm_mutex);
data = filp->private_data;
if (!data->ready) {
res = -ENODATA;
goto Unlock;
}
res = snapshot_read_next(&data->handle, count);
if (res > 0) {
if (copy_to_user(buf, data_of(data->handle), res))
res = -EFAULT;
else
*offp = data->handle.offset;
}
Unlock:
mutex_unlock(&pm_mutex);
return res;
}
static ssize_t snapshot_write(struct file *filp, const char __user *buf,
size_t count, loff_t *offp)
{
struct snapshot_data *data;
ssize_t res;
mutex_lock(&pm_mutex);
data = filp->private_data;
res = snapshot_write_next(&data->handle, count);
if (res > 0) {
if (copy_from_user(data_of(data->handle), buf, res))
res = -EFAULT;
else
*offp = data->handle.offset;
}
mutex_unlock(&pm_mutex);
return res;
}
static long snapshot_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
int error = 0;
struct snapshot_data *data;
loff_t size;
sector_t offset;
if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
return -ENOTTY;
if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR)
return -ENOTTY;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!mutex_trylock(&pm_mutex))
return -EBUSY;
data = filp->private_data;
switch (cmd) {
case SNAPSHOT_FREEZE:
if (data->frozen)
break;
printk("Syncing filesystems ... ");
sys_sync();
printk("done.\n");
error = usermodehelper_disable();
if (error)
break;
error = freeze_processes();
if (error) {
thaw_processes();
usermodehelper_enable();
}
if (!error)
data->frozen = 1;
break;
case SNAPSHOT_UNFREEZE:
if (!data->frozen || data->ready)
break;
thaw_processes();
usermodehelper_enable();
data->frozen = 0;
break;
case SNAPSHOT_CREATE_IMAGE:
case SNAPSHOT_ATOMIC_SNAPSHOT:
if (data->mode != O_RDONLY || !data->frozen || data->ready) {
error = -EPERM;
break;
}
error = hibernation_snapshot(data->platform_support);
if (!error)
error = put_user(in_suspend, (int __user *)arg);
if (!error)
data->ready = 1;
break;
case SNAPSHOT_ATOMIC_RESTORE:
snapshot_write_finalize(&data->handle);
if (data->mode != O_WRONLY || !data->frozen ||
!snapshot_image_loaded(&data->handle)) {
error = -EPERM;
break;
}
error = hibernation_restore(data->platform_support);
break;
case SNAPSHOT_FREE:
swsusp_free();
memset(&data->handle, 0, sizeof(struct snapshot_handle));
data->ready = 0;
break;
case SNAPSHOT_PREF_IMAGE_SIZE:
case SNAPSHOT_SET_IMAGE_SIZE:
image_size = arg;
break;
case SNAPSHOT_GET_IMAGE_SIZE:
if (!data->ready) {
error = -ENODATA;
break;
}
size = snapshot_get_image_size();
size <<= PAGE_SHIFT;
error = put_user(size, (loff_t __user *)arg);
break;
case SNAPSHOT_AVAIL_SWAP_SIZE:
case SNAPSHOT_AVAIL_SWAP:
size = count_swap_pages(data->swap, 1);
size <<= PAGE_SHIFT;
error = put_user(size, (loff_t __user *)arg);
break;
case SNAPSHOT_ALLOC_SWAP_PAGE:
case SNAPSHOT_GET_SWAP_PAGE:
if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
error = -ENODEV;
break;
}
offset = alloc_swapdev_block(data->swap);
if (offset) {
offset <<= PAGE_SHIFT;
error = put_user(offset, (loff_t __user *)arg);
} else {
error = -ENOSPC;
}
break;
case SNAPSHOT_FREE_SWAP_PAGES:
if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
error = -ENODEV;
break;
}
free_all_swap_pages(data->swap);
break;
case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */
if (!swsusp_swap_in_use()) {
/*
* User space encodes device types as two-byte values,
* so we need to recode them
*/
if (old_decode_dev(arg)) {
data->swap = swap_type_of(old_decode_dev(arg),
0, NULL);
if (data->swap < 0)
error = -ENODEV;
} else {
data->swap = -1;
error = -EINVAL;
}
} else {
error = -EPERM;
}
break;
case SNAPSHOT_S2RAM:
if (!data->frozen) {
error = -EPERM;
break;
}
/*
* Tasks are frozen and the notifiers have been called with
* PM_HIBERNATION_PREPARE
*/
error = suspend_devices_and_enter(PM_SUSPEND_MEM);
break;
case SNAPSHOT_PLATFORM_SUPPORT:
data->platform_support = !!arg;
break;
case SNAPSHOT_POWER_OFF:
if (data->platform_support)
error = hibernation_platform_enter();
break;
case SNAPSHOT_PMOPS: /* This ioctl is deprecated */
error = -EINVAL;
switch (arg) {
case PMOPS_PREPARE:
data->platform_support = 1;
error = 0;
break;
case PMOPS_ENTER:
if (data->platform_support)
error = hibernation_platform_enter();
break;
case PMOPS_FINISH:
if (data->platform_support)
error = 0;
break;
default:
printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);
}
break;
case SNAPSHOT_SET_SWAP_AREA:
if (swsusp_swap_in_use()) {
error = -EPERM;
} else {
struct resume_swap_area swap_area;
dev_t swdev;
error = copy_from_user(&swap_area, (void __user *)arg,
sizeof(struct resume_swap_area));
if (error) {
error = -EFAULT;
break;
}
/*
* User space encodes device types as two-byte values,
* so we need to recode them
*/
swdev = old_decode_dev(swap_area.dev);
if (swdev) {
offset = swap_area.offset;
data->swap = swap_type_of(swdev, offset, NULL);
if (data->swap < 0)
error = -ENODEV;
} else {
data->swap = -1;
error = -EINVAL;
}
}
break;
default:
error = -ENOTTY;
}
mutex_unlock(&pm_mutex);
return error;
}
static const struct file_operations snapshot_fops = {
.open = snapshot_open,
.release = snapshot_release,
.read = snapshot_read,
.write = snapshot_write,
.llseek = no_llseek,
.unlocked_ioctl = snapshot_ioctl,
};
static struct miscdevice snapshot_device = {
.minor = SNAPSHOT_MINOR,
.name = "snapshot",
.fops = &snapshot_fops,
};
static int __init snapshot_device_init(void)
{
return misc_register(&snapshot_device);
};
device_initcall(snapshot_device_init);
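/* Illustrative user-space sketch (not kernel code): the rough sequence an
 * s2disk-style hibernation helper goes through against /dev/snapshot. Error
 * handling is minimal, and <linux/suspend_ioctls.h> is assumed to provide the
 * SNAPSHOT_* ioctl definitions used below. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/suspend_ioctls.h>
int main(void)
{
	char page[4096];
	int in_suspend = 0;
	ssize_t n;
	int fd = open("/dev/snapshot", O_RDONLY);	/* O_RDONLY => "hibernating" side */
	if (fd < 0) {
		perror("open /dev/snapshot");
		return 1;
	}
	if (ioctl(fd, SNAPSHOT_FREEZE) < 0)		/* freeze user space */
		goto out;
	if (ioctl(fd, SNAPSHOT_CREATE_IMAGE, &in_suspend) < 0)
		goto thaw;
	if (in_suspend) {
		/* we are the "frozen" kernel: stream the image somewhere safe */
		while ((n = read(fd, page, sizeof(page))) > 0)
			fwrite(page, 1, (size_t)n, stdout);
		/* a real tool would now issue SNAPSHOT_POWER_OFF or similar */
	}
thaw:
	ioctl(fd, SNAPSHOT_UNFREEZE);
out:
	close(fd);
	return 0;
}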

Some files were not shown because too many files have changed in this diff.