add idl4k kernel firmware version 1.13.0.105

Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

kernel/arch/sh/mm/Kconfig Normal file

@@ -0,0 +1,361 @@
menu "Memory management options"
config QUICKLIST
def_bool y
config MMU
bool "Support for memory management hardware"
depends on !CPU_SH2
default y
help
Some SH processors (such as SH-2/SH-2A) lack an MMU. In order to
boot on these systems, this option must not be set.
On other systems (such as the SH-3 and 4) where an MMU exists,
turning this off will boot the kernel on these machines with the
MMU implicitly switched off.
config PAGE_OFFSET
hex
default "0x80000000" if MMU && SUPERH32
default "0x20000000" if MMU && SUPERH64
default "0x00000000"
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
range 9 64 if PAGE_SIZE_16KB
default "9" if PAGE_SIZE_16KB
range 7 64 if PAGE_SIZE_64KB
default "7" if PAGE_SIZE_64KB
range 11 64
default "14" if !MMU
default "11"
help
The kernel memory allocator divides physically contiguous memory
blocks into "zones", where each zone is a power of two number of
pages. This option selects the largest power of two that the kernel
keeps in the memory allocator. If you need to allocate very large
blocks of physically contiguous memory, then you may need to
increase this value.
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
The page size is not necessarily 4KB. Keep this in mind when
choosing a value for this option.
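# Worked example: with the default 4kB page size and order 11, the largest
# contiguous block the page allocator can hand out is 2^10 pages = 4MB.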
config MEMORY_START
hex "Physical memory start address"
default "0x08000000"
---help---
Computers built with Hitachi SuperH processors always
map the ROM starting at address zero. But the processor
does not specify the range that RAM takes.
The physical memory (RAM) start address will be automatically
set to 0x08000000. Other platforms, such as the Solution Engine
boards, typically map RAM at 0x0C000000.
Tweak this only when porting to a new machine which does not
already have a defconfig. Changing it from the known correct
value on any of the known systems will only lead to disaster.
config MEMORY_SIZE
hex "Physical memory size"
default "0x04000000"
help
This sets the default memory size assumed by your SH kernel. It can
be overridden as normal by the 'mem=' argument on the kernel command
line. If unsure, consult your board specifications or just leave it
as 0x04000000 which was the default value before this became
configurable.
# Physical addressing modes
config 29BIT
def_bool !32BIT
depends on SUPERH32
config 32BIT
bool
default y if CPU_SH5
config SUPPORTS_32BIT
bool
default n
config PMB_ENABLE
bool "Support 32-bit physical addressing through PMB"
depends on MMU && EXPERIMENTAL && SUPPORTS_32BIT
select 32BIT
default y
help
If you say Y here, physical addressing will be extended to
32-bits through the SH-4A PMB. If this is not set, legacy
29-bit physical addressing will be used.
choice
prompt "PMB handling type"
depends on PMB_ENABLE
default PMB_FIXED
config PMB
bool "PMB"
help
If you say Y here, physical addressing will be extended to
32-bits through the SH-4A PMB. If this is not set, legacy
29-bit physical addressing will be used.
config PMB_FIXED
bool "fixed PMB"
help
If this option is enabled, fixed PMB mappings are inherited
from the boot loader, and the kernel does not attempt dynamic
management. This is the closest to legacy 29-bit physical mode,
and allows systems to support up to 512MiB of system memory.
endchoice
config PMB_64M_TILES
bool "Tile P1/P2 region with 64M PMB entries"
depends on PMB
help
PMB lookups are architecturally defined such that if a miss
occurs when accessing the P1/P2 region a reset will occur.
This can make debugging kernel code very difficult, as a
stray pointer will cause a reset rather than an exception as
on most other architectures.
To help with this problem, this option can be enabled, which
ensures that the entire P1/P2 region is always filled with
PMB entries, so that a PMB miss can never occur.
The disadvantage of enabling this option is that only 64M PMB
entries can be used, as this is the only way to guarantee that
there will always be sufficient PMB entries to fill the
P1/P2 virtual region. This may mean that some PMB
configurations which work without this option enabled fail
when it is enabled.
The second disadvantage is that the mappings created in this
way are completely valid for read and write accesses (there
is no way to cause an exception on a PMB hit either).
Entries configured in this way can be mapped to any chosen
physical address, using the option PMB_64M_TILES_PHYS
below, which defaults to the start of kernel memory; this
may cause corruption of code or data in this region if an
invalid address is written to. It may be possible to choose
another address which is less damaging, but that will depend
on the SoC and application.
config PMB_64M_TILES_PHYS
hex "PMB workaround physical memory start address"
depends on PMB_64M_TILES
default MEMORY_START
help
The physical address to be used for any PMB mappings used to
fill in holes in the virtual address space when PMB_64M_TILES
is enabled.
config X2TLB
bool "Enable extended TLB mode"
depends on (CPU_SHX2 || CPU_SHX3) && MMU && EXPERIMENTAL
help
Selecting this option will enable the extended mode of the SH-X2
TLB. For legacy SH-X behaviour and interoperability, say N. For
all of the fun new features and a willingness to submit bug reports,
say Y.
config VSYSCALL
bool "Support vsyscall page"
depends on MMU && (CPU_SH3 || CPU_SH4)
default y
help
This will enable support for the kernel mapping a vDSO page
in process space, and subsequently handing down the entry point
to the libc through the ELF auxiliary vector.
From the kernel side this is used for the signal trampoline.
For systems with an MMU that can afford to give up a page,
say Y (the default value).
config NUMA
bool "Non Uniform Memory Access (NUMA) Support"
depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
default n
help
Some SH systems have several distinct memory blocks scattered around
the address space, each with varying latencies. This enables
support for these blocks by binding them to nodes and allowing
memory policies to be used for prioritizing and controlling
allocation behaviour.
config NODES_SHIFT
int
default "3" if CPU_SUBTYPE_SHX3
default "1"
depends on NEED_MULTIPLE_NODES
config ARCH_FLATMEM_ENABLE
def_bool y
depends on !NUMA
config ARCH_SPARSEMEM_ENABLE
def_bool y
select SPARSEMEM_STATIC
config ARCH_SPARSEMEM_DEFAULT
def_bool y
config MAX_ACTIVE_REGIONS
int
default "6" if (CPU_SUBTYPE_SHX3 && SPARSEMEM)
default "2" if SPARSEMEM && (CPU_SUBTYPE_SH7722 || \
CPU_SUBTYPE_SH7785)
default "1"
config ARCH_POPULATES_NODE_MAP
def_bool y
config ARCH_SELECT_MEMORY_MODEL
def_bool y
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
depends on SPARSEMEM && MMU
config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
depends on SPARSEMEM && MMU
config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
choice
prompt "Kernel page size"
default PAGE_SIZE_8KB if X2TLB
default PAGE_SIZE_4KB
config PAGE_SIZE_4KB
bool "4kB"
depends on !MMU || !X2TLB
help
This is the default page size used by all SuperH CPUs.
config PAGE_SIZE_8KB
bool "8kB"
depends on !MMU || X2TLB
help
This enables 8kB pages as supported by SH-X2 and later MMUs.
config PAGE_SIZE_16KB
bool "16kB"
depends on !MMU
help
This enables 16kB pages on MMU-less SH systems.
config PAGE_SIZE_64KB
bool "64kB"
depends on !MMU || CPU_SH4 || CPU_SH5
help
This enables support for 64kB pages, possible on all SH-4
CPUs and later.
endchoice
choice
prompt "HugeTLB page size"
depends on HUGETLB_PAGE
default HUGETLB_PAGE_SIZE_1MB if PAGE_SIZE_64KB
default HUGETLB_PAGE_SIZE_64K
config HUGETLB_PAGE_SIZE_64K
bool "64kB"
depends on !PAGE_SIZE_64KB
config HUGETLB_PAGE_SIZE_256K
bool "256kB"
depends on X2TLB
config HUGETLB_PAGE_SIZE_1MB
bool "1MB"
config HUGETLB_PAGE_SIZE_4MB
bool "4MB"
depends on X2TLB
config HUGETLB_PAGE_SIZE_64MB
bool "64MB"
depends on X2TLB
config HUGETLB_PAGE_SIZE_512MB
bool "512MB"
depends on CPU_SH5
endchoice
source "mm/Kconfig"
endmenu
menu "Cache configuration"
config SH7705_CACHE_32KB
bool "Enable 32KB cache size for SH7705"
depends on CPU_SUBTYPE_SH7705
default y
choice
prompt "Cache mode"
default CACHE_WRITEBACK if CPU_SH2A || CPU_SH3 || CPU_SH4 || CPU_SH5
default CACHE_WRITETHROUGH if (CPU_SH2 && !CPU_SH2A)
config CACHE_WRITEBACK
bool "Write-back"
config CACHE_WRITETHROUGH
bool "Write-through"
help
Selecting this option will configure the caches in write-through
mode, as opposed to the default write-back configuration.
Since there are still some aliasing issues on SH-4, this option will
unfortunately still require the majority of flushing functions to
be implemented to deal with aliasing.
If unsure, say N.
config CACHE_OFF
bool "Off"
endchoice
config STM_L2_CACHE
bool "STM Level-2 cache support"
depends on CPU_ST40_300
help
Selecting this option will enable support for Level-2
cache present in some of the STMicroelectronics SoCs.
choice
prompt "Default Level-2 cache mode"
depends on STM_L2_CACHE
default STM_L2_CACHE_WRITEBACK
help
Select the mode in which the Level-2 cache should be configured
by default. This may be changed at runtime using the
"/sys/kernel/mm/l2/mode" file.
config STM_L2_CACHE_BYPASSED
bool "Bypassed"
config STM_L2_CACHE_WRITETHROUGH
bool "Write-through"
config STM_L2_CACHE_WRITEBACK
bool "Write-back"
endchoice
endmenu

kernel/arch/sh/mm/Makefile Normal file

@@ -0,0 +1,68 @@
#
# Makefile for the Linux SuperH-specific parts of the memory manager.
#
GCOV_PROFILE_pmb.o := n
obj-y := cache.o init.o consistent.o mmap.o
cacheops-$(CONFIG_CPU_SH2) := cache-sh2.o
cacheops-$(CONFIG_CPU_SH2A) := cache-sh2a.o
cacheops-$(CONFIG_CPU_SH3) := cache-sh3.o
cacheops-$(CONFIG_CPU_SH4) := cache-sh4.o flush-sh4.o
cacheops-$(CONFIG_CPU_SH5) := cache-sh5.o flush-sh4.o
cacheops-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o
obj-y += $(cacheops-y)
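# $(BITS) is set to 32 or 64 by the arch Makefile, so the lines below pick
# the matching extable/fault/ioremap/tlbflush implementation for the CPU.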
mmu-y := nommu.o extable_32.o
mmu-$(CONFIG_MMU) := extable_$(BITS).o fault_$(BITS).o \
ioremap_$(BITS).o kmap.o tlbflush_$(BITS).o
obj-y += $(mmu-y)
obj-$(CONFIG_DEBUG_FS) += asids-debugfs.o
ifdef CONFIG_DEBUG_FS
obj-$(CONFIG_CPU_SH4) += cache-debugfs.o
endif
ifdef CONFIG_MMU
tlb-$(CONFIG_CPU_SH3) := tlb-sh3.o
tlb-$(CONFIG_CPU_SH4) := tlb-sh4.o
tlb-$(CONFIG_CPU_SH5) := tlb-sh5.o
tlb-$(CONFIG_CPU_HAS_PTEAEX) := tlb-pteaex.o
obj-y += $(tlb-y)
endif
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PMB) += pmb.o
obj-$(CONFIG_PMB_FIXED) += pmb-fixed.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_STM_L2_CACHE) += stm-l2-cache.o stm-l2-helper.o
# Special flags for fault_64.o. This puts restrictions on the number of
# caller-save registers that the compiler can target when building this file.
# This is required because the code is called from a context in entry.S where
# very few registers have been saved in the exception handler (for speed
# reasons).
# The caller save registers that have been saved and which can be used are
# r2,r3,r4,r5 : argument passing
# r15, r18 : SP and LINK
# tr0-4 : allow all caller-save TR's. The compiler seems to be able to make
# use of them, so it's probably beneficial to performance to save them
# and have them available for it.
#
# The resources not listed below are callee save, i.e. the compiler is free to
# use any of them and will spill them to the stack itself.
CFLAGS_fault_64.o += -ffixed-r7 \
-ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
-ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
-ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
-ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
-ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
-ffixed-r41 -ffixed-r42 -ffixed-r43 \
-ffixed-r60 -ffixed-r61 -ffixed-r62 \
-fomit-frame-pointer
EXTRA_CFLAGS += -Werror

kernel/arch/sh/mm/asids-debugfs.c Normal file

@@ -0,0 +1,77 @@
/*
* debugfs ops for process ASIDs
*
* Copyright (C) 2000, 2001 Paolo Alberelli
* Copyright (C) 2003 - 2008 Paul Mundt
* Copyright (C) 2003, 2004 Richard Curnow
*
* Provides a debugfs file that lists out the ASIDs currently associated
* with the processes.
*
* In the SH-5 case, if the DM.PC register is examined through the debug
* link, this shows ASID + PC. To make use of this, the PID->ASID
* relationship needs to be known. This is primarily for debugging.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <asm/processor.h>
#include <asm/mmu_context.h>
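/*
 * The resulting file (typically <debugfs>/sh/asids) prints one line per
 * task that owns an mm, in the form "<pid> : <asid>", e.g. " 1234 : 001a".
 */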
static int asids_seq_show(struct seq_file *file, void *iter)
{
struct task_struct *p;
read_lock(&tasklist_lock);
for_each_process(p) {
int pid = p->pid;
if (unlikely(!pid))
continue;
if (p->mm)
seq_printf(file, "%5d : %04lx\n", pid,
cpu_asid(smp_processor_id(), p->mm));
}
read_unlock(&tasklist_lock);
return 0;
}
static int asids_debugfs_open(struct inode *inode, struct file *file)
{
return single_open(file, asids_seq_show, inode->i_private);
}
static const struct file_operations asids_debugfs_fops = {
.owner = THIS_MODULE,
.open = asids_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init asids_debugfs_init(void)
{
struct dentry *asids_dentry;
asids_dentry = debugfs_create_file("asids", S_IRUSR, sh_debugfs_root,
NULL, &asids_debugfs_fops);
if (!asids_dentry)
return -ENOMEM;
if (IS_ERR(asids_dentry))
return PTR_ERR(asids_dentry);
return 0;
}
module_init(asids_debugfs_init);
MODULE_LICENSE("GPL v2");

kernel/arch/sh/mm/cache-debugfs.c Normal file

@@ -0,0 +1,154 @@
/*
* debugfs ops for the L1 cache
*
* Copyright (C) 2006 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/cache.h>
#include <asm/io.h>
enum cache_type {
CACHE_TYPE_ICACHE,
CACHE_TYPE_DCACHE,
CACHE_TYPE_UNIFIED,
};
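/*
 * These show up as <debugfs>/sh/dcache and <debugfs>/sh/icache; each read
 * walks the selected cache's address array and dumps every valid line in
 * every way, marking dirty lines (U bit set) with a 'U'.
 */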
static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
void *iter)
{
unsigned int cache_type = (unsigned int)file->private;
struct cache_info *cache;
unsigned int waysize, way, cache_size;
unsigned long ccr, base;
static unsigned long addrstart = 0;
/*
* Go uncached immediately so we don't skew the results any
* more than we already are..
*/
jump_to_uncached();
ccr = ctrl_inl(CCR);
if ((ccr & CCR_CACHE_ENABLE) == 0) {
back_to_cached();
seq_printf(file, "disabled\n");
return 0;
}
if (cache_type == CACHE_TYPE_DCACHE) {
base = CACHE_OC_ADDRESS_ARRAY;
cache = &current_cpu_data.dcache;
} else {
base = CACHE_IC_ADDRESS_ARRAY;
cache = &current_cpu_data.icache;
}
/*
* Due to the amount of data written out (depending on the cache size),
* we may be iterated over multiple times. In this case, keep track of
* the entry position in addrstart, and rewind it when we've hit the
* end of the cache.
*
* Likewise, the same code is used for multiple caches, so care must
* be taken for bouncing addrstart back and forth so the appropriate
* cache is hit.
*/
cache_size = cache->ways * cache->sets * cache->linesz;
if (((addrstart & 0xff000000) != base) ||
(addrstart & 0x00ffffff) > cache_size)
addrstart = base;
waysize = cache->sets;
/*
* If the OC is already in RAM mode, we only have
* half of the entries to consider..
*/
if ((ccr & CCR_CACHE_ORA) && cache_type == CACHE_TYPE_DCACHE)
waysize >>= 1;
waysize <<= cache->entry_shift;
for (way = 0; way < cache->ways; way++) {
unsigned long addr;
unsigned int line;
seq_printf(file, "-----------------------------------------\n");
seq_printf(file, "Way %d\n", way);
seq_printf(file, "-----------------------------------------\n");
for (addr = addrstart, line = 0;
addr < addrstart + waysize;
addr += cache->linesz, line++) {
unsigned long data = ctrl_inl(addr);
/* Check the V bit, ignore invalid cachelines */
if ((data & 1) == 0)
continue;
/* U: Dirty, cache tag is 10 bits up */
seq_printf(file, "%3d: %c 0x%lx\n",
line, data & 2 ? 'U' : ' ',
data & 0x1ffffc00);
}
addrstart += cache->way_incr;
}
back_to_cached();
return 0;
}
static int cache_debugfs_open(struct inode *inode, struct file *file)
{
return single_open(file, cache_seq_show, inode->i_private);
}
static const struct file_operations cache_debugfs_fops = {
.owner = THIS_MODULE,
.open = cache_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init cache_debugfs_init(void)
{
struct dentry *dcache_dentry, *icache_dentry;
dcache_dentry = debugfs_create_file("dcache", S_IRUSR, sh_debugfs_root,
(unsigned int *)CACHE_TYPE_DCACHE,
&cache_debugfs_fops);
if (!dcache_dentry)
return -ENOMEM;
if (IS_ERR(dcache_dentry))
return PTR_ERR(dcache_dentry);
icache_dentry = debugfs_create_file("icache", S_IRUSR, sh_debugfs_root,
(unsigned int *)CACHE_TYPE_ICACHE,
&cache_debugfs_fops);
if (!icache_dentry) {
debugfs_remove(dcache_dentry);
return -ENOMEM;
}
if (IS_ERR(icache_dentry)) {
debugfs_remove(dcache_dentry);
return PTR_ERR(icache_dentry);
}
return 0;
}
module_init(cache_debugfs_init);
MODULE_LICENSE("GPL v2");

kernel/arch/sh/mm/cache-sh2.c Normal file

@@ -0,0 +1,91 @@
/*
* arch/sh/mm/cache-sh2.c
*
* Copyright (C) 2002 Paul Mundt
* Copyright (C) 2008 Yoshinori Sato
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/cache.h>
#include <asm/addrspace.h>
#include <asm/processor.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
static void sh2__flush_wback_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
unsigned long addr = CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0);
int way;
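/*
 * Address array index: bits [11:4] of the virtual address select the
 * cache entry (set), and bits [13:12] select one of the four ways.
 */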
for (way = 0; way < 4; way++) {
unsigned long data = ctrl_inl(addr | (way << 12));
if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
data &= ~SH_CACHE_UPDATED;
ctrl_outl(data, addr | (way << 12));
}
}
}
}
static void sh2__flush_purge_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
for (v = begin; v < end; v+=L1_CACHE_BYTES)
ctrl_outl((v & CACHE_PHYSADDR_MASK),
CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
}
static void sh2__flush_invalidate_region(void *start, int size)
{
#ifdef CONFIG_CACHE_WRITEBACK
/*
* SH-2 does not support individual line invalidation, only a
* global invalidate.
*/
unsigned long ccr;
unsigned long flags;
local_irq_save(flags);
jump_to_uncached();
ccr = ctrl_inl(CCR);
ccr |= CCR_CACHE_INVALIDATE;
ctrl_outl(ccr, CCR);
back_to_cached();
local_irq_restore(flags);
#else
unsigned long v;
unsigned long begin, end;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
for (v = begin; v < end; v+=L1_CACHE_BYTES)
ctrl_outl((v & CACHE_PHYSADDR_MASK),
CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
#endif
}
void __init sh2_cache_init(void)
{
__flush_wback_region = sh2__flush_wback_region;
__flush_purge_region = sh2__flush_purge_region;
__flush_invalidate_region = sh2__flush_invalidate_region;
}

kernel/arch/sh/mm/cache-sh2a.c Normal file

@@ -0,0 +1,140 @@
/*
* arch/sh/mm/cache-sh2a.c
*
* Copyright (C) 2008 Yoshinori Sato
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/cache.h>
#include <asm/addrspace.h>
#include <asm/processor.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
static void sh2a__flush_wback_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
unsigned long flags;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
local_irq_save(flags);
jump_to_uncached();
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
unsigned long addr = CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0);
int way;
for (way = 0; way < 4; way++) {
unsigned long data = ctrl_inl(addr | (way << 11));
if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
data &= ~SH_CACHE_UPDATED;
ctrl_outl(data, addr | (way << 11));
}
}
}
back_to_cached();
local_irq_restore(flags);
}
static void sh2a__flush_purge_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
unsigned long flags;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
local_irq_save(flags);
jump_to_uncached();
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
ctrl_outl((v & CACHE_PHYSADDR_MASK),
CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
}
back_to_cached();
local_irq_restore(flags);
}
static void sh2a__flush_invalidate_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
unsigned long flags;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
local_irq_save(flags);
jump_to_uncached();
#ifdef CONFIG_CACHE_WRITEBACK
ctrl_outl(ctrl_inl(CCR) | CCR_OCACHE_INVALIDATE, CCR);
/* I-cache invalidate */
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
ctrl_outl((v & CACHE_PHYSADDR_MASK),
CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
}
#else
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
ctrl_outl((v & CACHE_PHYSADDR_MASK),
CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
ctrl_outl((v & CACHE_PHYSADDR_MASK),
CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
}
#endif
back_to_cached();
local_irq_restore(flags);
}
/* WBack O-Cache and flush I-Cache */
static void sh2a_flush_icache_range(void *args)
{
struct flusher_data *data = args;
unsigned long start, end;
unsigned long v;
unsigned long flags;
start = data->addr1 & ~(L1_CACHE_BYTES-1);
end = (data->addr2 + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1);
local_irq_save(flags);
jump_to_uncached();
for (v = start; v < end; v+=L1_CACHE_BYTES) {
unsigned long addr = (v & 0x000007f0);
int way;
/* O-Cache writeback */
for (way = 0; way < 4; way++) {
unsigned long data = ctrl_inl(CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
data &= ~SH_CACHE_UPDATED;
ctrl_outl(data, CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
}
}
/* I-Cache invalidate */
ctrl_outl(addr,
CACHE_IC_ADDRESS_ARRAY | addr | 0x00000008);
}
back_to_cached();
local_irq_restore(flags);
}
void __init sh2a_cache_init(void)
{
local_flush_icache_range = sh2a_flush_icache_range;
__flush_wback_region = sh2a__flush_wback_region;
__flush_purge_region = sh2a__flush_purge_region;
__flush_invalidate_region = sh2a__flush_invalidate_region;
}

kernel/arch/sh/mm/cache-sh3.c Normal file

@@ -0,0 +1,105 @@
/*
* arch/sh/mm/cache-sh3.c
*
* Copyright (C) 1999, 2000 Niibe Yutaka
* Copyright (C) 2002 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/addrspace.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
/*
* Write back the dirty D-caches, but not invalidate them.
*
* Is this really worth it, or should we just alias this routine
* to __flush_purge_region too?
*
* START: Virtual Address (U0, P1, or P3)
* SIZE: Size of the region.
*/
static void sh3__flush_wback_region(void *start, int size)
{
unsigned long v, j;
unsigned long begin, end;
unsigned long flags;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
unsigned long addrstart = CACHE_OC_ADDRESS_ARRAY;
for (j = 0; j < current_cpu_data.dcache.ways; j++) {
unsigned long data, addr, p;
p = __pa(v);
addr = addrstart | (v & current_cpu_data.dcache.entry_mask);
local_irq_save(flags);
data = ctrl_inl(addr);
if ((data & CACHE_PHYSADDR_MASK) ==
(p & CACHE_PHYSADDR_MASK)) {
data &= ~SH_CACHE_UPDATED;
ctrl_outl(data, addr);
local_irq_restore(flags);
break;
}
local_irq_restore(flags);
addrstart += current_cpu_data.dcache.way_incr;
}
}
}
/*
* Write back the dirty D-caches and invalidate them.
*
* START: Virtual Address (U0, P1, or P3)
* SIZE: Size of the region.
*/
static void sh3__flush_purge_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
unsigned long data, addr;
data = (v & 0xfffffc00); /* _Virtual_ address, ~U, ~V */
addr = CACHE_OC_ADDRESS_ARRAY |
(v & current_cpu_data.dcache.entry_mask) | SH_CACHE_ASSOC;
ctrl_outl(data, addr);
}
}
void __init sh3_cache_init(void)
{
__flush_wback_region = sh3__flush_wback_region;
__flush_purge_region = sh3__flush_purge_region;
/*
* No write back please
*
* Except I don't think there's any way to avoid the writeback.
* So we just alias it to sh3__flush_purge_region(). dwmw2.
*/
__flush_invalidate_region = sh3__flush_purge_region;
}

kernel/arch/sh/mm/cache-sh4.c Normal file

@@ -0,0 +1,752 @@
/*
* arch/sh/mm/cache-sh4.c
*
* Copyright (C) 1999, 2000, 2002 Niibe Yutaka
* Copyright (C) 2001 - 2007 Paul Mundt
* Copyright (C) 2003 Richard Curnow
* Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <linux/module.h>
/*
* The maximum number of pages we support up to when doing ranged dcache
* flushing. Anything exceeding this will simply flush the dcache in its
* entirety.
*/
#define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */
#define MAX_ICACHE_PAGES 32
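/* I-cache range flushes spanning MAX_ICACHE_PAGES or more just flush both caches entirely. */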
static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long);
static void __flush_cache_one(unsigned long addr,
unsigned long phys, int way_count, unsigned long way_incr);
static void (*__flush_cache_one_uncached)(unsigned long addr,
unsigned long phys, int way_count, unsigned long way_incr);
/*
* Write back the range of D-cache, and purge the I-cache.
*
* Called from kernel/module.c:sys_init_module and routine for a.out format,
* signal handler code and kprobes code
*/
static void __uses_jump_to_uncached sh4_flush_icache_range(void *args)
{
struct flusher_data *data = args;
unsigned long start, end;
unsigned long flags, v;
int i;
start = data->addr1;
end = data->addr2;
/* If there are too many pages then just blow away the caches */
if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
local_flush_cache_all(NULL);
return;
}
/*
* Selectively flush d-cache then invalidate the i-cache.
* This is inefficient, so only use this for small ranges.
*/
start &= ~(L1_CACHE_BYTES-1);
end += L1_CACHE_BYTES-1;
end &= ~(L1_CACHE_BYTES-1);
local_irq_save(flags);
jump_to_uncached();
for (v = start; v < end; v += L1_CACHE_BYTES) {
unsigned long icacheaddr;
int j, n;
__ocbwb(v);
icacheaddr = CACHE_IC_ADDRESS_ARRAY | (v &
cpu_data->icache.entry_mask);
/* Clear i-cache line valid-bit */
n = boot_cpu_data.icache.n_aliases;
for (i = 0; i < cpu_data->icache.ways; i++) {
for (j = 0; j < n; j++)
__raw_writel(0, icacheaddr + (j * PAGE_SIZE));
icacheaddr += cpu_data->icache.way_incr;
}
}
back_to_cached();
local_irq_restore(flags);
}
static inline void flush_cache_one(unsigned long start, unsigned long kaddr)
{
unsigned long flags;
struct cache_info *cache;
int way_count;
unsigned long way_incr;
void (*fco)(unsigned long addr, unsigned long kaddr, int way_count,
unsigned long way_incr);
/*
* All types of SH-4 require PC to be uncached to operate on the I-cache.
* Some types of SH-4 require PC to be uncached to operate on the
* D-cache.
*/
if (unlikely(start < CACHE_OC_ADDRESS_ARRAY)){
cache = &boot_cpu_data.icache;
fco = __flush_cache_one_uncached;
} else {
cache = &boot_cpu_data.dcache;
fco = __flush_cache_one;
}
if (unlikely(boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG))
fco = __flush_cache_one_uncached;
way_count = cache->ways;
way_incr = cache->way_incr;
local_irq_save(flags);
fco(start | SH_CACHE_ASSOC, kaddr, way_count, way_incr);
local_irq_restore(flags);
}
/*
* Called just before the kernel reads a page cache page, or has written
* to a page cache page, which may have been mapped into user space.
* Write back & invalidate the D-cache of the page.
* (To avoid "alias" issues)
*/
static void sh4_flush_dcache_page(void *arg)
{
struct page *page = arg;
#ifndef CONFIG_SMP
struct address_space *mapping = page_mapping(page);
if (mapping && !mapping_mapped(mapping))
/* There are no user mappings for this page, so we can
* defer the flush. */
set_bit(PG_dcache_dirty, &page->flags);
else
#endif
/* page->mapping is NULL for argv/env pages, which
* must be flushed here (there is no call to
* update_mmu_cache in this case). Or there is a user
* mapping for this page, so we flush. */
flush_kernel_dcache_page(page);
wmb();
}
void flush_kernel_dcache_page_addr(unsigned long kaddr)
{
unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
int i, n;
/* Loop all the D-cache */
n = boot_cpu_data.dcache.n_aliases;
for (i = 0; i < n; i++, addr += PAGE_SIZE)
flush_cache_one(addr, kaddr);
}
EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
/* TODO: Selective icache invalidation through IC address array.. */
static void __uses_jump_to_uncached flush_icache_all(void)
{
unsigned long flags, ccr;
local_irq_save(flags);
jump_to_uncached();
/* Flush I-cache */
ccr = ctrl_inl(CCR);
ccr |= CCR_CACHE_ICI;
ctrl_outl(ccr, CCR);
/*
* back_to_cached() will take care of the barrier for us, don't add
* another one!
*/
back_to_cached();
local_irq_restore(flags);
}
static inline void flush_dcache_all(void)
{
(*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
wmb();
}
static void sh4_flush_cache_all(void *unused)
{
flush_dcache_all();
flush_icache_all();
}
static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
unsigned long end)
{
unsigned long d = 0, p = start & PAGE_MASK;
unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
unsigned long select_bit;
unsigned long all_aliases_mask;
unsigned long addr_offset;
pgd_t *dir;
pmd_t *pmd;
pud_t *pud;
pte_t *pte;
int i;
dir = pgd_offset(mm, p);
pud = pud_offset(dir, p);
pmd = pmd_offset(pud, p);
end = PAGE_ALIGN(end);
all_aliases_mask = (1 << n_aliases) - 1;
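/* 'd' accumulates one bit per D-cache colour that turns out to need flushing. */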
do {
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
p &= PMD_MASK;
p += PMD_SIZE;
pmd++;
continue;
}
pte = pte_offset_kernel(pmd, p);
do {
unsigned long phys;
pte_t entry = *pte;
if (!(pte_val(entry) & _PAGE_PRESENT)) {
pte++;
p += PAGE_SIZE;
continue;
}
phys = pte_val(entry) & PTE_PHYS_MASK;
if ((p ^ phys) & alias_mask) {
d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
if (d == all_aliases_mask)
goto loop_exit;
}
pte++;
p += PAGE_SIZE;
} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
pmd++;
} while (p < end);
loop_exit:
addr_offset = 0;
select_bit = 1;
for (i = 0; i < n_aliases; i++) {
if (d & select_bit) {
(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
wmb();
}
select_bit <<= 1;
addr_offset += PAGE_SIZE;
}
}
/*
* Note : (RPC) since the caches are physically tagged, the only point
* of flush_cache_mm for SH-4 is to get rid of aliases from the
* D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
* lines can stay resident so long as the virtual address they were
* accessed with (hence cache set) is in accord with the physical
* address (i.e. tag). It's no different here. So I reckon we don't
* need to flush the I-cache, since aliases don't matter for that. We
* should try that.
*
* Caller takes mm->mmap_sem.
*/
static void sh4_flush_cache_mm(void *arg)
{
struct mm_struct *mm = arg;
if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
return;
/*
* If cache is only 4k-per-way, there are never any 'aliases'. Since
* the cache is physically tagged, the data can just be left in there.
*/
if (boot_cpu_data.dcache.n_aliases == 0)
return;
/*
* Don't bother groveling around the dcache for the VMA ranges
* if there are too many PTEs to make it worthwhile.
*/
if (mm->nr_ptes >= MAX_DCACHE_PAGES)
flush_dcache_all();
else {
struct vm_area_struct *vma;
/*
* In this case there are reasonably sized ranges to flush,
* iterate through the VMA list and take care of any aliases.
*/
for (vma = mm->mmap; vma; vma = vma->vm_next)
__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
}
/* Only touch the icache if one of the VMAs has VM_EXEC set. */
if (mm->exec_vm)
flush_icache_all();
}
/*
* Write back and invalidate I/D-caches for the page.
*
* ADDR: Virtual Address (U0 address)
* PFN: Physical page number
*/
static void sh4_flush_cache_page(void *args)
{
struct flusher_data *data = args;
struct vm_area_struct *vma;
unsigned long address, pfn, kaddr;
unsigned int alias_mask;
vma = data->vma;
address = data->addr1;
pfn = data->addr2;
kaddr = (unsigned long)pfn_to_kaddr(pfn);
if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
return;
alias_mask = boot_cpu_data.dcache.alias_mask;
/* We only need to flush D-cache when we have alias */
if ((address^kaddr) & alias_mask) {
/* Loop 4K of the D-cache */
flush_cache_one(
CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
kaddr);
/* Loop another 4K of the D-cache */
flush_cache_one(
CACHE_OC_ADDRESS_ARRAY | (kaddr & alias_mask),
kaddr);
}
alias_mask = boot_cpu_data.icache.alias_mask;
if (vma->vm_flags & VM_EXEC) {
/*
* Evict entries from the portion of the cache from which code
* may have been executed at this address (virtual). There's
* no need to evict from the portion corresponding to the
* physical address as for the D-cache, because we know the
* kernel has never executed the code through its identity
* translation.
*/
flush_cache_one(
CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
kaddr);
}
}
/*
* Write back and invalidate D-caches.
*
* START, END: Virtual Address (U0 address)
*
* NOTE: We need to flush the _physical_ page entry.
* Flushing the cache lines for U0 only isn't enough.
* We need to flush for P1 too, which may contain aliases.
*/
static void sh4_flush_cache_range(void *args)
{
struct flusher_data *data = args;
struct vm_area_struct *vma;
unsigned long start, end;
vma = data->vma;
start = data->addr1;
end = data->addr2;
if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
return;
/*
* If cache is only 4k-per-way, there are never any 'aliases'. Since
* the cache is physically tagged, the data can just be left in there.
*/
if (boot_cpu_data.dcache.n_aliases == 0)
return;
/*
* Don't bother with the lookup and alias check if we have a
* wide range to cover, just blow away the dcache in its
* entirety instead. -- PFM.
*/
if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
flush_dcache_all();
else
__flush_cache_mm(vma->vm_mm, start, end);
if (vma->vm_flags & VM_EXEC) {
/*
* TODO: Is this required??? Need to look at how I-cache
* coherency is assured when new programs are loaded to see if
* this matters.
*/
flush_icache_all();
}
}
/**
* __flush_cache_one
*
* @addr: address in memory mapped cache array
* @phys: address to flush (has to match tags if addr has 'A' bit
* set i.e. associative write)
*
* The offset into the cache array implied by 'addr' selects the
* 'colour' of the virtual address range that will be flushed. The
* operation (purge/write-back) is selected by the lower 2 bits of
* 'phys'.
*/
static void __uses_jump_to_uncached __flush_cache_one(unsigned long addr,
unsigned long phys, int way_count, unsigned long way_incr)
{
unsigned long base_addr = addr;
unsigned long a, ea, p;
/*
* We know there will be >=1 iteration, so write as do-while to avoid
* pointless head-of-loop check for 0 iterations.
*/
do {
ea = base_addr + PAGE_SIZE;
a = base_addr;
p = phys;
do {
*(volatile unsigned long *)a = p;
/*
* Next line: intentionally not p+32, saves an add, p
* will do since only the cache tag bits need to
* match.
*/
*(volatile unsigned long *)(a+32) = p;
a += 64;
p += 64;
} while (a < ea);
base_addr += way_incr;
} while (--way_count != 0);
}
/*
* Break the 1, 2 and 4 way variants of this out into separate functions to
* avoid nearly all the overhead of having the conditional stuff in the function
* bodies (+ the 1 and 2 way cases avoid saving any registers too).
*
* We want to eliminate unnecessary bus transactions, so this code uses
* a non-obvious technique.
*
* Loop over a cache way sized block of memory, one cache line at a time. For each
* line, use movca.l to cause the current cache line contents to be written
* back, but without reading anything from main memory. However this has the
* side effect that the cache is now caching that memory location. So follow
* this with a cache invalidate to mark the cache line invalid. And do all
* this with interrupts disabled, to avoid the cache line being accidentally
* evicted while it is holding garbage.
*
* This also breaks in a number of circumstances:
* - if there are modifications to the region of memory just above
* empty_zero_page (for example because a breakpoint has been placed
* there), then these can be lost.
*
* This is because the memory address which the cache temporarily
* caches in the above description is empty_zero_page. So the
* movca.l hits the cache (it is assumed that it misses, or at least
* isn't dirty), modifies the line and then invalidates it, losing the
* required change.
*
* - If caches are disabled or configured in write-through mode, then
* the movca.l writes garbage directly into memory.
*/
static void __flush_dcache_segment_writethrough(unsigned long start,
unsigned long extent_per_way)
{
unsigned long addr;
int i;
addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask);
while (extent_per_way) {
for (i = 0; i < cpu_data->dcache.ways; i++)
__raw_writel(0, addr + cpu_data->dcache.way_incr * i);
addr += cpu_data->dcache.linesz;
extent_per_way -= cpu_data->dcache.linesz;
}
}
static void __flush_dcache_segment_1way(unsigned long start,
unsigned long extent_per_way)
{
unsigned long orig_sr, sr_with_bl;
unsigned long base_addr;
unsigned long way_incr, linesz, way_size;
struct cache_info *dcache;
register unsigned long a0, a0e;
asm volatile("stc sr, %0" : "=r" (orig_sr));
sr_with_bl = orig_sr | (1<<28);
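/*
 * SR bit 28 is the BL bit: with it set, interrupts are blocked while a
 * line sits in the transient movca.l/ocbi state described above.
 */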
base_addr = ((unsigned long)&empty_zero_page[0]);
/*
* The previous code aligned base_addr to 16k, i.e. the way_size of all
* existing SH-4 D-caches. Whilst I don't see a need to have this
* aligned to any better than the cache line size (which it will be
* anyway by construction), let's align it to at least the way_size of
* any existing or conceivable SH-4 D-cache. -- RPC
*/
base_addr = ((base_addr >> 16) << 16);
base_addr |= start;
dcache = &boot_cpu_data.dcache;
linesz = dcache->linesz;
way_incr = dcache->way_incr;
way_size = dcache->way_size;
a0 = base_addr;
a0e = base_addr + extent_per_way;
do {
asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
asm volatile("movca.l r0, @%0\n\t"
"ocbi @%0" : : "r" (a0));
a0 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"ocbi @%0" : : "r" (a0));
a0 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"ocbi @%0" : : "r" (a0));
a0 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"ocbi @%0" : : "r" (a0));
asm volatile("ldc %0, sr" : : "r" (orig_sr));
a0 += linesz;
} while (a0 < a0e);
}
static void __flush_dcache_segment_2way(unsigned long start,
unsigned long extent_per_way)
{
unsigned long orig_sr, sr_with_bl;
unsigned long base_addr;
unsigned long way_incr, linesz, way_size;
struct cache_info *dcache;
register unsigned long a0, a1, a0e;
asm volatile("stc sr, %0" : "=r" (orig_sr));
sr_with_bl = orig_sr | (1<<28);
base_addr = ((unsigned long)&empty_zero_page[0]);
/* See comment under 1-way above */
base_addr = ((base_addr >> 16) << 16);
base_addr |= start;
dcache = &boot_cpu_data.dcache;
linesz = dcache->linesz;
way_incr = dcache->way_incr;
way_size = dcache->way_size;
a0 = base_addr;
a1 = a0 + way_incr;
a0e = base_addr + extent_per_way;
do {
asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"ocbi @%0\n\t"
"ocbi @%1" : :
"r" (a0), "r" (a1));
a0 += linesz;
a1 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"ocbi @%0\n\t"
"ocbi @%1" : :
"r" (a0), "r" (a1));
a0 += linesz;
a1 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"ocbi @%0\n\t"
"ocbi @%1" : :
"r" (a0), "r" (a1));
a0 += linesz;
a1 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"ocbi @%0\n\t"
"ocbi @%1" : :
"r" (a0), "r" (a1));
asm volatile("ldc %0, sr" : : "r" (orig_sr));
a0 += linesz;
a1 += linesz;
} while (a0 < a0e);
}
static void __flush_dcache_segment_4way(unsigned long start,
unsigned long extent_per_way)
{
unsigned long orig_sr, sr_with_bl;
unsigned long base_addr;
unsigned long way_incr, linesz, way_size;
struct cache_info *dcache;
register unsigned long a0, a1, a2, a3, a0e;
asm volatile("stc sr, %0" : "=r" (orig_sr));
sr_with_bl = orig_sr | (1<<28);
base_addr = ((unsigned long)&empty_zero_page[0]);
/* See comment under 1-way above */
base_addr = ((base_addr >> 16) << 16);
base_addr |= start;
dcache = &boot_cpu_data.dcache;
linesz = dcache->linesz;
way_incr = dcache->way_incr;
way_size = dcache->way_size;
a0 = base_addr;
a1 = a0 + way_incr;
a2 = a1 + way_incr;
a3 = a2 + way_incr;
a0e = base_addr + extent_per_way;
do {
asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"movca.l r0, @%2\n\t"
"movca.l r0, @%3\n\t"
"ocbi @%0\n\t"
"ocbi @%1\n\t"
"ocbi @%2\n\t"
"ocbi @%3\n\t" : :
"r" (a0), "r" (a1), "r" (a2), "r" (a3));
a0 += linesz;
a1 += linesz;
a2 += linesz;
a3 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"movca.l r0, @%2\n\t"
"movca.l r0, @%3\n\t"
"ocbi @%0\n\t"
"ocbi @%1\n\t"
"ocbi @%2\n\t"
"ocbi @%3\n\t" : :
"r" (a0), "r" (a1), "r" (a2), "r" (a3));
a0 += linesz;
a1 += linesz;
a2 += linesz;
a3 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"movca.l r0, @%2\n\t"
"movca.l r0, @%3\n\t"
"ocbi @%0\n\t"
"ocbi @%1\n\t"
"ocbi @%2\n\t"
"ocbi @%3\n\t" : :
"r" (a0), "r" (a1), "r" (a2), "r" (a3));
a0 += linesz;
a1 += linesz;
a2 += linesz;
a3 += linesz;
asm volatile("movca.l r0, @%0\n\t"
"movca.l r0, @%1\n\t"
"movca.l r0, @%2\n\t"
"movca.l r0, @%3\n\t"
"ocbi @%0\n\t"
"ocbi @%1\n\t"
"ocbi @%2\n\t"
"ocbi @%3\n\t" : :
"r" (a0), "r" (a1), "r" (a2), "r" (a3));
asm volatile("ldc %0, sr" : : "r" (orig_sr));
a0 += linesz;
a1 += linesz;
a2 += linesz;
a3 += linesz;
} while (a0 < a0e);
}
extern void __weak sh4__flush_region_init(void);
/*
* SH-4 has virtually indexed and physically tagged cache.
*/
void __init sh4_cache_init(void)
{
unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
printk("PVR=%08x CVR=%08x PRR=%08x\n",
ctrl_inl(CCN_PVR),
ctrl_inl(CCN_CVR),
ctrl_inl(CCN_PRR));
if (wt_enabled)
__flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
else {
switch (boot_cpu_data.dcache.ways) {
case 1:
__flush_dcache_segment_fn = __flush_dcache_segment_1way;
break;
case 2:
__flush_dcache_segment_fn = __flush_dcache_segment_2way;
break;
case 4:
__flush_dcache_segment_fn = __flush_dcache_segment_4way;
break;
default:
panic("unknown number of cache ways\n");
break;
}
}
/*
* Pre-calculate the address of the uncached version of
* __flush_cache_one so we can call it directly.
*/
__flush_cache_one_uncached =
&__flush_cache_one + cached_to_uncached;
local_flush_icache_range = sh4_flush_icache_range;
local_flush_dcache_page = sh4_flush_dcache_page;
local_flush_cache_all = sh4_flush_cache_all;
local_flush_cache_mm = sh4_flush_cache_mm;
local_flush_cache_dup_mm = sh4_flush_cache_mm;
local_flush_cache_page = sh4_flush_cache_page;
local_flush_cache_range = sh4_flush_cache_range;
sh4__flush_region_init();
}

kernel/arch/sh/mm/cache-sh5.c Normal file

@@ -0,0 +1,621 @@
/*
* arch/sh/mm/cache-sh5.c
*
* Copyright (C) 2000, 2001 Paolo Alberelli
* Copyright (C) 2002 Benedict Gaster
* Copyright (C) 2003 Richard Curnow
* Copyright (C) 2003 - 2008 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
extern void __weak sh4__flush_region_init(void);
/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;
/*
* The following group of functions deal with mapping and unmapping a
* temporary page into a DTLB slot that has been set aside for exclusive
* use.
*/
static inline void
sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid,
unsigned long paddr)
{
local_irq_disable();
sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}
static inline void sh64_teardown_dtlb_cache_slot(void)
{
sh64_teardown_tlb_slot(dtlb_cache_slot);
local_irq_enable();
}
static inline void sh64_icache_inv_all(void)
{
unsigned long long addr, flag, data;
unsigned long flags;
addr = ICCR0;
flag = ICCR0_ICI;
data = 0;
/* Make this a critical section for safety (probably not strictly necessary.) */
local_irq_save(flags);
/* Without %1 it gets inexplicably wrong */
__asm__ __volatile__ (
"getcfg %3, 0, %0\n\t"
"or %0, %2, %0\n\t"
"putcfg %3, 0, %0\n\t"
"synci"
: "=&r" (data)
: "0" (data), "r" (flag), "r" (addr));
local_irq_restore(flags);
}
static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
/* Invalidate range of addresses [start,end] from the I-cache, where
* the addresses lie in the kernel superpage. */
unsigned long long ullend, addr, aligned_start;
aligned_start = (unsigned long long)(signed long long)(signed long) start;
addr = L1_CACHE_ALIGN(aligned_start);
ullend = (unsigned long long) (signed long long) (signed long) end;
while (addr <= ullend) {
__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
addr += L1_CACHE_BYTES;
}
}
static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
/* If we get called, we know that vma->vm_flags contains VM_EXEC.
Also, eaddr is page-aligned. */
unsigned int cpu = smp_processor_id();
unsigned long long addr, end_addr;
unsigned long flags = 0;
unsigned long running_asid, vma_asid;
addr = eaddr;
end_addr = addr + PAGE_SIZE;
/* Check whether we can use the current ASID for the I-cache
invalidation. For example, if we're called via
access_process_vm->flush_cache_page->here, (e.g. when reading from
/proc), 'running_asid' will be that of the reader, not of the
victim.
Also, note the risk that we might get pre-empted between the ASID
compare and blocking IRQs, and before we regain control, the
pid->ASID mapping changes. However, the whole cache will get
invalidated when the mapping is renewed, so the worst that can
happen is that the loop below ends up invalidating somebody else's
cache entries.
*/
running_asid = get_asid();
vma_asid = cpu_asid(cpu, vma->vm_mm);
if (running_asid != vma_asid) {
local_irq_save(flags);
switch_and_save_asid(vma_asid);
}
while (addr < end_addr) {
/* Worth unrolling a little */
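/* Each pass issues four icbi's, covering 128 bytes (four 32-byte I-cache lines). */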
__asm__ __volatile__("icbi %0, 0" : : "r" (addr));
__asm__ __volatile__("icbi %0, 32" : : "r" (addr));
__asm__ __volatile__("icbi %0, 64" : : "r" (addr));
__asm__ __volatile__("icbi %0, 96" : : "r" (addr));
addr += 128;
}
if (running_asid != vma_asid) {
switch_and_save_asid(running_asid);
local_irq_restore(flags);
}
}
static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
/* Used for invalidating big chunks of I-cache, i.e. assume the range
is whole pages. If 'start' or 'end' is not page aligned, the code
is conservative and invalidates to the ends of the enclosing pages.
This is functionally OK, just a performance loss. */
/* See the comments below in sh64_dcache_purge_user_range() regarding
the choice of algorithm. However, for the I-cache option (2) isn't
available because there are no physical tags so aliases can't be
resolved. The icbi instruction has to be used through the user
mapping. Because icbi is cheaper than ocbp on a cache hit, it
would be cheaper to use the selective code for a large range than is
possible with the D-cache. Just assume 64 for now as a working
figure.
*/
int n_pages;
if (!mm)
return;
n_pages = ((end - start) >> PAGE_SHIFT);
if (n_pages >= 64) {
sh64_icache_inv_all();
} else {
unsigned long aligned_start;
unsigned long eaddr;
unsigned long after_last_page_start;
unsigned long mm_asid, current_asid;
unsigned long flags = 0;
mm_asid = cpu_asid(smp_processor_id(), mm);
current_asid = get_asid();
if (mm_asid != current_asid) {
/* Switch ASID and run the invalidate loop under cli */
local_irq_save(flags);
switch_and_save_asid(mm_asid);
}
aligned_start = start & PAGE_MASK;
after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
while (aligned_start < after_last_page_start) {
struct vm_area_struct *vma;
unsigned long vma_end;
vma = find_vma(mm, aligned_start);
if (!vma || (aligned_start <= vma->vm_end)) {
/* Avoid getting stuck in an error condition */
aligned_start += PAGE_SIZE;
continue;
}
vma_end = vma->vm_end;
if (vma->vm_flags & VM_EXEC) {
/* Executable */
eaddr = aligned_start;
while (eaddr < vma_end) {
sh64_icache_inv_user_page(vma, eaddr);
eaddr += PAGE_SIZE;
}
}
aligned_start = vma->vm_end; /* Skip to start of next region */
}
if (mm_asid != current_asid) {
switch_and_save_asid(current_asid);
local_irq_restore(flags);
}
}
}
static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
/* The icbi instruction never raises ITLBMISS. i.e. if there's not a
cache hit on the virtual tag the instruction ends there, without a
TLB lookup. */
unsigned long long aligned_start;
unsigned long long ull_end;
unsigned long long addr;
ull_end = end;
/* Just invalidate over the range using the natural addresses. TLB
miss handling will be OK (TBC). Since it's for the current process,
either we're already in the right ASID context, or the ASIDs have
been recycled since we were last active in which case we might just
invalidate another process's I-cache entries: no worries, just a
performance drop for him. */
aligned_start = L1_CACHE_ALIGN(start);
addr = aligned_start;
while (addr < ull_end) {
__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
__asm__ __volatile__ ("nop");
__asm__ __volatile__ ("nop");
addr += L1_CACHE_BYTES;
}
}
/* Buffer used as the target of alloco instructions to purge data from cache
sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
/* Purge all ways in a particular block of sets, specified by the base
set number and number of sets. Can handle wrap-around, if that's
needed. */
int dummy_buffer_base_set;
unsigned long long eaddr, eaddr0, eaddr1;
int j;
int set_offset;
dummy_buffer_base_set = ((int)&dummy_alloco_area &
cpu_data->dcache.entry_mask) >>
cpu_data->dcache.entry_shift;
set_offset = sets_to_purge_base - dummy_buffer_base_set;
for (j = 0; j < n_sets; j++, set_offset++) {
set_offset &= (cpu_data->dcache.sets - 1);
eaddr0 = (unsigned long long)dummy_alloco_area +
(set_offset << cpu_data->dcache.entry_shift);
/*
* Do one alloco which hits the required set per cache
* way. For write-back mode, this will purge the #ways
* resident lines. There's little point unrolling this
* loop because the allocos stall more if they're too
* close together.
*/
eaddr1 = eaddr0 + cpu_data->dcache.way_size *
cpu_data->dcache.ways;
for (eaddr = eaddr0; eaddr < eaddr1;
eaddr += cpu_data->dcache.way_size) {
__asm__ __volatile__ ("alloco %0, 0" : : "r" (eaddr));
__asm__ __volatile__ ("synco"); /* TAKum03020 */
}
eaddr1 = eaddr0 + cpu_data->dcache.way_size *
cpu_data->dcache.ways;
for (eaddr = eaddr0; eaddr < eaddr1;
eaddr += cpu_data->dcache.way_size) {
/*
* Load from each address. Required because
* alloco is a NOP if the cache is write-through.
*/
if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
__raw_readb((unsigned long)eaddr);
}
}
/*
* Don't use OCBI to invalidate the lines. That costs cycles
* directly. If the dummy block is just left resident, it will
* naturally get evicted as required.
*/
}
/*
* Purge the entire contents of the dcache. The most efficient way to
* achieve this is to use alloco instructions on a region of unused
* memory equal in size to the cache, thereby causing the current
* contents to be discarded by natural eviction. The alternative, namely
* reading every tag, setting up a mapping for the corresponding page and
* doing an OCBP for the line, would be much more expensive.
*/
static void sh64_dcache_purge_all(void)
{
sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
}
/* Assumes this address and the (2**n_synbits) pages above it aren't used for
anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL
/* Purge the physical page 'paddr' from the cache. It's known that any
* cache lines requiring attention have the same page colour as the
* address 'eaddr'.
*
* This relies on the fact that the D-cache matches on physical tags when
* no virtual tag matches. So we create an alias for the original page
* and purge through that. (Alternatively, we could have done this by
* switching ASID to match the original mapping and purged through that,
* but that involves ASID switching cost + probably a TLBMISS + refill
* anyway.)
*/
static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr,
unsigned long eaddr)
{
unsigned long long magic_page_start;
unsigned long long magic_eaddr, magic_eaddr_end;
magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
/* As long as the kernel is not pre-emptible, this doesn't need to be
under cli/sti. */
sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
magic_eaddr = magic_page_start;
magic_eaddr_end = magic_eaddr + PAGE_SIZE;
while (magic_eaddr < magic_eaddr_end) {
/* Little point in unrolling this loop - the OCBPs are blocking
and won't go any quicker (i.e. the loop overhead is parallel
to part of the OCBP execution.) */
__asm__ __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
magic_eaddr += L1_CACHE_BYTES;
}
sh64_teardown_dtlb_cache_slot();
}
/*
* Purge a page given its physical start address, by creating a temporary
* 1 page mapping and purging across that. Even if we know the virtual
* address (& vma or mm) of the page, the method here is more elegant
* because it avoids issues of coping with page faults on the purge
* instructions (i.e. no special-case code required in the critical path
* in the TLB miss handling).
*/
static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
unsigned long long eaddr_start, eaddr, eaddr_end;
int i;
/* As long as the kernel is not pre-emptible, this doesn't need to be
under cli/sti. */
eaddr_start = MAGIC_PAGE0_START;
for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
eaddr = eaddr_start;
eaddr_end = eaddr + PAGE_SIZE;
while (eaddr < eaddr_end) {
__asm__ __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
eaddr += L1_CACHE_BYTES;
}
sh64_teardown_dtlb_cache_slot();
eaddr_start += PAGE_SIZE;
}
}
static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
unsigned long addr, unsigned long end)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pte_t entry;
spinlock_t *ptl;
unsigned long paddr;
if (!mm)
return; /* No way to find physical address of page */
pgd = pgd_offset(mm, addr);
if (pgd_bad(*pgd))
return;
pud = pud_offset(pgd, addr);
if (pud_none(*pud) || pud_bad(*pud))
return;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd) || pmd_bad(*pmd))
return;
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
do {
entry = *pte;
if (pte_none(entry) || !pte_present(entry))
continue;
paddr = pte_val(entry) & PAGE_MASK;
sh64_dcache_purge_coloured_phy_page(paddr, addr);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(pte - 1, ptl);
}
/*
* There are at least 5 choices for the implementation of this, with
* pros (+), cons(-), comments(*):
*
* 1. ocbp each line in the range through the original user's ASID
* + no lines spuriously evicted
* - tlbmiss handling (must either handle faults on demand => extra
* special-case code in tlbmiss critical path), or map the page in
* advance (=> flush_tlb_range in advance to avoid multiple hits)
* - ASID switching
* - expensive for large ranges
*
* 2. temporarily map each page in the range to a special effective
* address and ocbp through the temporary mapping; relies on the
* fact that SH-5 OCB* always do TLB lookup and match on ptags (they
* never look at the etags)
* + no spurious evictions
* - expensive for large ranges
* * surely cheaper than (1)
*
* 3. walk all the lines in the cache, check the tags, if a match
* occurs create a page mapping to ocbp the line through
* + no spurious evictions
* - tag inspection overhead
* - (especially for small ranges)
* - potential cost of setting up/tearing down page mapping for
* every line that matches the range
* * cost partly independent of range size
*
* 4. walk all the lines in the cache, check the tags, if a match
* occurs use 4 * alloco to purge the line (+3 other probably
* innocent victims) by natural eviction
* + no tlb mapping overheads
* - spurious evictions
* - tag inspection overhead
*
* 5. implement like flush_cache_all
* + no tag inspection overhead
* - spurious evictions
* - bad for small ranges
*
* (1) can be ruled out as more expensive than (2). (2) appears best
* for small ranges. The choice between (3), (4) and (5) for large
* ranges and the range size for the large/small boundary need
* benchmarking to determine.
*
* For now use approach (2) for small ranges and (5) for large ones.
*/
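/*
* Illustrative note on the cutoff used below (assuming 4 KiB pages): the
* 64-page limit corresponds to a 256 KiB range. Ranges crossing a PMD
* boundary also take the purge-all path, since sh64_dcache_purge_user_pages()
* only walks a single page-table page.
*/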
static void sh64_dcache_purge_user_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
int n_pages = ((end - start) >> PAGE_SHIFT);
if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
sh64_dcache_purge_all();
} else {
/* Small range, covered by a single page table page */
start &= PAGE_MASK; /* should already be so */
end = PAGE_ALIGN(end); /* should already be so */
sh64_dcache_purge_user_pages(mm, start, end);
}
}
/*
* Invalidate the entire contents of both caches, after writing back to
* memory any dirty data from the D-cache.
*/
static void sh5_flush_cache_all(void *unused)
{
sh64_dcache_purge_all();
sh64_icache_inv_all();
}
/*
* Invalidate an entire user-address space from both caches, after
* writing back dirty data (e.g. for shared mmap etc).
*
* This could be coded selectively by inspecting all the tags then
* doing 4*alloco on any set containing a match (as for
* flush_cache_range), but fork/exit/execve (where this is called from)
* are expensive anyway.
*
* Have to do a purge here, despite the comments re I-cache below.
* There could be odd-coloured dirty data associated with the mm still
* in the cache - if this gets written out through natural eviction
* after the kernel has reused the page there will be chaos.
*
* The mm being torn down won't ever be active again, so any Icache
* lines tagged with its ASID won't be visible for the rest of the
* lifetime of this ASID cycle. Before the ASID gets reused, there
* will be a flush_cache_all. Hence we don't need to touch the
* I-cache. This is similar to the lack of action needed in
* flush_tlb_mm - see fault.c.
*/
static void sh5_flush_cache_mm(void *unused)
{
sh64_dcache_purge_all();
}
/*
* Invalidate (from both caches) the range [start,end) of virtual
* addresses from the user address space specified by mm, after writing
* back any dirty data.
*
* Note, 'end' is 1 byte beyond the end of the range to flush.
*/
static void sh5_flush_cache_range(void *args)
{
struct flusher_data *data = args;
struct vm_area_struct *vma;
unsigned long start, end;
vma = data->vma;
start = data->addr1;
end = data->addr2;
sh64_dcache_purge_user_range(vma->vm_mm, start, end);
sh64_icache_inv_user_page_range(vma->vm_mm, start, end);
}
/*
* Invalidate any entries in either cache for the vma within the user
* address space vma->vm_mm for the page starting at virtual address
* 'eaddr'. This seems to be used primarily in breaking COW. Note,
* the I-cache must be searched too in case the page in question is
* both writable and being executed from (e.g. stack trampolines.)
*
* Note, this is called with pte lock held.
*/
static void sh5_flush_cache_page(void *args)
{
struct flusher_data *data = args;
struct vm_area_struct *vma;
unsigned long eaddr, pfn;
vma = data->vma;
eaddr = data->addr1;
pfn = data->addr2;
sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
if (vma->vm_flags & VM_EXEC)
sh64_icache_inv_user_page(vma, eaddr);
}
static void sh5_flush_dcache_page(void *page)
{
sh64_dcache_purge_phy_page(page_to_phys(page));
wmb();
}
/*
* Flush the range [start,end] of kernel virtual address space from
* the I-cache. The corresponding range must be purged from the
* D-cache also because the SH-5 doesn't have cache snooping between
* the caches. The addresses will be visible through the superpage
* mapping, therefore it's guaranteed that there are no cache entries for
* the range in cache sets of the wrong colour.
*/
static void sh5_flush_icache_range(void *args)
{
struct flusher_data *data = args;
unsigned long start, end;
start = data->addr1;
end = data->addr2;
__flush_purge_region((void *)start, end);
wmb();
sh64_icache_inv_kernel_range(start, end);
}
/*
* For the address range [start,end), write back the data from the
* D-cache and invalidate the corresponding region of the I-cache for the
* current process. Used to flush signal trampolines on the stack to
* make them executable.
*/
static void sh5_flush_cache_sigtramp(void *vaddr)
{
unsigned long end = (unsigned long)vaddr + L1_CACHE_BYTES;
__flush_wback_region(vaddr, L1_CACHE_BYTES);
wmb();
sh64_icache_inv_current_user_range((unsigned long)vaddr, end);
}
void __init sh5_cache_init(void)
{
local_flush_cache_all = sh5_flush_cache_all;
local_flush_cache_mm = sh5_flush_cache_mm;
local_flush_cache_dup_mm = sh5_flush_cache_mm;
local_flush_cache_page = sh5_flush_cache_page;
local_flush_cache_range = sh5_flush_cache_range;
local_flush_dcache_page = sh5_flush_dcache_page;
local_flush_icache_range = sh5_flush_icache_range;
local_flush_cache_sigtramp = sh5_flush_cache_sigtramp;
/* Reserve a slot for dcache colouring in the DTLB */
dtlb_cache_slot = sh64_get_wired_dtlb_entry();
sh4__flush_region_init();
}


@@ -0,0 +1,195 @@
/*
* arch/sh/mm/cache-sh7705.c
*
* Copyright (C) 1999, 2000 Niibe Yutaka
* Copyright (C) 2004 Alex Song
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
*/
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/threads.h>
#include <asm/addrspace.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
/*
* The 32KB cache on the SH7705 suffers from the same synonym problem
* as SH4 CPUs
*/
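/*
* cache_wback_all(): walk every line of every way through the memory-mapped
* operand-cache address array; entries with both the valid and updated bits
* set are rewritten with those bits cleared, which writes the dirty data
* back to memory.
*/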
static inline void cache_wback_all(void)
{
unsigned long ways, waysize, addrstart;
ways = current_cpu_data.dcache.ways;
waysize = current_cpu_data.dcache.sets;
waysize <<= current_cpu_data.dcache.entry_shift;
addrstart = CACHE_OC_ADDRESS_ARRAY;
do {
unsigned long addr;
for (addr = addrstart;
addr < addrstart + waysize;
addr += current_cpu_data.dcache.linesz) {
unsigned long data;
int v = SH_CACHE_UPDATED | SH_CACHE_VALID;
data = ctrl_inl(addr);
if ((data & v) == v)
ctrl_outl(data & ~v, addr);
}
addrstart += current_cpu_data.dcache.way_incr;
} while (--ways);
}
/*
* Write back the range of D-cache, and purge the I-cache.
*
* Called from kernel/module.c:sys_init_module and from the a.out loader.
*/
static void sh7705_flush_icache_range(void *args)
{
struct flusher_data *data = args;
unsigned long start, end;
start = data->addr1;
end = data->addr2;
__flush_wback_region((void *)start, end - start);
}
/*
* Writeback&Invalidate the D-cache of the page
*/
static void __uses_jump_to_uncached __flush_dcache_page(unsigned long phys)
{
unsigned long ways, waysize, addrstart;
unsigned long flags;
phys |= SH_CACHE_VALID;
/*
* Here, phys is the physical address of the page. We check all the
* tags in the cache for those with the same page number as this page
* (by masking off the lowest 2 bits of the 19-bit tag; these bits are
* derived from the offset within the 4k page). Matching valid
* entries are invalidated.
*
* Since 2 bits of the cache index are derived from the virtual page
* number, knowing this would reduce the number of cache entries to be
* searched by a factor of 4. However this function exists to deal with
* potential cache aliasing, therefore the optimisation is probably not
* possible.
*/
local_irq_save(flags);
jump_to_uncached();
ways = current_cpu_data.dcache.ways;
waysize = current_cpu_data.dcache.sets;
waysize <<= current_cpu_data.dcache.entry_shift;
addrstart = CACHE_OC_ADDRESS_ARRAY;
do {
unsigned long addr;
for (addr = addrstart;
addr < addrstart + waysize;
addr += current_cpu_data.dcache.linesz) {
unsigned long data;
data = ctrl_inl(addr) & (0x1ffffC00 | SH_CACHE_VALID);
if (data == phys) {
data &= ~(SH_CACHE_VALID | SH_CACHE_UPDATED);
ctrl_outl(data, addr);
}
}
addrstart += current_cpu_data.dcache.way_incr;
} while (--ways);
back_to_cached();
local_irq_restore(flags);
}
/*
* Write back & invalidate the D-cache of the page.
* (To avoid "alias" issues)
*/
static void sh7705_flush_dcache_page(void *arg)
{
struct page *page = arg;
struct address_space *mapping = page_mapping(page);
if (mapping && !mapping_mapped(mapping))
set_bit(PG_dcache_dirty, &page->flags);
else
__flush_dcache_page(PHYSADDR(page_address(page)));
}
static void __uses_jump_to_uncached sh7705_flush_cache_all(void *args)
{
unsigned long flags;
local_irq_save(flags);
jump_to_uncached();
cache_wback_all();
back_to_cached();
local_irq_restore(flags);
}
/*
* Write back and invalidate I/D-caches for the page.
*
* ADDRESS: Virtual Address (U0 address)
*/
static void sh7705_flush_cache_page(void *args)
{
struct flusher_data *data = args;
unsigned long pfn = data->addr2;
__flush_dcache_page(pfn << PAGE_SHIFT);
}
/*
* This is called when a page-cache page is about to be mapped into a
* user process' address space. It offers an opportunity for a
* port to ensure d-cache/i-cache coherency if necessary.
*
* Not entirely sure why this is necessary on SH3 with 32K cache but
* without it we get occasional "Memory fault" when loading a program.
*/
static void sh7705_flush_icache_page(void *page)
{
__flush_purge_region(page_address(page), PAGE_SIZE);
}
void __init sh7705_cache_init(void)
{
local_flush_icache_range = sh7705_flush_icache_range;
local_flush_dcache_page = sh7705_flush_dcache_page;
local_flush_cache_all = sh7705_flush_cache_all;
local_flush_cache_mm = sh7705_flush_cache_all;
local_flush_cache_dup_mm = sh7705_flush_cache_all;
local_flush_cache_range = sh7705_flush_cache_all;
local_flush_cache_page = sh7705_flush_cache_page;
local_flush_icache_page = sh7705_flush_icache_page;
}

329
kernel/arch/sh/mm/cache.c Normal file

@@ -0,0 +1,329 @@
/*
* arch/sh/mm/cache.c
*
* Copyright (C) 1999, 2000, 2002 Niibe Yutaka
* Copyright (C) 2002 - 2009 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
void (*local_flush_cache_all)(void *args) = cache_noop;
void (*local_flush_cache_mm)(void *args) = cache_noop;
void (*local_flush_cache_dup_mm)(void *args) = cache_noop;
void (*local_flush_cache_page)(void *args) = cache_noop;
void (*local_flush_cache_range)(void *args) = cache_noop;
void (*local_flush_dcache_page)(void *args) = cache_noop;
void (*local_flush_icache_range)(void *args) = cache_noop;
void (*local_flush_icache_page)(void *args) = cache_noop;
void (*local_flush_cache_sigtramp)(void *args) = cache_noop;
void (*__flush_wback_region)(void *start, int size);
void (*__flush_purge_region)(void *start, int size);
void (*__flush_invalidate_region)(void *start, int size);
static inline void noop__flush_region(void *start, int size)
{
}
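/*
* Run a cache operation on every CPU: smp_call_function() invokes it on the
* other CPUs and the local CPU then calls it directly. Preemption is
* disabled so the local call cannot migrate between the two steps.
*/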
static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info,
int wait)
{
preempt_disable();
smp_call_function(func, info, wait);
func(info);
preempt_enable();
}
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, void *dst, const void *src,
unsigned long len)
{
if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
!test_bit(PG_dcache_dirty, &page->flags)) {
void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
memcpy(vto, src, len);
kunmap_coherent(vto);
} else {
memcpy(dst, src, len);
if (boot_cpu_data.dcache.n_aliases)
set_bit(PG_dcache_dirty, &page->flags);
}
if (vma->vm_flags & VM_EXEC)
flush_cache_page(vma, vaddr, page_to_pfn(page));
}
void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, void *dst, const void *src,
unsigned long len)
{
if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
!test_bit(PG_dcache_dirty, &page->flags)) {
void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
memcpy(dst, vfrom, len);
kunmap_coherent(vfrom);
} else {
memcpy(dst, src, len);
if (boot_cpu_data.dcache.n_aliases)
set_bit(PG_dcache_dirty, &page->flags);
}
}
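/*
* copy_user_highpage() is used for copy-on-write and friends. If the source
* page may still be live in the user's cache colour (it is mapped and not
* flagged PG_dcache_dirty), the data is read through a same-coloured kernel
* mapping via kmap_coherent(); the destination is then purged when it
* aliases the user address or the vma is executable.
*/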
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
{
void *vfrom, *vto;
vto = kmap_atomic(to, KM_USER1);
if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
!test_bit(PG_dcache_dirty, &from->flags)) {
vfrom = kmap_coherent(from, vaddr);
copy_page(vto, vfrom);
kunmap_coherent(vfrom);
} else {
vfrom = kmap_atomic(from, KM_USER0);
copy_page(vto, vfrom);
kunmap_atomic(vfrom, KM_USER0);
}
if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK) ||
(vma->vm_flags & VM_EXEC))
__flush_purge_region(vto, PAGE_SIZE);
kunmap_atomic(vto, KM_USER1);
/* Make sure this page is cleared on other CPU's too before using it */
smp_wmb();
}
EXPORT_SYMBOL(copy_user_highpage);
void clear_user_highpage(struct page *page, unsigned long vaddr)
{
void *kaddr = kmap_atomic(page, KM_USER0);
clear_page(kaddr);
if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK))
__flush_purge_region(kaddr, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
}
EXPORT_SYMBOL(clear_user_highpage);
void __update_cache(struct vm_area_struct *vma,
unsigned long address, pte_t pte)
{
struct page *page;
unsigned long pfn = pte_pfn(pte);
if (!boot_cpu_data.dcache.n_aliases)
return;
page = pfn_to_page(pfn);
if (pfn_valid(pfn)) {
int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags);
if (dirty) {
unsigned long addr = (unsigned long)page_address(page);
if (pages_do_alias(addr, address & PAGE_MASK))
__flush_purge_region((void *)addr, PAGE_SIZE);
else if (vma->vm_flags & VM_EXEC)
__flush_wback_region((void *)addr, PAGE_SIZE);
}
}
}
void __flush_anon_page(struct page *page, unsigned long vmaddr)
{
unsigned long addr = (unsigned long) page_address(page);
if (pages_do_alias(addr, vmaddr)) {
if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
!test_bit(PG_dcache_dirty, &page->flags)) {
void *kaddr;
kaddr = kmap_coherent(page, vmaddr);
/* XXX.. For now kunmap_coherent() does a purge */
/* __flush_purge_region((void *)kaddr, PAGE_SIZE); */
kunmap_coherent(kaddr);
} else
__flush_purge_region((void *)addr, PAGE_SIZE);
}
}
void flush_cache_all(void)
{
cacheop_on_each_cpu(local_flush_cache_all, NULL, 1);
}
void flush_cache_mm(struct mm_struct *mm)
{
cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
}
void flush_cache_dup_mm(struct mm_struct *mm)
{
cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
}
void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
struct flusher_data data;
data.vma = vma;
data.addr1 = addr;
data.addr2 = pfn;
cacheop_on_each_cpu(local_flush_cache_page, (void *)&data, 1);
}
void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct flusher_data data;
data.vma = vma;
data.addr1 = start;
data.addr2 = end;
cacheop_on_each_cpu(local_flush_cache_range, (void *)&data, 1);
}
void flush_dcache_page(struct page *page)
{
cacheop_on_each_cpu(local_flush_dcache_page, page, 1);
}
void flush_icache_range(unsigned long start, unsigned long end)
{
struct flusher_data data;
data.vma = NULL;
data.addr1 = start;
data.addr2 = end;
cacheop_on_each_cpu(local_flush_icache_range, (void *)&data, 1);
}
void flush_icache_page(struct vm_area_struct *vma, struct page *page)
{
/* Nothing uses the VMA, so just pass the struct page along */
cacheop_on_each_cpu(local_flush_icache_page, page, 1);
}
void flush_cache_sigtramp(unsigned long address)
{
cacheop_on_each_cpu(local_flush_cache_sigtramp, (void *)address, 1);
}
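/*
* Illustrative example for compute_alias() below (hypothetical geometry):
* with 512 sets, a 32-byte line (entry_shift = 5) and 4 KiB pages,
* alias_mask = ((512 - 1) << 5) & ~0xfff = 0x3000 and n_aliases = 4,
* i.e. four possible page colours per way.
*/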
static void compute_alias(struct cache_info *c)
{
c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0;
}
static void __init emit_cache_params(void)
{
printk(KERN_NOTICE "I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
boot_cpu_data.icache.ways,
boot_cpu_data.icache.sets,
boot_cpu_data.icache.way_incr);
printk(KERN_NOTICE "I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
boot_cpu_data.icache.entry_mask,
boot_cpu_data.icache.alias_mask,
boot_cpu_data.icache.n_aliases);
printk(KERN_NOTICE "D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
boot_cpu_data.dcache.ways,
boot_cpu_data.dcache.sets,
boot_cpu_data.dcache.way_incr);
printk(KERN_NOTICE "D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
boot_cpu_data.dcache.entry_mask,
boot_cpu_data.dcache.alias_mask,
boot_cpu_data.dcache.n_aliases);
/*
* Emit Secondary Cache parameters if the CPU has a probed L2.
*/
if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) {
printk(KERN_NOTICE "S-cache : n_ways=%d n_sets=%d way_incr=%d\n",
boot_cpu_data.scache.ways,
boot_cpu_data.scache.sets,
boot_cpu_data.scache.way_incr);
printk(KERN_NOTICE "S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
boot_cpu_data.scache.entry_mask,
boot_cpu_data.scache.alias_mask,
boot_cpu_data.scache.n_aliases);
}
}
void __init cpu_cache_init(void)
{
unsigned int cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE);
compute_alias(&boot_cpu_data.icache);
compute_alias(&boot_cpu_data.dcache);
compute_alias(&boot_cpu_data.scache);
__flush_wback_region = noop__flush_region;
__flush_purge_region = noop__flush_region;
__flush_invalidate_region = noop__flush_region;
/*
* No flushing is necessary in the disabled cache case so we can
* just keep the noop functions in local_flush_..() and __flush_..()
*/
if (unlikely(cache_disabled))
goto skip;
if (boot_cpu_data.family == CPU_FAMILY_SH2) {
extern void __weak sh2_cache_init(void);
sh2_cache_init();
}
if (boot_cpu_data.family == CPU_FAMILY_SH2A) {
extern void __weak sh2a_cache_init(void);
sh2a_cache_init();
}
if (boot_cpu_data.family == CPU_FAMILY_SH3) {
extern void __weak sh3_cache_init(void);
sh3_cache_init();
if ((boot_cpu_data.type == CPU_SH7705) &&
(boot_cpu_data.dcache.sets == 512)) {
extern void __weak sh7705_cache_init(void);
sh7705_cache_init();
}
}
if ((boot_cpu_data.family == CPU_FAMILY_SH4) ||
(boot_cpu_data.family == CPU_FAMILY_SH4A) ||
(boot_cpu_data.family == CPU_FAMILY_SH4AL_DSP)) {
extern void __weak sh4_cache_init(void);
sh4_cache_init();
}
if (boot_cpu_data.family == CPU_FAMILY_SH5) {
extern void __weak sh5_cache_init(void);
sh5_cache_init();
}
skip:
emit_cache_params();
}


@@ -0,0 +1,382 @@
/*
* arch/sh/mm/consistent.c
*
* Copyright (C) 2004 - 2007 Paul Mundt
*
* Declared coherent memory functions based on arch/x86/kernel/pci-dma_32.c
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <asm/cacheflush.h>
#include <asm/l2_cacheflush.h>
#include <asm/addrspace.h>
#ifdef CONFIG_PMB
/*
* This is yet another copy of the ARM (and powerpc) VM region allocation
* code (which is Copyright (C) 2000-2004 Russell King).
*
* We have to do this (rather than use get_vm_area()) because
* dma_alloc_coherent() can be (and is) called from interrupt level.
*/
static DEFINE_SPINLOCK(consistent_lock);
/*
* VM region handling support.
*
* This should become something generic, handling VM region allocations for
* vmalloc and similar (ioremap, module space, etc).
*
* I envisage vmalloc()'s supporting vm_struct becoming:
*
* struct vm_struct {
* struct vm_region region;
* unsigned long flags;
* struct page **pages;
* unsigned int nr_pages;
* unsigned long phys_addr;
* };
*
* get_vm_area() would then call vm_region_alloc with an appropriate
* struct vm_region head (eg):
*
* struct vm_region vmalloc_head = {
* .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
* .vm_start = VMALLOC_START,
* .vm_end = VMALLOC_END,
* };
*
* However, vmalloc_head.vm_start is variable (typically, it is dependent on
* the amount of RAM found at boot time.) I would imagine that get_vm_area()
* would have to initialise this each time prior to calling vm_region_alloc().
*/
struct sh_vm_region {
struct list_head vm_list;
unsigned long vm_start;
unsigned long vm_end;
struct page *vm_pages;
};
static struct sh_vm_region consistent_head = {
.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
.vm_start = CONSISTENT_BASE,
.vm_end = CONSISTENT_END,
};
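/*
* sh_vm_region_alloc() below does a first-fit scan of the address-ordered
* region list between CONSISTENT_BASE and CONSISTENT_END, inserting the new
* region in front of the first existing region that leaves a large enough
* gap.
*/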
static struct sh_vm_region *
sh_vm_region_alloc(struct sh_vm_region *head, size_t size, gfp_t gfp)
{
unsigned long addr = head->vm_start, end = head->vm_end - size;
unsigned long flags;
struct sh_vm_region *c, *new;
new = kmalloc(sizeof(struct sh_vm_region), gfp);
if (!new)
goto out;
spin_lock_irqsave(&consistent_lock, flags);
list_for_each_entry(c, &head->vm_list, vm_list) {
if ((addr + size) < addr)
goto nospc;
if ((addr + size) <= c->vm_start)
goto found;
addr = c->vm_end;
if (addr > end)
goto nospc;
}
found:
/*
* Insert this entry _before_ the one we found.
*/
list_add_tail(&new->vm_list, &c->vm_list);
new->vm_start = addr;
new->vm_end = addr + size;
spin_unlock_irqrestore(&consistent_lock, flags);
return new;
nospc:
spin_unlock_irqrestore(&consistent_lock, flags);
kfree(new);
out:
return NULL;
}
static struct sh_vm_region *sh_vm_region_find(struct sh_vm_region *head,
unsigned long addr)
{
struct sh_vm_region *c;
list_for_each_entry(c, &head->vm_list, vm_list) {
if (c->vm_start == addr)
goto out;
}
c = NULL;
out:
return c;
}
static void *__consistent_map(struct page *page, size_t size, gfp_t gfp)
{
struct sh_vm_region *c;
unsigned long vaddr;
unsigned long paddr;
c = sh_vm_region_alloc(&consistent_head, size,
gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
if (!c)
return NULL;
vaddr = c->vm_start;
paddr = page_to_phys(page);
if (ioremap_page_range(vaddr, vaddr+size, paddr, PAGE_KERNEL_NOCACHE)) {
list_del(&c->vm_list);
return NULL;
}
c->vm_pages = page;
return (void *)vaddr;
}
static struct page *__consistent_unmap(void *vaddr, size_t size)
{
unsigned long flags;
struct sh_vm_region *c;
struct page *page;
spin_lock_irqsave(&consistent_lock, flags);
c = sh_vm_region_find(&consistent_head, (unsigned long)vaddr);
spin_unlock_irqrestore(&consistent_lock, flags);
if (!c)
goto no_area;
if ((c->vm_end - c->vm_start) != size) {
printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
__func__, c->vm_end - c->vm_start, size);
dump_stack();
size = c->vm_end - c->vm_start;
}
page = c->vm_pages;
unmap_kernel_range(c->vm_start, size);
spin_lock_irqsave(&consistent_lock, flags);
list_del(&c->vm_list);
spin_unlock_irqrestore(&consistent_lock, flags);
kfree(c);
return page;
no_area:
printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
__func__, vaddr);
dump_stack();
return NULL;
}
#else
static void *__consistent_map(struct page *page, size_t size, gfp_t gfp)
{
return P2SEGADDR(page_address(page));
}
static struct page *__consistent_unmap(void *vaddr, size_t size)
{
unsigned long addr;
addr = P1SEGADDR((unsigned long)vaddr);
BUG_ON(!virt_addr_valid(addr));
return virt_to_page(addr);
}
#endif
#define PREALLOC_DMA_DEBUG_ENTRIES 4096
static int __init dma_init(void)
{
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
return 0;
}
fs_initcall(dma_init);
void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
{
void *ret;
int order;
struct page *page;
unsigned long phys_addr;
void* kernel_addr;
size_t orig_size;
if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
return ret;
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
orig_size = size;
size = PAGE_ALIGN(size);
order = get_order(size);
page = alloc_pages(gfp, order);
if (!page)
return NULL;
kernel_addr = page_address(page);
phys_addr = virt_to_phys(kernel_addr);
ret = __consistent_map(page, size, gfp);
if (!ret) {
__free_pages(page, order);
return NULL;
}
memset(kernel_addr, 0, orig_size);
/*
* Pages from the page allocator may have data present in
* cache. So flush the cache before using uncached memory.
*/
dma_cache_sync(dev, kernel_addr, orig_size, DMA_BIDIRECTIONAL);
/*
* Free the otherwise unused pages, unless we got a compound page
*/
if (!PageCompound(page)) {
struct page *end = page + (1 << order);
split_page(page, order);
for (page += size >> PAGE_SHIFT; page < end; page++)
__free_page(page);
}
*dma_handle = phys_addr;
debug_dma_alloc_coherent(dev, orig_size, *dma_handle, ret);
return ret;
}
EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
int order = get_order(size);
struct page *page;
if (dma_release_from_coherent(dev, order, vaddr))
return;
debug_dma_free_coherent(dev, size, vaddr, dma_handle);
size = PAGE_ALIGN(size);
page = __consistent_unmap(vaddr, size);
if (page) {
if (PageCompound(page)) {
__free_pages(page, get_order(size));
} else {
int i;
for (i = 0; i < (size >> PAGE_SHIFT); i++)
__free_page(page + i);
}
}
}
EXPORT_SYMBOL(dma_free_coherent);
void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
{
switch (direction) {
case DMA_FROM_DEVICE: /* invalidate only */
__flush_invalidate_region(vaddr, size);
__l2_flush_invalidate_region(vaddr, size);
break;
case DMA_TO_DEVICE: /* writeback only */
__flush_wback_region(vaddr, size);
__l2_flush_wback_region(vaddr, size);
break;
case DMA_BIDIRECTIONAL: /* writeback and invalidate */
__flush_purge_region(vaddr, size);
__l2_flush_purge_region(vaddr, size);
break;
default:
BUG();
}
}
EXPORT_SYMBOL(dma_cache_sync);
static int __init memchunk_setup(char *str)
{
return 1; /* accept anything that begins with "memchunk." */
}
__setup("memchunk.", memchunk_setup);
static void __init memchunk_cmdline_override(char *name, unsigned long *sizep)
{
char *p = boot_command_line;
int k = strlen(name);
while ((p = strstr(p, "memchunk."))) {
p += 9; /* strlen("memchunk.") */
if (!strncmp(name, p, k) && p[k] == '=') {
p += k + 1;
*sizep = memparse(p, NULL);
pr_info("%s: forcing memory chunk size to 0x%08lx\n",
name, *sizep);
break;
}
}
}
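/*
* Example (hypothetical device name): booting with "memchunk.vpu=2m" on the
* kernel command line would force the chunk reserved for a platform device
* named "vpu" to 2 MiB; memparse() accepts the usual k/m/g suffixes.
*/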
int __init platform_resource_setup_memory(struct platform_device *pdev,
char *name, unsigned long memsize)
{
struct resource *r;
dma_addr_t dma_handle;
void *buf;
r = pdev->resource + pdev->num_resources - 1;
if (r->flags) {
pr_warning("%s: unable to find empty space for resource\n",
name);
return -EINVAL;
}
memchunk_cmdline_override(name, &memsize);
if (!memsize)
return 0;
buf = dma_alloc_coherent(NULL, memsize, &dma_handle, GFP_KERNEL);
if (!buf) {
pr_warning("%s: unable to allocate memory\n", name);
return -ENOMEM;
}
memset(buf, 0, memsize);
r->flags = IORESOURCE_MEM;
r->start = dma_handle;
r->end = r->start + memsize - 1;
r->name = name;
return 0;
}


@@ -0,0 +1,21 @@
/*
* linux/arch/sh/mm/extable.c
* Taken from:
* linux/arch/i386/mm/extable.c
*/
#include <linux/module.h>
#include <asm/uaccess.h>
int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->pc);
if (fixup) {
regs->pc = fixup->fixup;
return 1;
}
return 0;
}


@@ -0,0 +1,82 @@
/*
* arch/sh/mm/extable_64.c
*
* Copyright (C) 2003 Richard Curnow
* Copyright (C) 2003, 2004 Paul Mundt
*
* Cloned from the 2.5 SH version..
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/rwsem.h>
#include <linux/module.h>
#include <asm/uaccess.h>
extern unsigned long copy_user_memcpy, copy_user_memcpy_end;
extern void __copy_user_fixup(void);
static const struct exception_table_entry __copy_user_fixup_ex = {
.fixup = (unsigned long)&__copy_user_fixup,
};
/*
* Some functions that may trap due to a bad user-mode address have too
* many loads and stores in them to make it at all practical to label
* each one and put them all in the main exception table.
*
* In particular, the fast memcpy routine is like this. Its fix-up is
* just to fall back to a slow byte-at-a-time copy, which is handled the
* conventional way. So it's functionally OK to just handle any trap
* occurring in the fast memcpy with that fixup.
*/
static const struct exception_table_entry *check_exception_ranges(unsigned long addr)
{
if ((addr >= (unsigned long)&copy_user_memcpy) &&
(addr <= (unsigned long)&copy_user_memcpy_end))
return &__copy_user_fixup_ex;
return NULL;
}
/* Simple binary search */
const struct exception_table_entry *
search_extable(const struct exception_table_entry *first,
const struct exception_table_entry *last,
unsigned long value)
{
const struct exception_table_entry *mid;
mid = check_exception_ranges(value);
if (mid)
return mid;
while (first <= last) {
long diff;
mid = (last - first) / 2 + first;
diff = mid->insn - value;
if (diff == 0)
return mid;
else if (diff < 0)
first = mid+1;
else
last = mid-1;
}
return NULL;
}
int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->pc);
if (fixup) {
regs->pc = fixup->fixup;
return 1;
}
return 0;
}


@@ -0,0 +1,374 @@
/*
* Page fault handler for SH with an MMU.
*
* Copyright (C) 1999 Niibe Yutaka
* Copyright (C) 2003 - 2009 Paul Mundt
*
* Based on linux/arch/i386/mm/fault.c:
* Copyright (C) 1995 Linus Torvalds
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
int ret = 0;
if (kprobes_built_in() && !user_mode(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, trap))
ret = 1;
preempt_enable();
}
return ret;
}
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
unsigned index = pgd_index(address);
pgd_t *pgd_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pgd += index;
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k))
return NULL;
pud = pud_offset(pgd, address);
pud_k = pud_offset(pgd_k, address);
if (!pud_present(*pud_k))
return NULL;
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
return NULL;
if (!pmd_present(*pmd))
set_pmd(pmd, *pmd_k);
else {
/*
* The page tables are fully synchronised so there must
* be another reason for the fault. Return NULL here to
* signal that we have not taken care of the fault.
*/
BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
return NULL;
}
return pmd_k;
}
/*
* Handle a fault on the vmalloc or module mapping area
*/
static noinline int vmalloc_fault(unsigned long address)
{
pgd_t *pgd_k;
pmd_t *pmd_k;
pte_t *pte_k;
/* Make sure we are in vmalloc/module/P3 area: */
if (!(address >= VMALLOC_START && address < P3_ADDR_MAX))
return -1;
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
*
* Do _not_ use "current" here. We might be inside
* an interrupt in the middle of a task switch..
*/
pgd_k = get_TTB();
pmd_k = vmalloc_sync_one(pgd_k, address);
if (!pmd_k)
return -1;
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
return -1;
return 0;
}
static int fault_in_kernel_space(unsigned long address)
{
return address >= TASK_SIZE;
}
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
asmlinkage void do_page_fault(struct pt_regs *regs,
unsigned long writeaccess,
unsigned long address)
{
unsigned long vec;
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct * vma;
int si_code;
int fault;
siginfo_t info;
tsk = current;
mm = tsk->mm;
si_code = SEGV_MAPERR;
vec = lookup_exception_vector();
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
*
* NOTE! We MUST NOT take any locks for this case. We may
* be in an interrupt or a critical region, and should
* only copy the information from the master page table,
* nothing more.
*/
if (unlikely(fault_in_kernel_space(address))) {
if (vmalloc_fault(address) >= 0)
return;
if (notify_page_fault(regs, vec))
return;
goto bad_area_nosemaphore;
}
if (unlikely(notify_page_fault(regs, vec)))
return;
/* Only enable interrupts if they were on before the fault */
if ((regs->sr & SR_IMASK) != SR_IMASK)
local_irq_enable();
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/*
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
*/
if (in_atomic() || !mm)
goto no_context;
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (expand_stack(vma, address))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
good_area:
si_code = SEGV_ACCERR;
if (writeaccess) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
}
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
survive:
fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
else if (fault & VM_FAULT_SIGBUS)
goto do_sigbus;
BUG();
}
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
} else {
tsk->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
up_read(&mm->mmap_sem);
return;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
bad_area:
up_read(&mm->mmap_sem);
bad_area_nosemaphore:
if (user_mode(regs)) {
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void *) address;
force_sig_info(SIGSEGV, &info, tsk);
return;
}
no_context:
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
if (handle_trapped_io(regs, address))
return;
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*
*/
bust_spinlocks(1);
if (oops_may_print()) {
unsigned long page;
if (address < PAGE_SIZE)
printk(KERN_ALERT "Unable to handle kernel NULL "
"pointer dereference");
else
printk(KERN_ALERT "Unable to handle kernel paging "
"request");
printk(" at virtual address %08lx\n", address);
printk(KERN_ALERT "pc = %08lx\n", regs->pc);
page = (unsigned long)get_TTB();
if (page) {
page = ((__typeof__(page) *)page)[pgd_index(address)];
printk(KERN_ALERT "*pde = %08lx\n", page);
if (virt_addr_valid(page)) {
address = pte_index(address);
page = ((__typeof__(page) *)page)[address];
printk(KERN_ALERT "*pte = %08lx\n", page);
}
}
}
die("Oops", regs, writeaccess);
bust_spinlocks(0);
do_exit(SIGKILL);
/*
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
}
printk("VM: killing process %s\n", tsk->comm);
if (user_mode(regs))
do_group_exit(SIGKILL);
goto no_context;
do_sigbus:
up_read(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRERR;
info.si_addr = (void *)address;
force_sig_info(SIGBUS, &info, tsk);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
goto no_context;
}
/*
* Called with interrupts disabled.
*/
asmlinkage int __kprobes
handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
unsigned long address)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pte_t entry;
/*
* We don't take page faults for P1, P2, and parts of P4, these
* are always mapped, whether it be due to legacy behaviour in
* 29-bit mode, or due to PMB configuration in 32-bit mode.
*/
if (address >= P3SEG && address < P3_ADDR_MAX) {
pgd = pgd_offset_k(address);
} else {
if (unlikely(address >= TASK_SIZE || !current->mm))
return 1;
pgd = pgd_offset(current->mm, address);
}
pud = pud_offset(pgd, address);
if (pud_none_or_clear_bad(pud))
return 1;
pmd = pmd_offset(pud, address);
if (pmd_none_or_clear_bad(pmd))
return 1;
pte = pte_offset_kernel(pmd, address);
entry = *pte;
if (unlikely(pte_none(entry) || pte_not_present(entry)))
return 1;
if (unlikely(writeaccess && !pte_write(entry)))
return 1;
if (writeaccess)
entry = pte_mkdirty(entry);
entry = pte_mkyoung(entry);
set_pte(pte, entry);
#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
/*
* SH-4 does not set MMUCR.RC to the corresponding TLB entry in
* the case of an initial page write exception, so we need to
* flush it in order to avoid potential TLB entry duplication.
*/
if (writeaccess == 2)
local_flush_tlb_one(get_asid(), address & PAGE_MASK);
#endif
update_mmu_cache(NULL, address, entry);
return 0;
}


@@ -0,0 +1,266 @@
/*
* The SH64 TLB miss.
*
* Original code from fault.c
* Copyright (C) 2000, 2001 Paolo Alberelli
*
* Fast PTE->TLB refill path
* Copyright (C) 2003 Richard.Curnow@superh.com
*
* IMPORTANT NOTES :
* The do_fast_page_fault function is called from a context in entry.S
* where very few registers have been saved. In particular, the code in
* this file must be compiled not to use ANY caller-save registers that
* are not part of the restricted save set. Also, it means that code in
* this file must not make calls to functions elsewhere in the kernel, or
* else the excepting context will see corruption in its caller-save
* registers. Plus, the entry.S save area is non-reentrant, so this code
* has to run with SR.BL==1, i.e. no interrupts taken inside it and panic
* on any exception.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <asm/system.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <cpu/registers.h>
/* Callable from fault.c, so not static */
inline void __do_tlb_refill(unsigned long address,
unsigned long long is_text_not_data, pte_t *pte)
{
unsigned long long ptel;
unsigned long long pteh=0;
struct tlb_info *tlbp;
unsigned long long next;
/* Get PTEL first */
ptel = pte_val(*pte);
/*
* Set PTEH register
*/
pteh = neff_sign_extend(address & MMU_VPN_MASK);
/* Set the ASID. */
pteh |= get_asid() << PTEH_ASID_SHIFT;
pteh |= PTEH_VALID;
/* Set PTEL register, set_pte has performed the sign extension */
ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
tlbp = is_text_not_data ? &(cpu_data->itlb) : &(cpu_data->dtlb);
next = tlbp->next;
__flush_tlb_slot(next);
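/* Program the slot: configuration-register offset 1 takes PTEL and offset 0
takes PTEH; PTEH (which carries the valid bit) is written last, presumably
so the entry only becomes live once both halves are in place. */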
asm volatile ("putcfg %0,1,%2\n\n\t"
"putcfg %0,0,%1\n"
: : "r" (next), "r" (pteh), "r" (ptel) );
next += TLB_STEP;
if (next > tlbp->last) next = tlbp->first;
tlbp->next = next;
}
static int handle_vmalloc_fault(struct mm_struct *mm,
unsigned long protection_flags,
unsigned long long textaccess,
unsigned long address)
{
pgd_t *dir;
pud_t *pud;
pmd_t *pmd;
static pte_t *pte;
pte_t entry;
dir = pgd_offset_k(address);
pud = pud_offset(dir, address);
if (pud_none_or_clear_bad(pud))
return 0;
pmd = pmd_offset(pud, address);
if (pmd_none_or_clear_bad(pmd))
return 0;
pte = pte_offset_kernel(pmd, address);
entry = *pte;
if (pte_none(entry) || !pte_present(entry))
return 0;
if ((pte_val(entry) & protection_flags) != protection_flags)
return 0;
__do_tlb_refill(address, textaccess, pte);
return 1;
}
static int handle_tlbmiss(struct mm_struct *mm,
unsigned long long protection_flags,
unsigned long long textaccess,
unsigned long address)
{
pgd_t *dir;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pte_t entry;
/* NB. The PGD currently only contains a single entry - there is no
page table tree stored for the top half of the address space since
virtual pages in that region should never be mapped in user mode.
(In kernel mode, the only things in that region are the 512Mb super
page (locked in), and vmalloc (modules) + I/O device pages (handled
by handle_vmalloc_fault), so no PGD for the upper half is required
by kernel mode either).
See how mm->pgd is allocated and initialised in pgd_alloc to see why
the next test is necessary. - RPC */
if (address >= (unsigned long) TASK_SIZE)
/* upper half - never has page table entries. */
return 0;
dir = pgd_offset(mm, address);
if (pgd_none(*dir) || !pgd_present(*dir))
return 0;
pud = pud_offset(dir, address);
if (pud_none(*pud) || !pud_present(*pud))
return 0;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd) || !pmd_present(*pmd))
return 0;
pte = pte_offset_kernel(pmd, address);
entry = *pte;
if (pte_none(entry) || !pte_present(entry))
return 0;
/*
* If the page doesn't have sufficient protection bits set to
* service the kind of fault being handled, there's not much
* point doing the TLB refill. Punt the fault to the general
* handler.
*/
if ((pte_val(entry) & protection_flags) != protection_flags)
return 0;
__do_tlb_refill(address, textaccess, pte);
return 1;
}
/*
* Put all this information into one structure so that everything is just
* arithmetic relative to a single base address. This reduces the number
* of movi/shori pairs needed just to load addresses of static data.
*/
struct expevt_lookup {
unsigned short protection_flags[8];
unsigned char is_text_access[8];
unsigned char is_write_access[8];
};
#define PRU (1<<9)
#define PRW (1<<8)
#define PRX (1<<7)
#define PRR (1<<6)
#define DIRTY (_PAGE_DIRTY | _PAGE_ACCESSED)
#define YOUNG (_PAGE_ACCESSED)
/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
the fault happened in user mode or privileged mode. */
static struct expevt_lookup expevt_lookup_table = {
.protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
.is_text_access = {1, 1, 0, 0, 0, 0, 0, 0}
};
/*
This routine handles page faults that can be serviced just by refilling a
TLB entry from an existing page table entry. (This case represents a very
large majority of page faults.) Return 1 if the fault was successfully
handled. Return 0 if the fault could not be handled. (This leads into the
general fault handling in fault.c which deals with mapping file-backed
pages, stack growth, segmentation faults, swapping etc etc)
*/
asmlinkage int do_fast_page_fault(unsigned long long ssr_md,
unsigned long long expevt,
unsigned long address)
{
struct task_struct *tsk;
struct mm_struct *mm;
unsigned long long textaccess;
unsigned long long protection_flags;
unsigned long long index;
unsigned long long expevt4;
/* The next few lines implement a way of hashing EXPEVT into a
* small array index which can be used to lookup parameters
* specific to the type of TLBMISS being handled.
*
* Note:
* ITLBMISS has EXPEVT==0xa40
* RTLBMISS has EXPEVT==0x040
* WTLBMISS has EXPEVT==0x060
*/
expevt4 = (expevt >> 4);
/* TODO : xor ssr_md into this expression too. Then we can check
* that PRU is set when it needs to be. */
index = expevt4 ^ (expevt4 >> 5);
index &= 7;
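/* Worked example of the hash above: ITLBMISS (0xa40) gives expevt4 = 0xa4,
0xa4 ^ 0x05 = 0xa1, so index 1; RTLBMISS (0x040) gives index 4; WTLBMISS
(0x060) gives index 6 - matching the PRX/PRR/PRW slots of the lookup table. */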
protection_flags = expevt_lookup_table.protection_flags[index];
textaccess = expevt_lookup_table.is_text_access[index];
/* SIM
* Note this is now called with interrupts still disabled
* This is to cope with being called for a missing IO port
* address with interrupts disabled. This should be fixed as
* soon as we have a better 'fast path' miss handler.
*
* Plus take care how you try and debug this stuff.
* For example, writing debug data to a port which you
* have just faulted on is not going to work.
*/
tsk = current;
mm = tsk->mm;
if ((address >= VMALLOC_START && address < VMALLOC_END) ||
(address >= IOBASE_VADDR && address < IOBASE_END)) {
if (ssr_md)
/*
* Process-contexts can never have this address
* range mapped
*/
if (handle_vmalloc_fault(mm, protection_flags,
textaccess, address))
return 1;
} else if (!in_interrupt() && mm) {
if (handle_tlbmiss(mm, protection_flags, textaccess, address))
return 1;
}
return 0;
}


@@ -0,0 +1,108 @@
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
/*
* Write back the dirty D-caches, but not invalidate them.
*
* START: Virtual Address (U0, P1, or P3)
* SIZE: Size of the region.
*/
static void sh4__flush_wback_region(void *start, int size)
{
reg_size_t aligned_start, v, cnt, end;
aligned_start = register_align(start);
v = aligned_start & ~(L1_CACHE_BYTES-1);
end = (aligned_start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
cnt = (end - v) / L1_CACHE_BYTES;
while (cnt >= 8) {
__ocbwb(v); v += L1_CACHE_BYTES;
__ocbwb(v); v += L1_CACHE_BYTES;
__ocbwb(v); v += L1_CACHE_BYTES;
__ocbwb(v); v += L1_CACHE_BYTES;
__ocbwb(v); v += L1_CACHE_BYTES;
__ocbwb(v); v += L1_CACHE_BYTES;
__ocbwb(v); v += L1_CACHE_BYTES;
__ocbwb(v); v += L1_CACHE_BYTES;
cnt -= 8;
}
while (cnt) {
__ocbwb(v); v += L1_CACHE_BYTES;
cnt--;
}
}
/*
* Write back the dirty D-caches and invalidate them.
*
* START: Virtual Address (U0, P1, or P3)
* SIZE: Size of the region.
*/
static void sh4__flush_purge_region(void *start, int size)
{
reg_size_t aligned_start, v, cnt, end;
aligned_start = register_align(start);
v = aligned_start & ~(L1_CACHE_BYTES-1);
end = (aligned_start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
cnt = (end - v) / L1_CACHE_BYTES;
while (cnt >= 8) {
__ocbp(v); v += L1_CACHE_BYTES;
__ocbp(v); v += L1_CACHE_BYTES;
__ocbp(v); v += L1_CACHE_BYTES;
__ocbp(v); v += L1_CACHE_BYTES;
__ocbp(v); v += L1_CACHE_BYTES;
__ocbp(v); v += L1_CACHE_BYTES;
__ocbp(v); v += L1_CACHE_BYTES;
__ocbp(v); v += L1_CACHE_BYTES;
cnt -= 8;
}
while (cnt) {
__ocbp(v); v += L1_CACHE_BYTES;
cnt--;
}
}
/*
* No write back please
*/
static void sh4__flush_invalidate_region(void *start, int size)
{
reg_size_t aligned_start, v, cnt, end;
aligned_start = register_align(start);
v = aligned_start & ~(L1_CACHE_BYTES-1);
end = (aligned_start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
cnt = (end - v) / L1_CACHE_BYTES;
while (cnt >= 8) {
__ocbi(v); v += L1_CACHE_BYTES;
__ocbi(v); v += L1_CACHE_BYTES;
__ocbi(v); v += L1_CACHE_BYTES;
__ocbi(v); v += L1_CACHE_BYTES;
__ocbi(v); v += L1_CACHE_BYTES;
__ocbi(v); v += L1_CACHE_BYTES;
__ocbi(v); v += L1_CACHE_BYTES;
__ocbi(v); v += L1_CACHE_BYTES;
cnt -= 8;
}
while (cnt) {
__ocbi(v); v += L1_CACHE_BYTES;
cnt--;
}
}
void __init sh4__flush_region_init(void)
{
__flush_wback_region = sh4__flush_wback_region;
__flush_invalidate_region = sh4__flush_invalidate_region;
__flush_purge_region = sh4__flush_purge_region;
}


@@ -0,0 +1,91 @@
/*
* arch/sh/mm/hugetlbpage.c
*
* SuperH HugeTLB page support.
*
* Cloned from sparc64 by Paul Mundt.
*
* Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
*/
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
if (pgd) {
pud = pud_alloc(mm, pgd, addr);
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
pte = pte_alloc_map(mm, pmd, addr);
}
}
return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
if (pgd) {
pud = pud_offset(pgd, addr);
if (pud) {
pmd = pmd_offset(pud, addr);
if (pmd)
pte = pte_offset_map(pmd, addr);
}
}
return pte;
}
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
return ERR_PTR(-EINVAL);
}
int pmd_huge(pmd_t pmd)
{
return 0;
}
int pud_huge(pud_t pud)
{
return 0;
}
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
return NULL;
}

326
kernel/arch/sh/mm/init.c Normal file

@@ -0,0 +1,326 @@
/*
* linux/arch/sh/mm/init.c
*
* Copyright (C) 1999 Niibe Yutaka
* Copyright (C) 2002 - 2007 Paul Mundt
*
* Based on linux/arch/i386/mm/init.c:
* Copyright (C) 1995 Linus Torvalds
*/
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pagemap.h>
#include <linux/percpu.h>
#include <linux/io.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/cache.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
pgd_t swapper_pg_dir[PTRS_PER_PGD];
#ifdef CONFIG_SUPERH32
/*
* Handle trivial transitions between cached and uncached
* segments, making use of the 1:1 mapping relationship in
* 512MB lowmem.
*
* This is the offset of the uncached section from its cached alias.
* Default value only valid in 29 bit mode, in 32bit mode will be
* overridden in pmb_init.
*/
unsigned long cached_to_uncached = P2SEG - P1SEG;
#endif
#ifdef CONFIG_MMU
static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset_k(addr);
if (pgd_none(*pgd)) {
pgd_ERROR(*pgd);
return;
}
pud = pud_alloc(NULL, pgd, addr);
if (unlikely(!pud)) {
pud_ERROR(*pud);
return;
}
pmd = pmd_alloc(NULL, pud, addr);
if (unlikely(!pmd)) {
pmd_ERROR(*pmd);
return;
}
pte = pte_offset_kernel(pmd, addr);
if (!pte_none(*pte)) {
pte_ERROR(*pte);
return;
}
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));
local_flush_tlb_one(get_asid(), addr);
}
/*
* As a performance optimization, other platforms preserve the fixmap mapping
* across a context switch, we don't presently do this, but this could be done
* in a similar fashion as to the wired TLB interface that sh64 uses (by way
* of the memory mapped UTLB configuration) -- this unfortunately forces us to
* give up a TLB entry for each mapping we want to preserve. While this may be
* viable for a small number of fixmaps, it's not particularly useful for
* everything and needs to be carefully evaluated. (ie, we may want this for
* the vsyscall page).
*
* XXX: Perhaps add a _PAGE_WIRED flag or something similar that we can pass
* in at __set_fixmap() time to determine the appropriate behavior to follow.
*
* -- PFM.
*/
void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
unsigned long address = __fix_to_virt(idx);
if (idx >= __end_of_fixed_addresses) {
BUG();
return;
}
set_pte_phys(address, phys, prot);
}
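/*
* page_table_range_init() pre-populates the kernel page tables for
* [start, end): every PMD slot in the range that is still empty gets a
* zeroed PTE page from bootmem, so later __set_fixmap() calls never need
* to allocate memory.
*/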
void __init page_table_range_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int i, j, k;
unsigned long vaddr;
vaddr = start;
i = __pgd_offset(vaddr);
j = __pud_offset(vaddr);
k = __pmd_offset(vaddr);
pgd = pgd_base + i;
for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
pud = (pud_t *)pgd;
for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
pmd = (pmd_t *)pud;
for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
if (pmd_none(*pmd)) {
pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
pmd_populate_kernel(&init_mm, pmd, pte);
BUG_ON(pte != pte_offset_kernel(pmd, 0));
}
vaddr += PMD_SIZE;
}
k = 0;
}
j = 0;
}
}
#endif /* CONFIG_MMU */
/*
* paging_init() sets up the page tables
*/
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
unsigned long vaddr, end;
int nid;
/* We don't need to map the kernel through the TLB, as
* it is permanently mapped using P1. So clear the
* entire pgd. */
memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
/* Set an initial value for the MMU.TTB so we don't have to
* check for a null value. */
set_TTB(swapper_pg_dir);
/*
* Populate the relevant portions of swapper_pg_dir so that
* we can use the fixmap entries without calling kmalloc.
* pte's will be filled in by __set_fixmap().
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
page_table_range_init(vaddr, end, swapper_pg_dir);
kmap_coherent_init();
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long low, start_pfn;
start_pfn = pgdat->bdata->node_min_pfn;
low = pgdat->bdata->node_low_pfn;
if (max_zone_pfns[ZONE_NORMAL] < low)
max_zone_pfns[ZONE_NORMAL] = low;
printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
nid, start_pfn, low);
}
free_area_init_nodes(max_zone_pfns);
/* Set up the uncached fixmap */
set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
}
void __init mem_init(void)
{
int codesize, datasize, initsize;
int nid;
num_physpages = 0;
high_memory = NULL;
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long node_pages = 0;
void *node_high_memory;
num_physpages += pgdat->node_present_pages;
if (pgdat->node_spanned_pages)
node_pages = free_all_bootmem_node(pgdat);
totalram_pages += node_pages;
node_high_memory = (void *)__va((pgdat->node_start_pfn +
pgdat->node_spanned_pages) <<
PAGE_SHIFT);
if (node_high_memory > high_memory)
high_memory = node_high_memory;
}
/* Set this up early, so we can take care of the zero page */
cpu_cache_init();
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
__flush_wback_region(empty_zero_page, PAGE_SIZE);
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
"%dk data, %dk init)\n",
nr_free_pages() << (PAGE_SHIFT-10),
num_physpages << (PAGE_SHIFT-10),
codesize >> 10,
datasize >> 10,
initsize >> 10);
/* Initialize the vDSO */
vsyscall_init();
}
void free_initmem(void)
{
unsigned long addr;
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
printk("Freeing unused kernel memory: %ldk freed\n",
((unsigned long)&__init_end -
(unsigned long)&__init_begin) >> 10);
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
unsigned long p;
for (p = start; p < end; p += PAGE_SIZE) {
ClearPageReserved(virt_to_page(p));
init_page_count(virt_to_page(p));
free_page(p);
totalram_pages++;
}
printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
}
#endif
#if THREAD_SHIFT < PAGE_SHIFT
static struct kmem_cache *thread_info_cache;
struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
struct thread_info *ti;
ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
if (unlikely(ti == NULL))
return NULL;
#ifdef CONFIG_DEBUG_STACK_USAGE
memset(ti, 0, THREAD_SIZE);
#endif
return ti;
}
void free_thread_info(struct thread_info *ti)
{
kmem_cache_free(thread_info_cache, ti);
}
void thread_info_cache_init(void)
{
thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
THREAD_SIZE, 0, NULL);
BUG_ON(thread_info_cache == NULL);
}
#endif /* THREAD_SHIFT < PAGE_SHIFT */
#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
pgdat = NODE_DATA(nid);
/* We only have ZONE_NORMAL, so this is easy.. */
ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
start_pfn, nr_pages);
if (unlikely(ret))
printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);
#ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 addr)
{
/* Node 0 for now.. */
return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#endif /* CONFIG_MEMORY_HOTPLUG */


@@ -0,0 +1,148 @@
/*
* arch/sh/mm/ioremap.c
*
* Re-map IO memory to kernel address space so that we can access it.
* This is needed for high PCI addresses that aren't mapped in the
* 640k-1MB IO memory area on PC's
*
* (C) Copyright 1995 1996 Linus Torvalds
* (C) Copyright 2005, 2006 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General
* Public License. See the file "COPYING" in the main directory of this
* archive for more details.
*/
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/addrspace.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/mmu.h>
/*
* Remap an arbitrary physical address space into the kernel virtual
* address space.
*
* NOTE! We need to allow non-page-aligned mappings too: we will obviously
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*/
static void __iomem *__ioremap_prot(unsigned long phys_addr, unsigned long size,
pgprot_t pgprot)
{
struct vm_struct * area;
unsigned long offset, last_addr, addr;
int simple = (pgprot_val(pgprot) == pgprot_val(PAGE_KERNEL)) ||
(pgprot_val(pgprot) == pgprot_val(PAGE_KERNEL_NOCACHE));
int cached = pgprot_val(pgprot) & _PAGE_CACHABLE;
/* Don't allow wraparound or zero size */
last_addr = phys_addr + size - 1;
if (!size || last_addr < phys_addr)
return NULL;
/*
* If we're in the fixed PCI memory range, mapping through page
* tables is not only pointless, but also fundamentally broken.
* Just return the physical address instead.
*
* For boards that map a small PCI memory aperture somewhere in
* P1/P2 space, ioremap() will already do the right thing,
* and we'll never get this far.
*/
if (is_pci_memory_fixed_range(phys_addr, size))
return (void __iomem *)phys_addr;
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
if ((phys_addr >= __pa(memory_start)) && (last_addr < __pa(memory_end))) {
char *t_addr, *t_end;
struct page *page;
t_addr = __va(phys_addr);
t_end = t_addr + (size - 1);
for (page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
if (!PageReserved(page))
return NULL;
}
/* P4 uncached addresses are permanently mapped */
if ((PXSEG(phys_addr) == P4SEG) && simple && !cached)
return (void __iomem *)phys_addr;
/*
* Mappings have to be page-aligned
*/
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
#ifdef CONFIG_PMB
addr = pmb_remap(phys_addr, size, cached ? _PAGE_CACHABLE : 0);
if (addr)
return (void __iomem *)(offset + (char *)addr);
#endif
area = get_vm_area(size, VM_IOREMAP);
if (!area)
return NULL;
area->phys_addr = phys_addr;
addr = (unsigned long)area->addr;
if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
vunmap((void *)addr);
return NULL;
}
return (void __iomem *)(offset + (char *)addr);
}
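/*
 * Worked example of the alignment handling above (made-up numbers,
 * assuming 4KiB pages): a request for phys_addr 0x1fe00804 and size 0x10
 * gives offset == 0x804, a page-aligned phys_addr of 0x1fe00000 and a
 * size rounded up to one page, so the caller gets back mapping_base +
 * 0x804 while the PMB or page tables only ever see page-aligned
 * addresses.
 */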
void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
unsigned long flags)
{
pgprot_t pgprot;
if (unlikely(flags & _PAGE_CACHABLE))
pgprot = PAGE_KERNEL;
else
pgprot = PAGE_KERNEL_NOCACHE;
return __ioremap_prot(phys_addr, size, pgprot);
}
EXPORT_SYMBOL(__ioremap);
void __iounmap(void __iomem *addr)
{
unsigned long vaddr = (unsigned long __force)addr;
unsigned long seg = PXSEG(vaddr);
struct vm_struct *p;
if (seg == P4SEG || is_pci_memory_fixed_range(vaddr, 0))
return;
#ifdef CONFIG_29BIT
if (seg < P3SEG)
return;
#endif
#ifdef CONFIG_PMB
if (pmb_unmap(vaddr))
return;
#endif
p = remove_vm_area((void *)(vaddr & PAGE_MASK));
if (!p) {
printk(KERN_ERR "%s: bad address %p\n", __func__, addr);
return;
}
kfree(p);
}
EXPORT_SYMBOL(__iounmap);


@@ -0,0 +1,326 @@
/*
* arch/sh/mm/ioremap_64.c
*
* Copyright (C) 2000, 2001 Paolo Alberelli
* Copyright (C) 2003 - 2007 Paul Mundt
*
* Mostly derived from arch/sh/mm/ioremap.c which, in turn is mostly
* derived from arch/i386/mm/ioremap.c .
*
* (C) Copyright 1995 1996 Linus Torvalds
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/addrspace.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/mmu.h>
static struct resource shmedia_iomap = {
.name = "shmedia_iomap",
.start = IOBASE_VADDR + PAGE_SIZE,
.end = IOBASE_END - 1,
};
static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
unsigned long flags);
static void shmedia_unmapioaddr(unsigned long vaddr);
static void __iomem *shmedia_ioremap(struct resource *res, u32 pa,
int sz, unsigned long flags);
/*
* We have the same problem as the SPARC, so let's have the same comment:
* Our mini-allocator...
* Boy this is gross! We need it because we must map I/O for
* timers and interrupt controller before the kmalloc is available.
*/
#define XNMLN 15
#define XNRES 10
struct xresource {
struct resource xres; /* Must be first */
int xflag; /* 1 == used */
char xname[XNMLN+1];
};
static struct xresource xresv[XNRES];
static struct xresource *xres_alloc(void)
{
struct xresource *xrp;
int n;
xrp = xresv;
for (n = 0; n < XNRES; n++) {
if (xrp->xflag == 0) {
xrp->xflag = 1;
return xrp;
}
xrp++;
}
return NULL;
}
static void xres_free(struct xresource *xrp)
{
xrp->xflag = 0;
}
static struct resource *shmedia_find_resource(struct resource *root,
unsigned long vaddr)
{
struct resource *res;
for (res = root->child; res; res = res->sibling)
if (res->start <= vaddr && res->end >= vaddr)
return res;
return NULL;
}
static void __iomem *shmedia_alloc_io(unsigned long phys, unsigned long size,
const char *name, unsigned long flags)
{
struct xresource *xres;
struct resource *res;
char *tack;
int tlen;
if (name == NULL)
name = "???";
xres = xres_alloc();
if (xres != 0) {
tack = xres->xname;
res = &xres->xres;
} else {
printk_once(KERN_NOTICE "%s: done with statics, "
"switching to kmalloc\n", __func__);
tlen = strlen(name);
tack = kmalloc(sizeof(struct resource) + tlen + 1, GFP_KERNEL);
if (!tack)
return NULL;
memset(tack, 0, sizeof(struct resource));
res = (struct resource *) tack;
tack += sizeof(struct resource);
}
strncpy(tack, name, XNMLN);
tack[XNMLN] = 0;
res->name = tack;
return shmedia_ioremap(res, phys, size, flags);
}
static void __iomem *shmedia_ioremap(struct resource *res, u32 pa, int sz,
unsigned long flags)
{
unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
unsigned long round_sz = (offset + sz + PAGE_SIZE-1) & PAGE_MASK;
unsigned long va;
unsigned int psz;
if (allocate_resource(&shmedia_iomap, res, round_sz,
shmedia_iomap.start, shmedia_iomap.end,
PAGE_SIZE, NULL, NULL) != 0) {
panic("alloc_io_res(%s): cannot occupy\n",
(res->name != NULL) ? res->name : "???");
}
va = res->start;
pa &= PAGE_MASK;
psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
shmedia_mapioaddr(pa, va, flags);
va += PAGE_SIZE;
pa += PAGE_SIZE;
}
return (void __iomem *)(unsigned long)(res->start + offset);
}
static void shmedia_free_io(struct resource *res)
{
unsigned long len = res->end - res->start + 1;
BUG_ON((len & (PAGE_SIZE - 1)) != 0);
while (len) {
len -= PAGE_SIZE;
shmedia_unmapioaddr(res->start + len);
}
release_resource(res);
}
static __init_refok void *sh64_get_page(void)
{
void *page;
if (slab_is_available())
page = (void *)get_zeroed_page(GFP_KERNEL);
else
page = alloc_bootmem_pages(PAGE_SIZE);
if (!page || ((unsigned long)page & ~PAGE_MASK))
panic("sh64_get_page: Out of memory already?\n");
return page;
}
static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
unsigned long flags)
{
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep, pte;
pgprot_t prot;
pr_debug("shmedia_mapiopage pa %08lx va %08lx\n", pa, va);
if (!flags)
flags = 1; /* 1 = CB0-1 device */
pgdp = pgd_offset_k(va);
if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
pudp = (pud_t *)sh64_get_page();
set_pgd(pgdp, __pgd((unsigned long)pudp | _KERNPG_TABLE));
}
pudp = pud_offset(pgdp, va);
if (pud_none(*pudp) || !pud_present(*pudp)) {
pmdp = (pmd_t *)sh64_get_page();
set_pud(pudp, __pud((unsigned long)pmdp | _KERNPG_TABLE));
}
pmdp = pmd_offset(pudp, va);
if (pmd_none(*pmdp) || !pmd_present(*pmdp)) {
ptep = (pte_t *)sh64_get_page();
set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
}
prot = __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |
_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SHARED | flags);
pte = pfn_pte(pa >> PAGE_SHIFT, prot);
ptep = pte_offset_kernel(pmdp, va);
if (!pte_none(*ptep) &&
pte_val(*ptep) != pte_val(pte))
pte_ERROR(*ptep);
set_pte(ptep, pte);
flush_tlb_kernel_range(va, PAGE_SIZE);
}
static void shmedia_unmapioaddr(unsigned long vaddr)
{
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
pgdp = pgd_offset_k(vaddr);
if (pgd_none(*pgdp) || pgd_bad(*pgdp))
return;
pudp = pud_offset(pgdp, vaddr);
if (pud_none(*pudp) || pud_bad(*pudp))
return;
pmdp = pmd_offset(pudp, vaddr);
if (pmd_none(*pmdp) || pmd_bad(*pmdp))
return;
ptep = pte_offset_kernel(pmdp, vaddr);
if (pte_none(*ptep) || !pte_present(*ptep))
return;
clear_page((void *)ptep);
pte_clear(&init_mm, vaddr, ptep);
}
void __iomem *__ioremap(unsigned long offset, unsigned long size,
unsigned long flags)
{
char name[14];
sprintf(name, "phys_%08x", (u32)offset);
return shmedia_alloc_io(offset, size, name, flags);
}
EXPORT_SYMBOL(__ioremap);
void __iounmap(void __iomem *virtual)
{
unsigned long vaddr = (unsigned long)virtual & PAGE_MASK;
struct resource *res;
unsigned int psz;
res = shmedia_find_resource(&shmedia_iomap, vaddr);
if (!res) {
printk(KERN_ERR "%s: Failed to free 0x%08lx\n",
__func__, vaddr);
return;
}
psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
shmedia_free_io(res);
if ((char *)res >= (char *)xresv &&
(char *)res < (char *)&xresv[XNRES]) {
xres_free((struct xresource *)res);
} else {
kfree(res);
}
}
EXPORT_SYMBOL(__iounmap);
static int
ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
void *data)
{
char *p = buf, *e = buf + length;
struct resource *r;
const char *nm;
for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) {
if (p + 32 >= e) /* Better than nothing */
break;
nm = r->name;
if (nm == NULL)
nm = "???";
p += sprintf(p, "%08lx-%08lx: %s\n",
(unsigned long)r->start,
(unsigned long)r->end, nm);
}
return p-buf;
}
static int __init register_proc_onchip(void)
{
create_proc_read_entry("io_map", 0, 0, ioremap_proc_info,
&shmedia_iomap);
return 0;
}
late_initcall(register_proc_onchip);

65
kernel/arch/sh/mm/kmap.c Normal file

@@ -0,0 +1,65 @@
/*
* arch/sh/mm/kmap.c
*
* Copyright (C) 1999, 2000, 2002 Niibe Yutaka
* Copyright (C) 2002 - 2009 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#define kmap_get_fixmap_pte(vaddr) \
pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
static pte_t *kmap_coherent_pte;
void __init kmap_coherent_init(void)
{
unsigned long vaddr;
/* cache the first coherent kmap pte */
vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
}
void *kmap_coherent(struct page *page, unsigned long addr)
{
enum fixed_addresses idx;
unsigned long vaddr;
BUG_ON(test_bit(PG_dcache_dirty, &page->flags));
pagefault_disable();
idx = FIX_CMAP_END -
((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT);
vaddr = __fix_to_virt(idx);
BUG_ON(!pte_none(*(kmap_coherent_pte - idx)));
set_pte(kmap_coherent_pte - idx, mk_pte(page, PAGE_KERNEL));
return (void *)vaddr;
}
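/*
 * Illustrative index calculation (assuming an alias_mask of 0x1000, one
 * alias bit above a 4KiB page): for addr == 0x00403000 the expression
 * (addr & alias_mask) >> PAGE_SHIFT is 1, so idx == FIX_CMAP_END - 1 and
 * the temporary mapping is placed on the fixmap slot whose colour
 * matches the user address being aliased.
 */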
void kunmap_coherent(void *kvaddr)
{
if (kvaddr >= (void *)FIXADDR_START) {
unsigned long vaddr = (unsigned long)kvaddr & PAGE_MASK;
enum fixed_addresses idx = __virt_to_fix(vaddr);
/* XXX.. Kill this later, here for sanity at the moment.. */
__flush_purge_region((void *)vaddr, PAGE_SIZE);
pte_clear(&init_mm, vaddr, kmap_coherent_pte - idx);
local_flush_tlb_one(get_asid(), vaddr);
}
pagefault_enable();
}

254
kernel/arch/sh/mm/mmap.c Normal file

@@ -0,0 +1,254 @@
/*
* arch/sh/mm/mmap.c
*
* Copyright (C) 2008 - 2009 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <asm/page.h>
#include <asm/processor.h>
unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */
EXPORT_SYMBOL(shm_align_mask);
#ifdef CONFIG_MMU
/*
* To avoid cache aliases, we map the shared page with same color.
*/
static inline unsigned long COLOUR_ALIGN(unsigned long addr,
unsigned long pgoff)
{
unsigned long base = (addr + shm_align_mask) & ~shm_align_mask;
unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
return base + off;
}
static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
unsigned long pgoff)
{
unsigned long base = addr & ~shm_align_mask;
unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
if (base + off <= addr)
return base + off;
return base - off;
}
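/*
 * Worked example (assuming 4KiB pages and shm_align_mask == 0x3fff, i.e.
 * 16KiB cache colouring): COLOUR_ALIGN(0x4000, 3) keeps the aligned base
 * 0x4000 and adds the colour of the file offset, (3 << PAGE_SHIFT) &
 * 0x3fff == 0x3000, giving 0x7000.  Shared mappings of the same page
 * therefore always land on the same cache colour.
 */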
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long start_addr;
int do_colour_align;
if (flags & MAP_FIXED) {
/* We do not accept a shared mapping if it would violate
* cache aliasing constraints.
*/
if ((flags & MAP_SHARED) &&
((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
return -EINVAL;
return addr;
}
if (unlikely(len > TASK_SIZE))
return -ENOMEM;
do_colour_align = 0;
if (filp || (flags & MAP_SHARED))
do_colour_align = 1;
if (addr) {
if (do_colour_align)
addr = COLOUR_ALIGN(addr, pgoff);
else
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
if (len > mm->cached_hole_size) {
start_addr = addr = mm->free_area_cache;
} else {
mm->cached_hole_size = 0;
start_addr = addr = TASK_UNMAPPED_BASE;
}
full_search:
if (do_colour_align)
addr = COLOUR_ALIGN(addr, pgoff);
else
addr = PAGE_ALIGN(mm->free_area_cache);
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (unlikely(TASK_SIZE - len < addr)) {
/*
* Start a new search - just in case we missed
* some holes.
*/
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
mm->cached_hole_size = 0;
goto full_search;
}
return -ENOMEM;
}
if (likely(!vma || addr + len <= vma->vm_start)) {
/*
* Remember the place where we stopped the search:
*/
mm->free_area_cache = addr + len;
return addr;
}
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
if (do_colour_align)
addr = COLOUR_ALIGN(addr, pgoff);
}
}
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
int do_colour_align;
if (flags & MAP_FIXED) {
/* We do not accept a shared mapping if it would violate
* cache aliasing constraints.
*/
if ((flags & MAP_SHARED) &&
((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
return -EINVAL;
return addr;
}
if (unlikely(len > TASK_SIZE))
return -ENOMEM;
do_colour_align = 0;
if (filp || (flags & MAP_SHARED))
do_colour_align = 1;
/* requesting a specific address */
if (addr) {
if (do_colour_align)
addr = COLOUR_ALIGN(addr, pgoff);
else
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
/* check if free_area_cache is useful for us */
if (len <= mm->cached_hole_size) {
mm->cached_hole_size = 0;
mm->free_area_cache = mm->mmap_base;
}
/* either no address requested or can't fit in requested address hole */
addr = mm->free_area_cache;
if (do_colour_align) {
unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff);
addr = base + len;
}
/* make sure it can fit in the remaining address space */
if (likely(addr > len)) {
vma = find_vma(mm, addr-len);
if (!vma || addr <= vma->vm_start) {
/* remember the address as a hint for next time */
return (mm->free_area_cache = addr-len);
}
}
if (unlikely(mm->mmap_base < len))
goto bottomup;
addr = mm->mmap_base-len;
if (do_colour_align)
addr = COLOUR_ALIGN_DOWN(addr, pgoff);
do {
/*
* Lookup failure means no vma is above this address,
* else if new region fits below vma->vm_start,
* return with success:
*/
vma = find_vma(mm, addr);
if (likely(!vma || addr+len <= vma->vm_start)) {
/* remember the address as a hint for next time */
return (mm->free_area_cache = addr);
}
/* remember the largest hole we saw so far */
if (addr + mm->cached_hole_size < vma->vm_start)
mm->cached_hole_size = vma->vm_start - addr;
/* try just below the current vma->vm_start */
addr = vma->vm_start-len;
if (do_colour_align)
addr = COLOUR_ALIGN_DOWN(addr, pgoff);
} while (likely(len < vma->vm_start));
bottomup:
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
* can happen with large stack limits and large mmap()
* allocations.
*/
mm->cached_hole_size = ~0UL;
mm->free_area_cache = TASK_UNMAPPED_BASE;
addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
/*
* Restore the topdown base:
*/
mm->free_area_cache = mm->mmap_base;
mm->cached_hole_size = ~0UL;
return addr;
}
#endif /* CONFIG_MMU */
/*
* You really shouldn't be using read() or write() on /dev/mem. This
* might go away in the future.
*/
int valid_phys_addr_range(unsigned long addr, size_t count)
{
if (addr < __MEMORY_START)
return 0;
if (addr + count > __pa(high_memory))
return 0;
return 1;
}
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
return 1;
}

96
kernel/arch/sh/mm/nommu.c Normal file

@@ -0,0 +1,96 @@
/*
* arch/sh/mm/nommu.c
*
* Various helper routines and stubs for MMUless SH.
*
* Copyright (C) 2002 - 2009 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/uaccess.h>
/*
* Nothing too terribly exciting here ..
*/
void copy_page(void *to, void *from)
{
memcpy(to, from, PAGE_SIZE);
}
__kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
{
memcpy(to, from, n);
return 0;
}
__kernel_size_t __clear_user(void *to, __kernel_size_t n)
{
memset(to, 0, n);
return 0;
}
void local_flush_tlb_all(void)
{
BUG();
}
void local_flush_tlb_mm(struct mm_struct *mm)
{
BUG();
}
void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
BUG();
}
void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
BUG();
}
void local_flush_tlb_one(unsigned long asid, unsigned long page)
{
BUG();
}
void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
BUG();
}
void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
}
void __init kmap_coherent_init(void)
{
}
void *kmap_coherent(struct page *page, unsigned long addr)
{
BUG();
return NULL;
}
void kunmap_coherent(void *kvaddr)
{
BUG();
}
void __init page_table_range_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
}
void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
}

101
kernel/arch/sh/mm/numa.c Normal file

@@ -0,0 +1,101 @@
/*
* arch/sh/mm/numa.c - Multiple node support for SH machines
*
* Copyright (C) 2007 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/lmb.h>
#include <linux/mm.h>
#include <linux/numa.h>
#include <linux/pfn.h>
#include <asm/sections.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL_GPL(node_data);
/*
* On SH machines the conventional approach is to stash system RAM
* in node 0, and other memory blocks in to node 1 and up, ordered by
* latency. Each node's pgdat is node-local at the beginning of the node,
* immediately followed by the node mem map.
*/
void __init setup_memory(void)
{
unsigned long free_pfn = PFN_UP(__pa(_end));
u64 base = min_low_pfn << PAGE_SHIFT;
u64 size = (max_low_pfn << PAGE_SHIFT) - base;
lmb_add(base, size);
/* Reserve the LMB regions used by the kernel, initrd, etc.. */
lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
(PFN_PHYS(free_pfn) + PAGE_SIZE - 1) -
(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));
/*
* Node 0 sets up its pgdat at the first available pfn,
* and bumps it up before setting up the bootmem allocator.
*/
NODE_DATA(0) = pfn_to_kaddr(free_pfn);
memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
free_pfn += PFN_UP(sizeof(struct pglist_data));
NODE_DATA(0)->bdata = &bootmem_node_data[0];
/* Set up node 0 */
setup_bootmem_allocator(free_pfn);
/* Give the platforms a chance to hook up their nodes */
plat_mem_setup();
}
void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
{
unsigned long bootmap_pages;
unsigned long start_pfn, end_pfn;
unsigned long bootmem_paddr;
/* Don't allow bogus node assignment */
BUG_ON(nid > MAX_NUMNODES || nid == 0);
start_pfn = start >> PAGE_SHIFT;
end_pfn = end >> PAGE_SHIFT;
lmb_add(start, end - start);
__add_active_range(nid, start_pfn, end_pfn);
/* Node-local pgdat */
NODE_DATA(nid) = __va(lmb_alloc_base(sizeof(struct pglist_data),
SMP_CACHE_BYTES, end));
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
NODE_DATA(nid)->node_start_pfn = start_pfn;
NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
/* Node-local bootmap */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
bootmem_paddr = lmb_alloc_base(bootmap_pages << PAGE_SHIFT,
PAGE_SIZE, end);
init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
start_pfn, end_pfn);
free_bootmem_with_active_regions(nid, end_pfn);
/* Reserve the pgdat and bootmap space with the bootmem allocator */
reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
sizeof(struct pglist_data), BOOTMEM_DEFAULT);
reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr,
bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
/* It's up */
node_set_online(nid);
/* Kick sparsemem */
sparse_memory_present_with_active_regions(nid);
}


@@ -0,0 +1,45 @@
/*
* arch/sh/mm/fixed_pmb.c
*
* Copyright (C) 2009 Renesas Solutions Corp.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
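/*
 * Walk the PMB entries inherited from the boot loader and rewrite the
 * cacheability bits of the valid, cacheable ones so that they match the
 * kernel's configured cache policy (for example, CONFIG_CACHE_WRITEBACK
 * clears the write-through bit, CONFIG_CACHE_WRITETHROUGH sets it).
 */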
static int __uses_jump_to_uncached fixed_pmb_init(void)
{
int i;
unsigned long addr, data;
jump_to_uncached();
for (i = 0; i < PMB_ENTRY_MAX; i++) {
addr = PMB_DATA + (i << PMB_E_SHIFT);
data = ctrl_inl(addr);
if (!(data & PMB_V))
continue;
if (data & PMB_C) {
#if defined(CONFIG_CACHE_WRITETHROUGH)
data |= PMB_WT;
#elif defined(CONFIG_CACHE_WRITEBACK)
data &= ~PMB_WT;
#else
data &= ~(PMB_C | PMB_WT);
#endif
}
ctrl_outl(data, addr);
}
back_to_cached();
return 0;
}
arch_initcall(fixed_pmb_init);

861
kernel/arch/sh/mm/pmb.c Normal file

@@ -0,0 +1,861 @@
/*
* arch/sh/mm/pmb.c
*
* Privileged Space Mapping Buffer (PMB) Support.
*
* Copyright (C) 2005, 2006, 2007 Paul Mundt
*
* P1/P2 Section mapping definitions from map32.h, which was:
*
* Copyright 2003 (c) Lineo Solutions,Inc.
*
* Large changes to support dynamic mappings using PMB
* Copyright (c) 2007 STMicroelectronics Limited
* Author: Stuart Menefy <stuart.menefy@st.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/pm.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
#if 0
#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
#else
#define DPRINTK(fmt, args...) do { ; } while (0)
#endif
#define NR_PMB_ENTRIES 16
#define MIN_PMB_MAPPING_SIZE (8*1024*1024)
#ifdef CONFIG_PMB_64M_TILES
#define PMB_FIXED_SHIFT 26
#define PMB_VIRT2POS(virt) (((virt) >> PMB_FIXED_SHIFT) & (NR_PMB_ENTRIES - 1))
#define PMB_POS2VIRT(pos) (((pos) << PMB_FIXED_SHIFT) + P1SEG)
#endif
struct pmb_entry {
unsigned long vpn;
unsigned long ppn;
unsigned long flags; /* Only size */
struct pmb_entry *next;
unsigned long size;
int pos;
};
struct pmb_mapping {
unsigned long phys;
unsigned long virt;
unsigned long size;
unsigned long flags; /* Only cache etc */
struct pmb_entry *entries;
struct pmb_mapping *next;
int usage;
};
static DEFINE_RWLOCK(pmb_lock);
static unsigned long pmb_map;
static struct pmb_entry pmbe[NR_PMB_ENTRIES] __attribute__ ((__section__ (".uncached.data")));
static struct pmb_mapping pmbm[NR_PMB_ENTRIES];
static struct pmb_mapping *pmb_mappings, *pmb_mappings_free;
static __always_inline unsigned long mk_pmb_entry(unsigned int entry)
{
return (entry & PMB_E_MASK) << PMB_E_SHIFT;
}
static __always_inline unsigned long mk_pmb_addr(unsigned int entry)
{
return mk_pmb_entry(entry) | PMB_ADDR;
}
static __always_inline unsigned long mk_pmb_data(unsigned int entry)
{
return mk_pmb_entry(entry) | PMB_DATA;
}
static __always_inline void __set_pmb_entry(unsigned long vpn,
unsigned long ppn, unsigned long flags, int pos)
{
#ifdef CONFIG_CACHE_WRITETHROUGH
/*
* When we are in 32-bit address extended mode, CCR.CB becomes
* invalid, so care must be taken to manually adjust cacheable
* translations.
*/
if (likely(flags & PMB_C))
flags |= PMB_WT;
#endif
#ifdef CONFIG_PMB_64M_TILES
BUG_ON(pos != PMB_VIRT2POS(vpn));
#endif
ctrl_outl(0, mk_pmb_addr(pos));
ctrl_outl(vpn, mk_pmb_addr(pos));
ctrl_outl(ppn | flags | PMB_V, mk_pmb_data(pos));
/*
* Read back the value just written. This shouldn't be necessary,
* but when resuming from hibernation it appears to fix a problem.
*/
ctrl_inl(mk_pmb_addr(pos));
}
static void __uses_jump_to_uncached set_pmb_entry(unsigned long vpn,
unsigned long ppn, unsigned long flags, int pos)
{
jump_to_uncached();
__set_pmb_entry(vpn, ppn, flags, pos);
back_to_cached();
}
static __always_inline void __clear_pmb_entry(int pos)
{
#ifdef CONFIG_PMB_64M_TILES
ctrl_outl(0, mk_pmb_addr(pos));
ctrl_outl(PMB_POS2VIRT(pos), mk_pmb_addr(pos));
ctrl_outl((CONFIG_PMB_64M_TILES_PHYS & ~((1 << PMB_FIXED_SHIFT)-1)) |
PMB_SZ_64M | PMB_WT | PMB_UB | PMB_V, mk_pmb_data(pos));
#else
ctrl_outl(0, mk_pmb_addr(pos));
#endif
}
static void __uses_jump_to_uncached clear_pmb_entry(int pos)
{
jump_to_uncached();
__clear_pmb_entry(pos);
back_to_cached();
}
static int pmb_alloc(int pos)
{
if (likely(pos == PMB_NO_ENTRY))
pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
repeat:
if (unlikely(pos >= NR_PMB_ENTRIES))
return PMB_NO_ENTRY;
if (test_and_set_bit(pos, &pmb_map)) {
pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
goto repeat;
}
return pos;
}
static void pmb_free(int entry)
{
clear_bit(entry, &pmb_map);
}
static struct pmb_mapping* pmb_mapping_alloc(void)
{
struct pmb_mapping *mapping;
if (pmb_mappings_free == NULL)
return NULL;
mapping = pmb_mappings_free;
pmb_mappings_free = mapping->next;
memset(mapping, 0, sizeof(*mapping));
return mapping;
}
static void pmb_mapping_free(struct pmb_mapping* mapping)
{
mapping->next = pmb_mappings_free;
pmb_mappings_free = mapping;
}
static __always_inline void __pmb_mapping_set(struct pmb_mapping *mapping)
{
struct pmb_entry *entry = mapping->entries;
do {
__set_pmb_entry(entry->vpn, entry->ppn,
entry->flags | mapping->flags, entry->pos);
entry = entry->next;
} while (entry);
}
static void pmb_mapping_set(struct pmb_mapping *mapping)
{
struct pmb_entry *entry = mapping->entries;
do {
set_pmb_entry(entry->vpn, entry->ppn,
entry->flags | mapping->flags, entry->pos);
entry = entry->next;
} while (entry);
}
static void pmb_mapping_clear_and_free(struct pmb_mapping *mapping)
{
struct pmb_entry *entry = mapping->entries;
do {
clear_pmb_entry(entry->pos);
pmb_free(entry->pos);
entry = entry->next;
} while (entry);
}
#ifdef CONFIG_PMB_64M_TILES
static struct {
unsigned long size;
int flag;
} pmb_sizes[] = {
{ .size = 1 << PMB_FIXED_SHIFT, .flag = PMB_SZ_64M, },
};
/*
* Different algorithm when we tile the entire P1/P2 region with
* 64M PMB entries. This means the PMB entry is tied to the virtual
* address it covers, so we only need to search for the virtual
* address which accommodates the mapping we're interested in.
*/
static struct pmb_mapping* pmb_calc(unsigned long phys, unsigned long size,
unsigned long req_virt, int *req_pos,
unsigned long pmb_flags)
{
struct pmb_mapping *new_mapping;
struct pmb_mapping **prev_ptr;
unsigned long prev_end, next_start;
struct pmb_mapping *next_mapping;
unsigned long new_start, new_end;
const unsigned long pmb_size = pmb_sizes[0].size;
struct pmb_entry *entry;
struct pmb_entry **prev_entry_ptr;
if (size == 0)
return NULL;
new_mapping = pmb_mapping_alloc();
if (!new_mapping)
return NULL;
DPRINTK("request: phys %08lx, size %08lx\n", phys, size);
prev_end = P1SEG;
next_mapping = pmb_mappings;
prev_ptr = &pmb_mappings;
for (;;) {
if (next_mapping == NULL)
next_start = P3SEG;
else
next_start = next_mapping->virt;
DPRINTK("checking space between %08lx and %08lx\n",
prev_end, next_start);
if (req_virt) {
if ((req_virt < prev_end) || (req_virt > next_start))
goto next;
new_start = req_virt;
} else {
new_start = prev_end + (phys & (pmb_size-1));
}
new_end = new_start + size;
if (new_end <= next_start)
break;
next:
if (next_mapping == NULL) {
DPRINTK("failed, give up\n");
return NULL;
}
prev_ptr = &next_mapping->next;
prev_end = next_mapping->virt + next_mapping->size;
next_mapping = next_mapping->next;
}
DPRINTK("found space at %08lx to %08lx\n", new_start, new_end);
BUG_ON(req_pos && (*req_pos != PMB_VIRT2POS(new_start)));
phys &= ~(pmb_size - 1);
new_start &= ~(pmb_size - 1);
new_mapping->phys = phys;
new_mapping->virt = new_start;
new_mapping->size = 0;
new_mapping->flags = pmb_flags;
new_mapping->entries = NULL;
new_mapping->usage = 1;
new_mapping->next = *prev_ptr;
*prev_ptr = new_mapping;
prev_entry_ptr = &new_mapping->entries;
while (new_start < new_end) {
int pos = PMB_VIRT2POS(new_start);
pos = pmb_alloc(pos);
BUG_ON(pos == PMB_NO_ENTRY);
DPRINTK("using PMB entry %d\n", pos);
entry = &pmbe[pos];
entry->vpn = new_start;
entry->ppn = phys;
entry->flags = pmb_sizes[0].flag;
entry->next = NULL;
entry->size = pmb_size;
*prev_entry_ptr = entry;
prev_entry_ptr = &entry->next;
new_start += pmb_size;
phys += pmb_size;
new_mapping->size += pmb_size;
}
return new_mapping;
}
#else
static struct {
unsigned long size;
int flag;
} pmb_sizes[] = {
{ .size = 0x01000000, .flag = PMB_SZ_16M, },
{ .size = 0x04000000, .flag = PMB_SZ_64M, },
{ .size = 0x08000000, .flag = PMB_SZ_128M, },
{ .size = 0x20000000, .flag = PMB_SZ_512M, },
};
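/*
 * Illustrative tiling (assumed numbers, not from real hardware): a 96MiB
 * region whose start is 64MiB- but not 128MiB-aligned comes out of
 * pmb_calc() below as one 64MiB entry followed by two 16MiB entries.
 * The search prefers the entry size that covers the most of what is
 * left, but skips sizes that would waste half or more of the entry.
 */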
static struct pmb_mapping* pmb_calc(unsigned long phys, unsigned long size,
unsigned long req_virt, int *req_pos,
unsigned long pmb_flags)
{
unsigned long orig_phys = phys;
unsigned long orig_size = size;
int max_i = ARRAY_SIZE(pmb_sizes)-1;
struct pmb_mapping *new_mapping;
unsigned long alignment;
unsigned long virt_offset;
struct pmb_entry **prev_entry_ptr;
unsigned long prev_end, next_start;
struct pmb_mapping *next_mapping;
struct pmb_mapping **prev_ptr;
struct pmb_entry *entry;
unsigned long start;
if (size == 0)
return NULL;
new_mapping = pmb_mapping_alloc();
if (!new_mapping)
return NULL;
DPRINTK("request: phys %08lx, size %08lx\n", phys, size);
/*
* First work out the PMB entries to tile the physical region.
*
* Fill in new_mapping and its list of entries, all fields
* except those related to virtual addresses.
*
* alignment is the maximum alignment of all of the entries which
* make up the mapping.
* virt_offset will be non-zero in case some of the entries leading
* up to those which force the maximal alignment are smaller than
* those largest ones, and in this case virt_offset must be added
* to the eventual virtual address (which is aligned to alignment),
* to get the virtual address of the first entry.
*/
retry:
phys = orig_phys;
size = orig_size;
alignment = 0;
virt_offset = 0;
prev_entry_ptr = &new_mapping->entries;
new_mapping->size = 0;
while (size > 0) {
unsigned long best_size; /* bytes of size covered by tile */
int best_i;
unsigned long entry_phys;
unsigned long entry_size; /* total size of tile */
int i;
entry = *prev_entry_ptr;
if (entry == NULL) {
int pos;
pos = pmb_alloc(req_pos ? *req_pos++ : PMB_NO_ENTRY);
if (pos == PMB_NO_ENTRY)
goto failed_give_up;
entry = &pmbe[pos];
entry->next = NULL;
*prev_entry_ptr = entry;
}
prev_entry_ptr = &entry->next;
/*
* Calculate the 'best' PMB entry size. This is the
* one which covers the largest amount of the physical
* address range we are trying to map, but if
* increasing the size wouldn't increase the amount we
* would be able to map, don't bother. Similarly, if
* increasing the size would result in a mapping where
* half or more of the coverage is wasted, don't bother.
*/
best_size = best_i = 0;
for (i = 0; i <= max_i; i++) {
unsigned long pmb_size = pmb_sizes[i].size;
unsigned long tmp_start, tmp_end, tmp_size;
tmp_start = phys & ~(pmb_size-1);
tmp_end = tmp_start + pmb_size;
tmp_size = min(phys+size, tmp_end)-max(phys, tmp_start);
if (tmp_size <= best_size)
continue;
if (best_size) {
unsigned long wasted_size;
wasted_size = pmb_size - tmp_size;
if (wasted_size >= (pmb_size / 2))
continue;
}
best_i = i;
best_size = tmp_size;
}
BUG_ON(best_size == 0);
entry_size = pmb_sizes[best_i].size;
entry_phys = phys & ~(entry_size-1);
DPRINTK("using PMB %d: phys %08lx, size %08lx\n",
entry->pos, entry_phys, entry_size);
entry->ppn = entry_phys;
entry->size = entry_size;
entry->flags = pmb_sizes[best_i].flag;
if (pmb_sizes[best_i].size > alignment) {
alignment = entry_size;
if (new_mapping->size)
virt_offset = alignment - new_mapping->size;
}
new_mapping->size += entry_size;
size -= best_size;
phys += best_size;
}
new_mapping->phys = new_mapping->entries->ppn;
DPRINTK("mapping: phys %08lx, size %08lx\n", new_mapping->phys, new_mapping->size);
DPRINTK("virtual alignment %08lx, offset %08lx\n", alignment, virt_offset);
/* Each iteration should use at least as many entries as previous ones */
BUG_ON(entry->next);
/* Do we have a conflict with the requested mapping? */
BUG_ON(req_virt && ((req_virt & (alignment-1)) != virt_offset));
/* Next try and find a virtual address to map this */
prev_end = P1SEG;
next_mapping = pmb_mappings;
prev_ptr = &pmb_mappings;
do {
if (next_mapping == NULL)
next_start = P3SEG;
else
next_start = next_mapping->virt;
if (req_virt)
start = req_virt;
else
start = ALIGN(prev_end, alignment) + virt_offset;
DPRINTK("checking for virt %08lx between %08lx and %08lx\n",
start, prev_end, next_start);
if ((start >= prev_end) &&
(start + new_mapping->size <= next_start))
break;
if (next_mapping == NULL)
goto failed;
prev_ptr = &next_mapping->next;
prev_end = next_mapping->virt + next_mapping->size;
next_mapping = next_mapping->next;
} while (1);
DPRINTK("success, using %08lx\n", start);
new_mapping->virt = start;
new_mapping->flags = pmb_flags;
new_mapping->usage = 1;
new_mapping->next = *prev_ptr;
*prev_ptr = new_mapping;
/* Finally fill in the vpn's */
for (entry = new_mapping->entries; entry; entry=entry->next) {
entry->vpn = start;
start += entry->size;
}
return new_mapping;
failed:
if (--max_i >= 0) {
DPRINTK("failed, try again with max_i %d\n", max_i);
goto retry;
}
failed_give_up:
DPRINTK("failed, give up\n");
for (entry = new_mapping->entries; entry; entry = entry->next)
pmb_free(entry->pos);
pmb_mapping_free(new_mapping);
return NULL;
}
#endif
long pmb_remap(unsigned long phys,
unsigned long size, unsigned long flags)
{
struct pmb_mapping *mapping;
int pmb_flags;
unsigned long offset;
/* Convert typical pgprot value to the PMB equivalent */
if (flags & _PAGE_CACHABLE) {
if (flags & _PAGE_WT)
pmb_flags = PMB_WT;
else
pmb_flags = PMB_C;
} else
pmb_flags = PMB_WT | PMB_UB;
DPRINTK("phys: %08lx, size %08lx, flags %08lx->%08x\n",
phys, size, flags, pmb_flags);
write_lock(&pmb_lock);
for (mapping = pmb_mappings; mapping; mapping=mapping->next) {
DPRINTK("check against phys %08lx size %08lx flags %08lx\n",
mapping->phys, mapping->size, mapping->flags);
if ((phys >= mapping->phys) &&
(phys+size <= mapping->phys+mapping->size) &&
(pmb_flags == mapping->flags))
break;
}
if (mapping) {
/* If we hit an existing mapping, use it */
mapping->usage++;
DPRINTK("found, usage now %d\n", mapping->usage);
} else if (size < MIN_PMB_MAPPING_SIZE) {
/* We spit upon small mappings */
write_unlock(&pmb_lock);
return 0;
} else {
mapping = pmb_calc(phys, size, 0, NULL, pmb_flags);
if (!mapping) {
write_unlock(&pmb_lock);
return 0;
}
pmb_mapping_set(mapping);
}
write_unlock(&pmb_lock);
offset = phys - mapping->phys;
return mapping->virt + offset;
}
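/*
 * Usage sketch (the physical address and size below are made-up example
 * values, not real platform numbers): a caller such as __ioremap_prot()
 * typically does
 *
 *	unsigned long virt = pmb_remap(0x10000000, 16 << 20, _PAGE_CACHABLE);
 *	if (virt) {
 *		... access the window through virt, then later ...
 *		pmb_unmap(virt);
 *	}
 *
 * A return of 0 means no PMB mapping was made (for instance the request
 * was smaller than MIN_PMB_MAPPING_SIZE) and the caller should fall back
 * to page-table based remapping.
 */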
static struct pmb_mapping *pmb_mapping_find(unsigned long addr,
struct pmb_mapping ***prev)
{
struct pmb_mapping *mapping;
struct pmb_mapping **prev_mapping = &pmb_mappings;
for (mapping = pmb_mappings; mapping; mapping=mapping->next) {
if ((addr >= mapping->virt) &&
(addr < mapping->virt + mapping->size))
break;
prev_mapping = &mapping->next;
}
if (prev != NULL)
*prev = prev_mapping;
return mapping;
}
int pmb_unmap(unsigned long addr)
{
struct pmb_mapping *mapping;
struct pmb_mapping **prev_mapping;
write_lock(&pmb_lock);
mapping = pmb_mapping_find(addr, &prev_mapping);
if (unlikely(!mapping)) {
write_unlock(&pmb_lock);
return 0;
}
DPRINTK("mapping: phys %08lx, size %08lx, count %d\n",
mapping->phys, mapping->size, mapping->usage);
if (--mapping->usage == 0) {
pmb_mapping_clear_and_free(mapping);
*prev_mapping = mapping->next;
pmb_mapping_free(mapping);
}
write_unlock(&pmb_lock);
return 1;
}
static void noinline __uses_jump_to_uncached
apply_boot_mappings(struct pmb_mapping *uc_mapping, struct pmb_mapping *ram_mapping)
{
register int i __asm__("r1");
register unsigned long c2uc __asm__("r2");
register struct pmb_entry *entry __asm__("r3");
register unsigned long flags __asm__("r4");
/* We can execute this directly, as the current PMB is uncached */
__pmb_mapping_set(uc_mapping);
cached_to_uncached = uc_mapping->virt -
(((unsigned long)&__uncached_start) & ~(uc_mapping->entries->size-1));
jump_to_uncached();
/*
* We have to be cautious here, as we will temporarily lose access to
* the PMB entry which is mapping main RAM, and so lose access to
* data. So make sure all data is going to be in registers or the
* uncached region.
*/
c2uc = cached_to_uncached;
entry = ram_mapping->entries;
flags = ram_mapping->flags;
for (i=0; i<NR_PMB_ENTRIES-1; i++)
__clear_pmb_entry(i);
do {
entry = (struct pmb_entry*)(((unsigned long)entry) + c2uc);
__set_pmb_entry(entry->vpn, entry->ppn,
entry->flags | flags, entry->pos);
entry = entry->next;
} while (entry);
/* Flush out the TLB */
i = ctrl_inl(MMUCR);
i |= MMUCR_TI;
ctrl_outl(i, MMUCR);
back_to_cached();
}
struct pmb_mapping *uc_mapping, *ram_mapping
__attribute__ ((__section__ (".uncached.data")));
void __init pmb_init(void)
{
int i;
int entry;
/* Create the free list of mappings */
pmb_mappings_free = &pmbm[0];
for (i=0; i<NR_PMB_ENTRIES-1; i++)
pmbm[i].next = &pmbm[i+1];
pmbm[NR_PMB_ENTRIES-1].next = NULL;
/* Initialise each PMB entry's pos field */
for (i=0; i<NR_PMB_ENTRIES; i++)
pmbe[i].pos = i;
/* Create the initial mappings */
entry = NR_PMB_ENTRIES-1;
uc_mapping = pmb_calc(__pa(&__uncached_start), &__uncached_end - &__uncached_start,
P3SEG-pmb_sizes[0].size, &entry, PMB_WT | PMB_UB);
ram_mapping = pmb_calc(__MEMORY_START, __MEMORY_SIZE, P1SEG, 0, PMB_C);
apply_boot_mappings(uc_mapping, ram_mapping);
}
int pmb_virt_to_phys(void *addr, unsigned long *phys, unsigned long *flags)
{
struct pmb_mapping *mapping;
unsigned long vaddr = (unsigned long __force)addr;
read_lock(&pmb_lock);
mapping = pmb_mapping_find(vaddr, NULL);
if (!mapping) {
read_unlock(&pmb_lock);
return EFAULT;
}
if (phys)
*phys = mapping->phys + (vaddr - mapping->virt);
if (flags)
*flags = mapping->flags;
read_unlock(&pmb_lock);
return 0;
}
EXPORT_SYMBOL(pmb_virt_to_phys);
bool __in_29bit_mode(void)
{
#ifdef CONFIG_CPU_SUBTYPE_STX7100
/* ST40-200 used a different mechanism to control SE mode */
return (__raw_readl(MMUCR) & MMUCR_SE) == 0;
#else
return (__raw_readl(PMB_PASCR) & PASCR_SE) == 0;
#endif
}
static int pmb_seq_show(struct seq_file *file, void *iter)
{
int i;
seq_printf(file, "V: Valid, C: Cacheable, WT: Write-Through\n"
"CB: Copy-Back, B: Buffered, UB: Unbuffered\n");
seq_printf(file, "ety vpn ppn size flags\n");
for (i = 0; i < NR_PMB_ENTRIES; i++) {
unsigned long addr, data;
unsigned int size;
char *sz_str = NULL;
addr = ctrl_inl(mk_pmb_addr(i));
data = ctrl_inl(mk_pmb_data(i));
size = data & PMB_SZ_MASK;
sz_str = (size == PMB_SZ_16M) ? " 16MB":
(size == PMB_SZ_64M) ? " 64MB":
(size == PMB_SZ_128M) ? "128MB":
"512MB";
/* 02: V 0x88 0x08 128MB C CB B */
seq_printf(file, "%02d: %c 0x%02lx 0x%02lx %s %c %s %s\n",
i, ((addr & PMB_V) && (data & PMB_V)) ? 'V' : ' ',
(addr >> 24) & 0xff, (data >> 24) & 0xff,
sz_str, (data & PMB_C) ? 'C' : ' ',
(data & PMB_WT) ? "WT" : "CB",
(data & PMB_UB) ? "UB" : " B");
}
return 0;
}
static int pmb_debugfs_open(struct inode *inode, struct file *file)
{
return single_open(file, pmb_seq_show, NULL);
}
static const struct file_operations pmb_debugfs_fops = {
.owner = THIS_MODULE,
.open = pmb_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init pmb_debugfs_init(void)
{
struct dentry *dentry;
dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO,
sh_debugfs_root, NULL, &pmb_debugfs_fops);
if (!dentry)
return -ENOMEM;
if (IS_ERR(dentry))
return PTR_ERR(dentry);
return 0;
}
subsys_initcall(pmb_debugfs_init);
#ifdef CONFIG_PM
static __uses_jump_to_uncached
int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state)
{
static pm_message_t prev_state;
int idx;
switch (state.event) {
case PM_EVENT_ON:
/* Resuming from hibernation */
if (prev_state.event == PM_EVENT_FREEZE) {
for (idx = 1; idx < NR_PMB_ENTRIES; ++idx)
if (pmbm[idx].usage && pmbm[idx].virt != 0xbf)
pmb_mapping_set(&pmbm[idx]);
flush_cache_all();
}
break;
case PM_EVENT_SUSPEND:
break;
case PM_EVENT_FREEZE:
break;
}
prev_state = state;
return 0;
}
static int pmb_sysdev_resume(struct sys_device *dev)
{
return pmb_sysdev_suspend(dev, PMSG_ON);
}
static struct sysdev_driver pmb_sysdev_driver = {
.suspend = pmb_sysdev_suspend,
.resume = pmb_sysdev_resume,
};
static int __init pmb_sysdev_init(void)
{
return sysdev_driver_register(&cpu_sysdev_class, &pmb_sysdev_driver);
}
subsys_initcall(pmb_sysdev_init);
#ifdef CONFIG_HIBERNATION_ON_MEMORY
void __uses_jump_to_uncached stm_hom_pmb_init(void)
{
apply_boot_mappings(uc_mapping, ram_mapping);
/* Now I can call the pmb_sysdev_resume */
pmb_sysdev_suspend(NULL, PMSG_ON);
}
#endif
#endif


@@ -0,0 +1,795 @@
/*
* arch/sh/mm/stm-l2-cache.c
*
* Copyright (C) 2008 STMicroelectronics
*
* Authors: Richard P. Curnow
* Pawel Moll <pawel.moll@st.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/io.h>
#include <linux/pm.h>
#include <linux/uaccess.h>
#include <asm/addrspace.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/stm-l2-cache.h>
#define L2VCR 0x00
#define L2CFG 0x04
#define L2CCR 0x08
#define L2SYNC 0x0c
#define L2IA 0x10
#define L2FA 0x14
#define L2PA 0x18
#define L2FE 0x24
#define L2IS 0x30
#define L2PMC 0x70
#define L2ECO 0x74
#define L2CCO 0x78
#define L2ECA(n) (0x100 + (n * 4))
#define L2CCA(n) (0x180 + (n * 8))
enum stm_l2_mode {
MODE_BYPASS,
MODE_WRITE_THROUGH,
MODE_COPY_BACK,
MODE_LAST
};
static void *stm_l2_base;
static int stm_l2_block_size;
static int stm_l2_n_sets;
static int stm_l2_n_ways;
static enum stm_l2_mode stm_l2_current_mode = MODE_BYPASS;
static DEFINE_SPINLOCK(stm_l2_current_mode_lock);
/* Performance information */
#if defined(CONFIG_DEBUG_FS)
static struct stm_l2_perf_counter {
enum { EVENT, CYCLE } type;
int index;
const char *name;
const char *description;
} stm_l2_perf_counters[] = {
{ EVENT, 0, "L32H", "32-byte Load Hit" },
{ EVENT, 1, "L32M", "32-byte Load Miss" },
{ EVENT, 2, "S32H", "32-byte Store Hit" },
{ EVENT, 3, "S32M", "32-byte Store Miss" },
{ EVENT, 4, "OLH", "Other Load Hit" },
{ EVENT, 5, "OLM", "Other Load Miss" },
{ EVENT, 6, "OSH", "Other Store Hit" },
{ EVENT, 7, "OSM", "Other Store Miss" },
{ EVENT, 8, "PFH", "Prefetch Hit" },
{ EVENT, 9, "PFM", "Prefetch Miss" },
{ EVENT, 10, "CCA", "Cache Control by Address" },
{ EVENT, 11, "CCE", "Cache Control By Entry" },
{ EVENT, 12, "CCS", "Cache Control By Set" },
{ EVENT, 13, "CBL", "Copy-Back Line" },
{ EVENT, 14, "HPM", "Hit on Pending Miss" },
{ CYCLE, 0, "TBC", "Total Bus Cycles" },
{ CYCLE, 1, "L32L", "32-byte Load Latency" },
{ CYCLE, 2, "S32L", "32-byte Store Latency" },
{ CYCLE, 3, "OLL", "Other Load Latency" },
{ CYCLE, 4, "OSL", "Other Store Latency" },
{ CYCLE, 5, "HPML", "Hit on Pending Miss Latency" },
};
static int stm_l2_perf_seq_printf_counter(struct seq_file *s,
struct stm_l2_perf_counter *counter)
{
void *address;
long long unsigned int val64;
switch (counter->type) {
case EVENT:
address = stm_l2_base + L2ECA(counter->index);
return seq_printf(s, "%u", readl(address));
case CYCLE:
address = stm_l2_base + L2CCA(counter->index);
val64 = readl(address + 4) & 0xffff;
val64 = (val64 << 32) | readl(address);
return seq_printf(s, "%llu", val64);
}
BUG();
return -EFAULT;
}
static int stm_l2_perf_get_overflow(struct stm_l2_perf_counter *counter)
{
void *address;
switch (counter->type) {
case EVENT:
address = stm_l2_base + L2ECO;
break;
case CYCLE:
address = stm_l2_base + L2CCO;
break;
default:
BUG();
return -EFAULT;
}
return !!(readl(address) & (1 << counter->index));
}
static ssize_t stm_l2_perf_enabled_read(struct file *file,
char __user *buf, size_t count, loff_t *ppos)
{
char status[] = " \n";
if (readl(stm_l2_base + L2PMC) & 1)
status[0] = 'Y';
else
status[0] = 'N';
return simple_read_from_buffer(buf, count, ppos, status, 2);
}
static ssize_t stm_l2_perf_enabled_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
char value[] = " \n";
if (copy_from_user(value, buf, min(sizeof(value), count)) != 0)
return -EFAULT;
if (count == 1 || (count == 2 && value[1] == '\n')) {
switch (value[0]) {
case 'y':
case 'Y':
case '1':
writel(1, stm_l2_base + L2PMC);
break;
case 'n':
case 'N':
case '0':
writel(0, stm_l2_base + L2PMC);
break;
}
}
return count;
}
static const struct file_operations stm_l2_perf_enabled_fops = {
.read = stm_l2_perf_enabled_read,
.write = stm_l2_perf_enabled_write,
};
static ssize_t stm_l2_perf_clear_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
if (count) {
unsigned int l2pmc;
l2pmc = readl(stm_l2_base + L2PMC);
l2pmc &= 1; /* only preserve enable/disable bit. */
l2pmc |= (1<<1);
writel(l2pmc, stm_l2_base + L2PMC);
}
return count;
}
static const struct file_operations stm_l2_perf_clear_fops = {
.write = stm_l2_perf_clear_write,
};
enum stm_l2_perf_all_mode { VALUES, OVERFLOWS, VERBOSE };
static int stm_l2_perf_all_show(struct seq_file *s, void *v)
{
enum stm_l2_perf_all_mode mode = (enum stm_l2_perf_all_mode)s->private;
int i;
for (i = 0; i < ARRAY_SIZE(stm_l2_perf_counters); i++) {
struct stm_l2_perf_counter *counter = &stm_l2_perf_counters[i];
switch (mode) {
case VALUES:
seq_printf(s, i ? " " : "");
stm_l2_perf_seq_printf_counter(s, counter);
break;
case OVERFLOWS:
seq_printf(s, "%s%d", i ? " " : "",
stm_l2_perf_get_overflow(counter));
break;
case VERBOSE:
seq_printf(s, i ? "\n" : "");
seq_printf(s, "%s:\t", counter->name);
stm_l2_perf_seq_printf_counter(s, counter);
seq_printf(s, "%s\t(%s)",
stm_l2_perf_get_overflow(counter) ?
" <ovr>" : "", counter->description);
break;
default:
BUG();
return -EFAULT;
};
}
seq_printf(s, "\n");
return 0;
}
static int stm_l2_perf_all_open(struct inode *inode, struct file *file)
{
return single_open(file, stm_l2_perf_all_show, inode->i_private);
}
static const struct file_operations stm_l2_perf_all_fops = {
.open = stm_l2_perf_all_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int stm_l2_perf_counter_show(struct seq_file *s, void *v)
{
struct stm_l2_perf_counter *counter = s->private;
stm_l2_perf_seq_printf_counter(s, counter);
seq_printf(s, "\n");
return 0;
}
static int stm_l2_perf_counter_open(struct inode *inode, struct file *file)
{
return single_open(file, stm_l2_perf_counter_show, inode->i_private);
}
static const struct file_operations stm_l2_perf_counter_fops = {
.open = stm_l2_perf_counter_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init stm_l2_perf_counters_init(void)
{
struct dentry *dir;
int i;
if (!stm_l2_base)
return 0;
dir = debugfs_create_dir("stm-l2-cache", NULL);
if (!dir || IS_ERR(dir))
return -ENOMEM;
debugfs_create_file("enabled", S_IFREG | S_IRUGO | S_IWUSR,
dir, NULL, &stm_l2_perf_enabled_fops);
debugfs_create_file("clear", S_IFREG | S_IWUSR,
dir, NULL, &stm_l2_perf_clear_fops);
debugfs_create_file("all", S_IFREG | S_IRUGO,
dir, (void *)VALUES, &stm_l2_perf_all_fops);
debugfs_create_file("overflow", S_IFREG | S_IRUGO,
dir, (void *)OVERFLOWS, &stm_l2_perf_all_fops);
debugfs_create_file("verbose", S_IFREG | S_IRUGO,
dir, (void *)VERBOSE, &stm_l2_perf_all_fops);
for (i = 0; i < ARRAY_SIZE(stm_l2_perf_counters); i++) {
struct stm_l2_perf_counter *counter = &stm_l2_perf_counters[i];
debugfs_create_file(counter->name, S_IFREG | S_IRUGO,
dir, counter, &stm_l2_perf_counter_fops);
}
return 0;
}
device_initcall(stm_l2_perf_counters_init);
#endif /* defined(CONFIG_DEBUG_FS) */
/* Wait for the cache to finalize all pending operations */
static void stm_l2_sync(void)
{
writel(1, stm_l2_base + L2SYNC);
while (readl(stm_l2_base + L2SYNC) & 1)
cpu_relax();
}
/* Flushing interface */
static void stm_l2_flush_common(unsigned long start, int size, int is_phys,
unsigned int l2reg)
{
/* Any code trying to flush P4 address is definitely wrong... */
BUG_ON(!is_phys && start >= P4SEG);
/* Ensure L1 writeback is done before starting writeback on L2 */
asm volatile("synco"
: /* no output */
: /* no input */
: "memory");
if (likely(is_phys || (start >= P1SEG && start < P3SEG))) {
unsigned long phys_addr;
unsigned long phys_end;
if (is_phys) {
/* Physical address given. Cool. */
phys_addr = start;
} else {
/* We can assume that the memory pointed to by a P1/P2
* virtual address is physically contiguous, as it is
* supposed to be kernel logical memory (not an
* ioremapped area) */
BUG_ON(!virt_addr_valid(start));
phys_addr = virt_to_phys(start);
}
phys_end = phys_addr + size;
/* Round down to start of block (cache line). */
phys_addr &= ~(stm_l2_block_size - 1);
/* Do the honours! */
while (phys_addr < phys_end) {
writel(phys_addr, stm_l2_base + l2reg);
phys_addr += stm_l2_block_size;
}
} else if ((start >= P3SEG && start < P4SEG) || (start < P1SEG)) {
/* Round down to start of block (cache line). */
unsigned long virt_addr = start & ~(stm_l2_block_size - 1);
unsigned long virt_end = start + size;
while (virt_addr < virt_end) {
unsigned long phys_addr;
unsigned long phys_end;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
/* When dealing with P1 or P3 memory, we have to go
* through the page directory... */
if (start < P1SEG)
pgd = pgd_offset(current->mm, virt_addr);
else
pgd = pgd_offset_k(virt_addr);
BUG_ON(pgd_none(*pgd));
pud = pud_offset(pgd, virt_addr);
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, virt_addr);
BUG_ON(pmd_none(*pmd));
pte = pte_offset_kernel(pmd, virt_addr);
BUG_ON(pte_not_present(*pte));
/* Get the physical address */
phys_addr = pte_val(*pte) & PTE_PHYS_MASK; /* Page */
phys_addr += virt_addr & ~PAGE_MASK; /* Offset within the page */
/* Beginning of the next page */
phys_end = PAGE_ALIGN(phys_addr + 1);
while (phys_addr < phys_end && virt_addr < virt_end) {
writel(phys_addr, stm_l2_base + l2reg);
phys_addr += stm_l2_block_size;
virt_addr += stm_l2_block_size;
}
}
}
}
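/*
 * Worked example of the rounding above (assuming a 32-byte L2 block
 * size): a flush of physical bytes 0x0c000010..0x0c00004f is rounded
 * down to 0x0c000000 and issued as three writes, 0x0c000000, 0x0c000020
 * and 0x0c000040, to the selected L2 operation register.
 */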
void stm_l2_flush_wback(unsigned long start, int size, int is_phys)
{
if (!stm_l2_base)
return;
switch (stm_l2_current_mode) {
case MODE_COPY_BACK:
stm_l2_flush_common(start, size, is_phys, L2FA);
/* Fall through */
case MODE_WRITE_THROUGH:
/* Since this is for the purposes of DMA, we have to
* guarantee that the data has all got out to memory
* before returning. */
stm_l2_sync();
break;
case MODE_BYPASS:
break;
default:
BUG();
break;
}
}
EXPORT_SYMBOL(stm_l2_flush_wback);
void stm_l2_flush_purge(unsigned long start, int size, int is_phys)
{
if (!stm_l2_base)
return;
switch (stm_l2_current_mode) {
case MODE_COPY_BACK:
stm_l2_flush_common(start, size, is_phys, L2PA);
/* Fall through */
case MODE_WRITE_THROUGH:
/* Since this is for the purposes of DMA, we have to
* guarantee that the data has all got out to memory
* before returning. */
stm_l2_sync();
break;
case MODE_BYPASS:
break;
default:
BUG();
break;
}
}
EXPORT_SYMBOL(stm_l2_flush_purge);
void stm_l2_flush_invalidate(unsigned long start, int size, int is_phys)
{
if (!stm_l2_base)
return;
/* The L2 sync here is just belt-n-braces. It's not required in the
* same way as for wback and purge, because the subsequent DMA is
* _from_ a device so isn't reliant on it to see the correct data.
* When the CPU gets to read the DMA'd-in data later, because the L2
* keeps the ops in-order, there is no hazard in terms of the L1 miss
* being serviced from the stale line in the L2.
*
* The reason I'm doing this is in case somehow a line in the L2 that's
* about to get invalidated gets evicted just before it in the L2 op
* queue and the DMA onto the same memory line has already begun. This
* may actually be a non-issue (may be impossible in view of L2
* implementation), or is going to be at least very rare. */
switch (stm_l2_current_mode) {
case MODE_COPY_BACK:
case MODE_WRITE_THROUGH:
stm_l2_flush_common(start, size, is_phys, L2IA);
stm_l2_sync();
break;
case MODE_BYPASS:
break;
default:
BUG();
break;
}
}
EXPORT_SYMBOL(stm_l2_flush_invalidate);
/* Mode control */
static void stm_l2_invalidate(void)
{
unsigned int i;
unsigned long top = stm_l2_block_size * stm_l2_n_sets;
for (i = 0; i < top; i += stm_l2_block_size)
writel(i, stm_l2_base + L2IS);
wmb();
stm_l2_sync();
}
static void stm_l2_mode_write_through_to_bypass(void)
{
/* As the cache is known to be clean, we can just shut it off then
* invalidate it afterwards.
*
* There is a potential risk if we have pre-empt on and we're fiddling
* with the cache mode from another process at the same time. Gloss
* over that for now. */
unsigned int l2ccr;
unsigned int top, step;
stm_l2_sync();
l2ccr = readl(stm_l2_base + L2CCR);
l2ccr &= ~3; /* discard CE and CBE bits */
writel(l2ccr, stm_l2_base + L2CCR);
stm_l2_sync();
/* Invalidate the L2 */
stm_l2_invalidate();
}
static void stm_l2_mode_bypass_to_write_through(void)
{
unsigned int l2ccr;
stm_l2_sync();
l2ccr = readl(stm_l2_base + L2CCR);
l2ccr &= ~(1 << 1); /* discard CBE bit */
l2ccr |= 1; /* CE */
writel(l2ccr, stm_l2_base + L2CCR);
wmb();
stm_l2_sync();
}
/* stm-l2-helper.S */
void stm_l2_copy_back_to_write_through_helper(unsigned long top,
void *l2ccr, void *l2fe, void *l2sync);
static void stm_l2_mode_copy_back_to_write_through(void)
{
/* Have to purge with interrupts off. */
unsigned int top;
top = stm_l2_block_size * stm_l2_n_sets * stm_l2_n_ways;
stm_l2_copy_back_to_write_through_helper(top, stm_l2_base + L2CCR,
stm_l2_base + L2FE, stm_l2_base + L2SYNC);
}
static void stm_l2_mode_write_through_to_copy_back(void)
{
unsigned int l2ccr;
l2ccr = readl(stm_l2_base + L2CCR);
l2ccr |= (1 << 1); /* CBE bit */
writel(l2ccr, stm_l2_base + L2CCR);
wmb();
}
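/*
 * Move towards the requested mode one step at a time, relying on the
 * enum ordering MODE_BYPASS < MODE_WRITE_THROUGH < MODE_COPY_BACK.
 * Downgrades flush and/or invalidate around the change as required;
 * upgrades just set the relevant L2CCR enable bits. The helpers above
 * only ever move between adjacent modes, which is why the loops step
 * stm_l2_current_mode by one per iteration.
 */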
static void stm_l2_set_mode(enum stm_l2_mode new_mode)
{
spin_lock(&stm_l2_current_mode_lock);
while (new_mode < stm_l2_current_mode) {
switch (stm_l2_current_mode) {
case MODE_WRITE_THROUGH:
stm_l2_mode_write_through_to_bypass();
break;
case MODE_COPY_BACK:
stm_l2_mode_copy_back_to_write_through();
break;
default:
BUG();
break;
}
stm_l2_current_mode--;
}
while (new_mode > stm_l2_current_mode) {
switch (stm_l2_current_mode) {
case MODE_BYPASS:
stm_l2_mode_bypass_to_write_through();
break;
case MODE_WRITE_THROUGH:
stm_l2_mode_write_through_to_copy_back();
break;
default:
BUG();
break;
}
stm_l2_current_mode++;
}
spin_unlock(&stm_l2_current_mode_lock);
}
/* sysfs interface */
static const char *l2_mode_name[] = {
[MODE_BYPASS] = "bypass",
[MODE_WRITE_THROUGH] = "write_through",
[MODE_COPY_BACK] = "copy_back",
};
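/*
 * The "l2" attribute group is attached to mm_kobj, so the mode file
 * should normally show up as /sys/kernel/mm/l2/mode (path assumed from
 * the usual mm_kobj location). Reading it lists all modes with the
 * current one bracketed, e.g. "bypass [write_through] copy_back";
 * writing one of the mode names switches the cache into that mode.
 */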
static ssize_t stm_l2_mode_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
ssize_t len = 0;
enum stm_l2_mode current_mode, mode;
spin_lock(&stm_l2_current_mode_lock);
current_mode = stm_l2_current_mode;
spin_unlock(&stm_l2_current_mode_lock);
for (mode = MODE_BYPASS; mode < MODE_LAST; mode++)
len += sprintf(buf + len,
mode == current_mode ? "[%s] " : "%s ",
l2_mode_name[mode]);
len += sprintf(buf + len, "\n");
return len;
}
static ssize_t stm_l2_mode_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
enum stm_l2_mode mode;
for (mode = MODE_BYPASS; mode < MODE_LAST; mode++) {
if (sysfs_streq(buf, l2_mode_name[mode])) {
stm_l2_set_mode(mode);
return count;
}
}
return -EINVAL;
}
static struct device_attribute stm_l2_mode_attr =
__ATTR(mode, S_IRUGO | S_IWUSR, stm_l2_mode_show, stm_l2_mode_store);
static struct attribute_group stm_l2_attr_group = {
.name = "l2",
.attrs = (struct attribute * []) {
&stm_l2_mode_attr.attr,
NULL
},
};
static int __init stm_l2_sysfs_init(void)
{
if (!stm_l2_base)
return 0;
return sysfs_create_group(mm_kobj, &stm_l2_attr_group);
}
late_initcall(stm_l2_sysfs_init);
/* Driver initialization */
static int __init stm_l2_probe(struct platform_device *pdev)
{
struct resource *mem;
unsigned long addr, size;
void *base;
unsigned int vcr;
unsigned int cfg;
int blksz, setsz, nsets;
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!mem) {
dev_err(&pdev->dev, "No memory resource!\n");
return -EINVAL;
}
addr = mem->start;
size = mem->end - mem->start + 1;
mem = request_mem_region(addr, size, pdev->name);
if (!mem) {
dev_err(&pdev->dev, "Control registers already in use!");
return -EBUSY;
}
base = ioremap(addr, size);
if (!base) {
dev_err(&pdev->dev, "Can't remap control registers!\n");
release_mem_region(addr, size);
return -EFAULT;
}
vcr = readl(base + L2VCR);
cfg = readl(base + L2CFG);
blksz = (cfg & 0xf);
setsz = ((cfg >> 4) & 0xf);
nsets = ((cfg >> 8) & 0xf);
stm_l2_block_size = 1 << blksz;
stm_l2_n_sets = 1 << nsets;
stm_l2_n_ways = 1 << setsz;
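/* The L2CFG fields appear to encode log2 of the line size, associativity
 * and number of sets, so the total cache size works out to
 * stm_l2_block_size * stm_l2_n_sets * stm_l2_n_ways bytes. */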
/* This is a reasonable test that the L2 is present. We are never
* likely to do a L2 with a different line size. */
if (stm_l2_block_size != 32) {
dev_err(&pdev->dev, "Wrong line size detected, "
"assuming no L2-Cache!\n");
iounmap(base);
release_mem_region(addr, size);
return -ENODEV;
}
stm_l2_base = base;
/* Invalidate the L2 */
stm_l2_invalidate();
#if defined(CONFIG_STM_L2_CACHE_WRITETHROUGH)
stm_l2_set_mode(MODE_WRITE_THROUGH);
#elif defined(CONFIG_STM_L2_CACHE_WRITEBACK)
stm_l2_set_mode(MODE_COPY_BACK);
#endif
return 0;
}
#ifdef CONFIG_HIBERNATION
static enum stm_l2_mode stm_l2_saved_mode;
static int stm_l2_freeze_noirq(struct device *dev)
{
/*
* Disable the L2-cache
*/
stm_l2_saved_mode = stm_l2_current_mode;
stm_l2_sync();
stm_l2_set_mode(MODE_BYPASS);
return 0;
}
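/*
 * On resume from hibernation the L2 contents cannot be trusted (the
 * image was restored with the cache bypassed), so invalidate everything
 * before switching back to the mode in use when we froze.
 */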
static int stm_l2_restore_noirq(struct device *dev)
{
stm_l2_invalidate();
stm_l2_set_mode(stm_l2_saved_mode);
return 0;
}
static struct dev_pm_ops stm_l2_pm = {
.freeze_noirq = stm_l2_freeze_noirq,
.restore_noirq = stm_l2_restore_noirq,
};
#else
static struct dev_pm_ops stm_l2_pm;
#endif
static struct platform_driver stm_l2_driver = {
.driver.name = "stm-l2-cache",
.driver.pm = &stm_l2_pm,
.probe = stm_l2_probe,
};
static int __init stm_l2_init(void)
{
return platform_driver_register(&stm_l2_driver);
}
postcore_initcall(stm_l2_init);


@@ -0,0 +1,116 @@
/*
* arch/sh/mm/stm-l2-helper.S
*
* Copyright (C) 2008 STMicroelectronics
* Written by Richard P. Curnow
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
.text
.little
.macro L2SYNC l2syncreg, scratch
mov #1, \scratch
mov.l \scratch, @\l2syncreg
1:
mov.l @\l2syncreg, \scratch
shlr \scratch
bt 1b
.endm
/* The CHAIN_* macros are used to build a chain of branches,
* interleaved (carefully) into the mainline code,
* which can be used to prefetch all the lines into the
* instruction cache before we start the real work.
*/
.macro CHAIN_HEAD
sett
bt 999f
777:
.endm
/* There MUST be one of these per cache line's worth of code.
* Otherwise the prefetch will be incomplete. */
.macro CHAIN_MID
bra 888f
nop
.balign 32
999:
bt 999f
888:
.endm
.macro CHAIN_TAIL
.balign 32
999:
bra 777b
nop
.endm
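/* How the chain works: CHAIN_HEAD sets T, so its "bt 999f" hops forward
 * through every CHAIN_MID's "bt 999f" (one per cache line of code) until
 * CHAIN_TAIL branches back to label 777 just after the head. By then the
 * whole routine is resident in the instruction cache, presumably so that
 * no I-cache miss needs servicing through the L2 while it is being purged
 * below. */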
.balign 32
.global stm_l2_copy_back_to_write_through_helper
stm_l2_copy_back_to_write_through_helper:
/* args
r4 = top of range
r5 = L2CCR
r6 = L2FE
r7 = L2SYNC
*/
mov #0, r2
sts.l pr, @-r15
mov #0x10, r3
shll16 r3
shll8 r3 ! r3 = 1<<28
! irq off
stc sr, r0
or r0, r3
ldc r3, sr ! block on
CHAIN_HEAD
synco
bsr do_l2_sync
nop
CHAIN_MID
1:
mov.l r2, @r6
add #32, r2 ! assumes L2 line size is 32; will not change
cmp/hs r4, r2
bf 1b
bsr do_l2_sync
nop
! cache now flushed
mov.l @r5, r1
mov #-3, r2 ! 0xfffffffd aka ~2 aka ~(1<<1)
and r2, r1
bsr do_l2_sync
mov.l r1, @r5 ! clear L2CCR.CBE
ldc r0, sr ! restore SR : block off
CHAIN_MID
lds.l @r15+, pr
rts
nop
CHAIN_MID
do_l2_sync:
L2SYNC r7, r1
rts
nop
CHAIN_TAIL


@@ -0,0 +1,78 @@
/*
* arch/sh/mm/tlb-pteaex.c
*
* TLB operations for SH-X3 CPUs featuring PTE ASID Extensions.
*
* Copyright (C) 2009 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
unsigned long flags, pteval, vpn;
/*
* Handle the debugger faulting in pages on behalf of the debuggee.
*/
if (vma && current->active_mm != vma->vm_mm)
return;
local_irq_save(flags);
/* Set PTEH register */
vpn = address & MMU_VPN_MASK;
__raw_writel(vpn, MMU_PTEH);
/* Set PTEAEX */
__raw_writel(get_asid(), MMU_PTEAEX);
pteval = pte.pte_low;
/* Set PTEA register */
#ifdef CONFIG_X2TLB
/*
* For the extended mode TLB this is trivial, only the ESZ and
* EPR bits need to be written out to PTEA, with the remainder of
* the protection bits (with the exception of the compat-mode SZ
* and PR bits, which are cleared) being written out in PTEL.
*/
__raw_writel(pte.pte_high, MMU_PTEA);
#endif
/* Set PTEL register */
pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
#ifdef CONFIG_CACHE_WRITETHROUGH
pteval |= _PAGE_WT;
#endif
/* conveniently, we want all the software flags to be 0 anyway */
__raw_writel(pteval, MMU_PTEL);
/* Load the TLB */
asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
local_irq_restore(flags);
}
/*
* While SH-X2 extended TLB mode splits out the memory-mapped I/UTLB
* data arrays, SH-X3 cores with PTEAEX split out the memory-mapped
* address arrays. In compat mode the second array is inaccessible, while
* in extended mode, the legacy 8-bit ASID field in address array 1 has
* undefined behaviour.
*/
void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
unsigned long page)
{
jump_to_uncached();
__raw_writel(page, MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT);
__raw_writel(asid, MMU_UTLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT);
back_to_cached();
}


@@ -0,0 +1,79 @@
/*
* arch/sh/mm/tlb-sh3.c
*
* SH-3 specific TLB operations
*
* Copyright (C) 1999 Niibe Yutaka
* Copyright (C) 2002 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
unsigned long flags, pteval, vpn;
/*
* Handle the debugger faulting in pages on behalf of the debuggee.
*/
if (vma && current->active_mm != vma->vm_mm)
return;
local_irq_save(flags);
/* Set PTEH register */
vpn = (address & MMU_VPN_MASK) | get_asid();
ctrl_outl(vpn, MMU_PTEH);
pteval = pte_val(pte);
/* Set PTEL register */
pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
/* conveniently, we want all the software flags to be 0 anyway */
ctrl_outl(pteval, MMU_PTEL);
/* Load the TLB */
asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
local_irq_restore(flags);
}
void local_flush_tlb_one(unsigned long asid, unsigned long page)
{
unsigned long addr, data;
int i, ways = MMU_NTLB_WAYS;
/*
* NOTE: PTEH.ASID should be set to this MM
* _AND_ we need to write ASID to the array.
*
* It would be simple if we didn't need to set PTEH.ASID...
*/
addr = MMU_TLB_ADDRESS_ARRAY | (page & 0x1F000);
data = (page & 0xfffe0000) | asid; /* VALID bit is off */
if ((current_cpu_data.flags & CPU_HAS_MMU_PAGE_ASSOC)) {
addr |= MMU_PAGE_ASSOC_BIT;
ways = 1; /* we already know the way .. */
}
for (i = 0; i < ways; i++)
ctrl_outl(data, addr + (i << 8));
}


@@ -0,0 +1,83 @@
/*
* arch/sh/mm/tlb-sh4.c
*
* SH-4 specific TLB operations
*
* Copyright (C) 1999 Niibe Yutaka
* Copyright (C) 2002 - 2007 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
unsigned long flags, pteval, vpn;
/*
* Handle the debugger faulting in pages on behalf of the debuggee.
*/
if (vma && current->active_mm != vma->vm_mm)
return;
local_irq_save(flags);
/* Set PTEH register */
vpn = (address & MMU_VPN_MASK) | get_asid();
ctrl_outl(vpn, MMU_PTEH);
pteval = pte.pte_low;
/* Set PTEA register */
#ifdef CONFIG_X2TLB
/*
* For the extended mode TLB this is trivial, only the ESZ and
* EPR bits need to be written out to PTEA, with the remainder of
* the protection bits (with the exception of the compat-mode SZ
* and PR bits, which are cleared) being written out in PTEL.
*/
ctrl_outl(pte.pte_high, MMU_PTEA);
#else
if (cpu_data->flags & CPU_HAS_PTEA) {
/* The last three bits and the first bit of pteval contain
 * the PTEA timing control and space attribute bits
*/
ctrl_outl(copy_ptea_attributes(pteval), MMU_PTEA);
}
#endif
/* Set PTEL register */
pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
#ifdef CONFIG_CACHE_WRITETHROUGH
pteval |= _PAGE_WT;
#endif
/* conveniently, we want all the software flags to be 0 anyway */
ctrl_outl(pteval, MMU_PTEL);
/* Load the TLB */
asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
local_irq_restore(flags);
}
void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
unsigned long page)
{
unsigned long addr, data;
/*
* NOTE: PTEH.ASID should be set to this MM
* _AND_ we need to write ASID to the array.
*
* It would be simple if we didn't need to set PTEH.ASID...
*/
addr = MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT;
data = page | asid; /* VALID bit is off */
jump_to_uncached();
ctrl_outl(data, addr);
back_to_cached();
}

145
kernel/arch/sh/mm/tlb-sh5.c Normal file

@@ -0,0 +1,145 @@
/*
* arch/sh/mm/tlb-sh5.c
*
* Copyright (C) 2003 Paul Mundt <lethal@linux-sh.org>
* Copyright (C) 2003 Richard Curnow <richard.curnow@superh.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/page.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
/**
* sh64_tlb_init - Perform initial setup for the DTLB and ITLB.
*/
int __init sh64_tlb_init(void)
{
/* Assign some sane DTLB defaults */
cpu_data->dtlb.entries = 64;
cpu_data->dtlb.step = 0x10;
cpu_data->dtlb.first = DTLB_FIXED | cpu_data->dtlb.step;
cpu_data->dtlb.next = cpu_data->dtlb.first;
cpu_data->dtlb.last = DTLB_FIXED |
((cpu_data->dtlb.entries - 1) *
cpu_data->dtlb.step);
/* And again for the ITLB */
cpu_data->itlb.entries = 64;
cpu_data->itlb.step = 0x10;
cpu_data->itlb.first = ITLB_FIXED | cpu_data->itlb.step;
cpu_data->itlb.next = cpu_data->itlb.first;
cpu_data->itlb.last = ITLB_FIXED |
((cpu_data->itlb.entries - 1) *
cpu_data->itlb.step);
return 0;
}
/**
* sh64_next_free_dtlb_entry - Find the next available DTLB entry
*/
unsigned long long sh64_next_free_dtlb_entry(void)
{
return cpu_data->dtlb.next;
}
/**
* sh64_get_wired_dtlb_entry - Allocate a wired (locked-in) entry in the DTLB
*/
unsigned long long sh64_get_wired_dtlb_entry(void)
{
unsigned long long entry = sh64_next_free_dtlb_entry();
cpu_data->dtlb.first += cpu_data->dtlb.step;
cpu_data->dtlb.next += cpu_data->dtlb.step;
return entry;
}
/**
* sh64_put_wired_dtlb_entry - Free a wired (locked-in) entry in the DTLB.
*
* @entry: Address of TLB slot.
*
* Works like a stack, last one to allocate must be first one to free.
*/
int sh64_put_wired_dtlb_entry(unsigned long long entry)
{
__flush_tlb_slot(entry);
/*
* We don't do any particularly useful tracking of wired entries,
* so this approach works like a stack .. last one to be allocated
* has to be the first one to be freed.
*
* We could potentially load wired entries into a list and work on
* rebalancing the list periodically (which also entails moving the
* contents of a TLB entry) .. though I have a feeling that this is
* more trouble than it's worth.
*/
/*
* Entry must be valid .. we don't want any ITLB addresses!
*/
if (entry <= DTLB_FIXED)
return -EINVAL;
/*
* Next, check that we're within range to be freed (i.e. this must be
* the entry just beneath the first 'free' entry).
*/
if (entry < (cpu_data->dtlb.first - cpu_data->dtlb.step))
return -EINVAL;
/* If we are, then bring this entry back into the list */
cpu_data->dtlb.first -= cpu_data->dtlb.step;
cpu_data->dtlb.next = entry;
return 0;
}
/**
* sh64_setup_tlb_slot - Load up a translation in a wired slot.
*
* @config_addr: Address of TLB slot.
* @eaddr: Virtual address.
* @asid: Address Space Identifier.
* @paddr: Physical address.
*
* Load up a virtual<->physical translation for @eaddr<->@paddr in the
* pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry).
*/
void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr,
unsigned long asid, unsigned long paddr)
{
unsigned long long pteh, ptel;
pteh = neff_sign_extend(eaddr);
pteh &= PAGE_MASK;
pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
ptel = neff_sign_extend(paddr);
ptel &= PAGE_MASK;
ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE);
asm volatile("putcfg %0, 1, %1\n\t"
"putcfg %0, 0, %2\n"
: : "r" (config_addr), "r" (ptel), "r" (pteh));
}
/**
* sh64_teardown_tlb_slot - Teardown a translation.
*
* @config_addr: Address of TLB slot.
*
* Teardown any existing mapping in the TLB slot @config_addr.
*/
void sh64_teardown_tlb_slot(unsigned long long config_addr)
__attribute__ ((alias("__flush_tlb_slot")));


@@ -0,0 +1,140 @@
/*
* TLB flushing operations for SH with an MMU.
*
* Copyright (C) 1999 Niibe Yutaka
* Copyright (C) 2003 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
unsigned int cpu = smp_processor_id();
if (vma->vm_mm && cpu_context(cpu, vma->vm_mm) != NO_CONTEXT) {
unsigned long flags;
unsigned long asid;
unsigned long saved_asid = MMU_NO_ASID;
asid = cpu_asid(cpu, vma->vm_mm);
page &= PAGE_MASK;
local_irq_save(flags);
if (vma->vm_mm != current->mm) {
saved_asid = get_asid();
set_asid(asid);
}
local_flush_tlb_one(asid, page);
if (saved_asid != MMU_NO_ASID)
set_asid(saved_asid);
local_irq_restore(flags);
}
}
void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
unsigned int cpu = smp_processor_id();
if (cpu_context(cpu, mm) != NO_CONTEXT) {
unsigned long flags;
int size;
local_irq_save(flags);
size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB entries to flush */
cpu_context(cpu, mm) = NO_CONTEXT;
if (mm == current->mm)
activate_context(mm, cpu);
} else {
unsigned long asid;
unsigned long saved_asid = MMU_NO_ASID;
asid = cpu_asid(cpu, mm);
start &= PAGE_MASK;
end += (PAGE_SIZE - 1);
end &= PAGE_MASK;
if (mm != current->mm) {
saved_asid = get_asid();
set_asid(asid);
}
while (start < end) {
local_flush_tlb_one(asid, start);
start += PAGE_SIZE;
}
if (saved_asid != MMU_NO_ASID)
set_asid(saved_asid);
}
local_irq_restore(flags);
}
}
void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
unsigned int cpu = smp_processor_id();
unsigned long flags;
int size;
local_irq_save(flags);
size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB entries to flush */
local_flush_tlb_all();
} else {
unsigned long asid;
unsigned long saved_asid = get_asid();
asid = cpu_asid(cpu, &init_mm);
start &= PAGE_MASK;
end += (PAGE_SIZE - 1);
end &= PAGE_MASK;
set_asid(asid);
while (start < end) {
local_flush_tlb_one(asid, start);
start += PAGE_SIZE;
}
set_asid(saved_asid);
}
local_irq_restore(flags);
}
void local_flush_tlb_mm(struct mm_struct *mm)
{
unsigned int cpu = smp_processor_id();
/* Invalidate all TLB of this process. */
/* Instead of invalidating each TLB, we get new MMU context. */
if (cpu_context(cpu, mm) != NO_CONTEXT) {
unsigned long flags;
local_irq_save(flags);
cpu_context(cpu, mm) = NO_CONTEXT;
if (mm == current->mm)
activate_context(mm, cpu);
local_irq_restore(flags);
}
}
void local_flush_tlb_all(void)
{
unsigned long flags, status;
/*
* Flush all the TLB entries.
*
* Write to the MMU control register's flush bit:
* the TF bit for SH-3, the TI bit for SH-4.
* It's the same position, bit #2.
*/
local_irq_save(flags);
status = ctrl_inl(MMUCR);
status |= 0x04;
ctrl_outl(status, MMUCR);
ctrl_barrier();
local_irq_restore(flags);
}


@@ -0,0 +1,472 @@
/*
* arch/sh/mm/tlb-flush_64.c
*
* Copyright (C) 2000, 2001 Paolo Alberelli
* Copyright (C) 2003 Richard Curnow (/proc/tlb, bug fixes)
* Copyright (C) 2003 - 2009 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/signal.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/perf_event.h>
#include <linux/interrupt.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/tlb.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
extern void die(const char *,struct pt_regs *,long);
#define PFLAG(val,flag) (( (val) & (flag) ) ? #flag : "" )
#define PPROT(flag) PFLAG(pgprot_val(prot),flag)
static inline void print_prots(pgprot_t prot)
{
printk("prot is 0x%08lx\n",pgprot_val(prot));
printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ),
PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER));
}
static inline void print_vma(struct vm_area_struct *vma)
{
printk("vma start 0x%08lx\n", vma->vm_start);
printk("vma end 0x%08lx\n", vma->vm_end);
print_prots(vma->vm_page_prot);
printk("vm_flags 0x%08lx\n", vma->vm_flags);
}
static inline void print_task(struct task_struct *tsk)
{
printk("Task pid %d\n", task_pid_nr(tsk));
}
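/*
 * Walk the page tables by hand and return a pointer to the PTE mapping
 * 'address', or NULL if there is no present translation at any level.
 */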
static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
{
pgd_t *dir;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pte_t entry;
dir = pgd_offset(mm, address);
if (pgd_none(*dir))
return NULL;
pud = pud_offset(dir, address);
if (pud_none(*pud))
return NULL;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
pte = pte_offset_kernel(pmd, address);
entry = *pte;
if (pte_none(entry) || !pte_present(entry))
return NULL;
return pte;
}
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
unsigned long textaccess, unsigned long address)
{
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct * vma;
const struct exception_table_entry *fixup;
pte_t *pte;
int fault;
/* SIM
* Note this is now called with interrupts still disabled
* This is to cope with being called for a missing IO port
* address with interrupts disabled. This should be fixed as
* soon as we have a better 'fast path' miss handler.
*
* Plus take care how you try and debug this stuff.
* For example, writing debug data to a port which you
* have just faulted on is not going to work.
*/
tsk = current;
mm = tsk->mm;
/* Not an IO address, so reenable interrupts */
local_irq_enable();
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
if (in_atomic() || !mm)
goto no_context;
/* TLB misses upon some cache flushes get done under cli() */
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma) {
#ifdef DEBUG_FAULT
print_task(tsk);
printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
__func__, __LINE__,
address,regs->pc,textaccess,writeaccess);
show_regs(regs);
#endif
goto bad_area;
}
if (vma->vm_start <= address) {
goto good_area;
}
if (!(vma->vm_flags & VM_GROWSDOWN)) {
#ifdef DEBUG_FAULT
print_task(tsk);
printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
__func__, __LINE__,
address,regs->pc,textaccess,writeaccess);
show_regs(regs);
print_vma(vma);
#endif
goto bad_area;
}
if (expand_stack(vma, address)) {
#ifdef DEBUG_FAULT
print_task(tsk);
printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
__func__, __LINE__,
address,regs->pc,textaccess,writeaccess);
show_regs(regs);
#endif
goto bad_area;
}
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
good_area:
if (textaccess) {
if (!(vma->vm_flags & VM_EXEC))
goto bad_area;
} else {
if (writeaccess) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
} else {
if (!(vma->vm_flags & VM_READ))
goto bad_area;
}
}
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
survive:
fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
else if (fault & VM_FAULT_SIGBUS)
goto do_sigbus;
BUG();
}
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
} else {
tsk->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
/* If we get here, the page fault has been handled. Do the TLB refill
now from the newly-setup PTE, to avoid having to fault again right
away on the same instruction. */
pte = lookup_pte (mm, address);
if (!pte) {
/* From empirical evidence, we can get here, due to
!pte_present(pte). (e.g. if a swap-in occurs, and the page
is swapped back out again before the process that wanted it
gets rescheduled?) */
goto no_pte;
}
__do_tlb_refill(address, textaccess, pte);
no_pte:
up_read(&mm->mmap_sem);
return;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
bad_area:
#ifdef DEBUG_FAULT
printk("fault:bad area\n");
#endif
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
static int count=0;
siginfo_t info;
if (count < 4) {
/* This is really to help debug faults when starting
* usermode, so only need a few */
count++;
printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
address, task_pid_nr(current), current->comm,
(unsigned long) regs->pc);
#if 0
show_regs(regs);
#endif
}
if (is_global_init(tsk)) {
panic("INIT had user mode bad_area\n");
}
tsk->thread.address = address;
tsk->thread.error_code = writeaccess;
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_addr = (void *) address;
force_sig_info(SIGSEGV, &info, tsk);
return;
}
no_context:
#ifdef DEBUG_FAULT
printk("fault:No context\n");
#endif
/* Are we prepared to handle this kernel fault? */
fixup = search_exception_tables(regs->pc);
if (fixup) {
regs->pc = fixup->fixup;
return;
}
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*
*/
if (address < PAGE_SIZE)
printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
else
printk(KERN_ALERT "Unable to handle kernel paging request");
printk(" at virtual address %08lx\n", address);
printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);
die("Oops", regs, writeaccess);
do_exit(SIGKILL);
/*
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
out_of_memory:
if (is_global_init(current)) {
panic("INIT out of memory\n");
yield();
goto survive;
}
printk("fault:Out of memory\n");
up_read(&mm->mmap_sem);
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
}
printk("VM: killing process %s\n", tsk->comm);
if (user_mode(regs))
do_group_exit(SIGKILL);
goto no_context;
do_sigbus:
printk("fault:Do sigbus\n");
up_read(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
tsk->thread.address = address;
tsk->thread.error_code = writeaccess;
tsk->thread.trap_no = 14;
force_sig(SIGBUS, tsk);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
goto no_context;
}
void local_flush_tlb_one(unsigned long asid, unsigned long page)
{
unsigned long long match, pteh=0, lpage;
unsigned long tlb;
/*
* Sign-extend based on neff.
*/
lpage = neff_sign_extend(page);
match = (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
match |= lpage;
for_each_itlb_entry(tlb) {
asm volatile ("getcfg %1, 0, %0"
: "=r" (pteh)
: "r" (tlb) );
if (pteh == match) {
__flush_tlb_slot(tlb);
break;
}
}
for_each_dtlb_entry(tlb) {
asm volatile ("getcfg %1, 0, %0"
: "=r" (pteh)
: "r" (tlb) );
if (pteh == match) {
__flush_tlb_slot(tlb);
break;
}
}
}
void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
unsigned long flags;
if (vma->vm_mm) {
page &= PAGE_MASK;
local_irq_save(flags);
local_flush_tlb_one(get_asid(), page);
local_irq_restore(flags);
}
}
void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
unsigned long flags;
unsigned long long match, pteh=0, pteh_epn, pteh_low;
unsigned long tlb;
unsigned int cpu = smp_processor_id();
struct mm_struct *mm;
mm = vma->vm_mm;
if (cpu_context(cpu, mm) == NO_CONTEXT)
return;
local_irq_save(flags);
start &= PAGE_MASK;
end &= PAGE_MASK;
match = (cpu_asid(cpu, mm) << PTEH_ASID_SHIFT) | PTEH_VALID;
/* Flush ITLB */
for_each_itlb_entry(tlb) {
asm volatile ("getcfg %1, 0, %0"
: "=r" (pteh)
: "r" (tlb) );
pteh_epn = pteh & PAGE_MASK;
pteh_low = pteh & ~PAGE_MASK;
if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
__flush_tlb_slot(tlb);
}
/* Flush DTLB */
for_each_dtlb_entry(tlb) {
asm volatile ("getcfg %1, 0, %0"
: "=r" (pteh)
: "r" (tlb) );
pteh_epn = pteh & PAGE_MASK;
pteh_low = pteh & ~PAGE_MASK;
if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
__flush_tlb_slot(tlb);
}
local_irq_restore(flags);
}
void local_flush_tlb_mm(struct mm_struct *mm)
{
unsigned long flags;
unsigned int cpu = smp_processor_id();
if (cpu_context(cpu, mm) == NO_CONTEXT)
return;
local_irq_save(flags);
cpu_context(cpu, mm) = NO_CONTEXT;
if (mm == current->mm)
activate_context(mm, cpu);
local_irq_restore(flags);
}
void local_flush_tlb_all(void)
{
/* Invalidate all, including shared pages, excluding fixed TLBs */
unsigned long flags, tlb;
local_irq_save(flags);
/* Flush each ITLB entry */
for_each_itlb_entry(tlb)
__flush_tlb_slot(tlb);
/* Flush each DTLB entry */
for_each_dtlb_entry(tlb)
__flush_tlb_slot(tlb);
local_irq_restore(flags);
}
void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
/* FIXME: Optimize this later.. */
flush_tlb_all();
}
void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
}