diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
index af0e9411b83e62fbbb6539eadc0bd2cdcdc34f13..fa06ea04837b9525da90580e0c4db0a3d9f3a952 100644
--- a/arch/sparc64/Kconfig.debug
+++ b/arch/sparc64/Kconfig.debug
@@ -33,6 +33,14 @@ config DEBUG_BOOTMEM
 	depends on DEBUG_KERNEL
 	bool "Debug BOOTMEM initialization"
 
+config DEBUG_PAGEALLOC
+	bool "Page alloc debugging"
+	depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND
+	help
+	  Unmap pages from the kernel linear mapping after free_pages().
+	  This results in a large slowdown, but helps to find certain types
+	  of memory corruptions.
+
 config MCOUNT
 	bool
 	depends on STACK_DEBUG
diff --git a/arch/sparc64/kernel/devices.c b/arch/sparc64/kernel/devices.c
index d710274e516bf6a400b573473692611b6dbe024c..df9a1ca8fd773ad7e97418bf69fba2428a07aa99 100644
--- a/arch/sparc64/kernel/devices.c
+++ b/arch/sparc64/kernel/devices.c
@@ -135,6 +135,28 @@ void __init device_scan(void)
 		cpu_data(0).clock_tick = prom_getintdefault(cpu_node,
 							    "clock-frequency",
 							    0);
+		cpu_data(0).dcache_size = prom_getintdefault(cpu_node,
+							     "dcache-size",
+							     16 * 1024);
+		cpu_data(0).dcache_line_size =
+			prom_getintdefault(cpu_node, "dcache-line-size", 32);
+		cpu_data(0).icache_size = prom_getintdefault(cpu_node,
+							     "icache-size",
+							     16 * 1024);
+		cpu_data(0).icache_line_size =
+			prom_getintdefault(cpu_node, "icache-line-size", 32);
+		cpu_data(0).ecache_size = prom_getintdefault(cpu_node,
+							     "ecache-size",
+							     4 * 1024 * 1024);
+		cpu_data(0).ecache_line_size =
+			prom_getintdefault(cpu_node, "ecache-line-size", 64);
+		printk("CPU[0]: Caches "
+		       "D[sz(%d):line_sz(%d)] "
+		       "I[sz(%d):line_sz(%d)] "
+		       "E[sz(%d):line_sz(%d)]\n",
+		       cpu_data(0).dcache_size, cpu_data(0).dcache_line_size,
+		       cpu_data(0).icache_size, cpu_data(0).icache_line_size,
+		       cpu_data(0).ecache_size, cpu_data(0).ecache_line_size);
 	}
 #endif
 
diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
index 538522848ad4ee6661ba545c0232849fc4a5d3ff..acc889a7f9c1f3cd78e620cbcaeea37edfb0f5c3 100644
--- a/arch/sparc64/kernel/dtlb_backend.S
+++ b/arch/sparc64/kernel/dtlb_backend.S
@@ -9,17 +9,7 @@
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 
-#if PAGE_SHIFT == 13
-#define SZ_BITS		_PAGE_SZ8K
-#elif PAGE_SHIFT == 16
-#define SZ_BITS		_PAGE_SZ64K
-#elif PAGE_SHIFT == 19
-#define SZ_BITS		_PAGE_SZ512K
-#elif PAGE_SHIFT == 22
-#define SZ_BITS		_PAGE_SZ4MB
-#endif
-
-#define VALID_SZ_BITS	(_PAGE_VALID | SZ_BITS)
+#define VALID_SZ_BITS	(_PAGE_VALID | _PAGE_SZBITS)
 
 #define VPTE_BITS		(_PAGE_CP | _PAGE_CV | _PAGE_P )
 #define VPTE_SHIFT		(PAGE_SHIFT - 3)
@@ -163,7 +153,6 @@ sparc64_vpte_continue:
 	stxa		%g4, [%g1 + %g1] ASI_DMMU	! Restore previous TAG_ACCESS
 	retry						! Load PTE once again
 
-#undef SZ_BITS
 #undef VALID_SZ_BITS
 #undef VPTE_SHIFT
 #undef VPTE_BITS
diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S
index ded2fed23fcc5654245f658d6d98388d61eb33df..702d349c1e88f7d22cf208f93150913600f78a26 100644
--- a/arch/sparc64/kernel/dtlb_base.S
+++ b/arch/sparc64/kernel/dtlb_base.S
@@ -71,7 +71,7 @@
 from_tl1_trap:
 	rdpr		%tl, %g5			! For TL==3 test
 	CREATE_VPTE_OFFSET1(%g4, %g6)			! Create VPTE offset
-	be,pn		%xcc, 3f			! Yep, special processing
+	be,pn		%xcc, kvmap			! Yep, special processing
 	 CREATE_VPTE_OFFSET2(%g4, %g6)			! Create VPTE offset
 	cmp		%g5, 4				! Last trap level?
 	be,pn		%xcc, longpath			! Yep, cannot risk VPTE miss
@@ -83,9 +83,9 @@ from_tl1_trap:
 	 nop						! Delay-slot
 9:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
 	retry						! Trap return
-3:	brlz,pt		%g4, 9b				! Kernel virtual map?
-	 xor		%g2, %g4, %g5			! Finish bit twiddles
-	ba,a,pt		%xcc, kvmap			! Yep, go check for obp/vmalloc
+	nop
+	nop
+	nop
 
 /* DTLB ** ICACHE line 3: winfixups+real_faults		*/
 longpath:
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index b48349527853eff14a0eaab5befc217f66273a0a..2879b10729217ca6e5ea5777e76b5f767607ecca 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -30,159 +30,6 @@
 	.text
 	.align		32
 
-	.globl		sparc64_vpte_patchme1
-	.globl		sparc64_vpte_patchme2
-/*
- * On a second level vpte miss, check whether the original fault is to the OBP 
- * range (note that this is only possible for instruction miss, data misses to
- * obp range do not use vpte). If so, go back directly to the faulting address.
- * This is because we want to read the tpc, otherwise we have no way of knowing
- * the 8k aligned faulting address if we are using >8k kernel pagesize. This
- * also ensures no vpte range addresses are dropped into tlb while obp is
- * executing (see inherit_locked_prom_mappings() rant).
- */
-sparc64_vpte_nucleus:
-	/* Note that kvmap below has verified that the address is
-	 * in the range MODULES_VADDR --> VMALLOC_END already.  So
-	 * here we need only check if it is an OBP address or not.
-	 */
-	sethi		%hi(LOW_OBP_ADDRESS), %g5
-	cmp		%g4, %g5
-	blu,pn		%xcc, sparc64_vpte_patchme1
-	 mov		0x1, %g5
-	sllx		%g5, 32, %g5
-	cmp		%g4, %g5
-	blu,pn		%xcc, obp_iaddr_patch
-	 nop
-
-	/* These two instructions are patched by paginig_init().  */
-sparc64_vpte_patchme1:
-	sethi		%hi(0), %g5
-sparc64_vpte_patchme2:
-	or		%g5, %lo(0), %g5
-
-	/* With kernel PGD in %g5, branch back into dtlb_backend.  */
-	ba,pt		%xcc, sparc64_kpte_continue
-	 andn		%g1, 0x3, %g1	/* Finish PMD offset adjustment.  */
-
-vpte_noent:
-	/* Restore previous TAG_ACCESS, %g5 is zero, and we will
-	 * skip over the trap instruction so that the top level
-	 * TLB miss handler will thing this %g5 value is just an
-	 * invalid PTE, thus branching to full fault processing.
-	 */
-	mov		TLB_SFSR, %g1
-	stxa		%g4, [%g1 + %g1] ASI_DMMU
-	done
-
-	.globl		obp_iaddr_patch
-obp_iaddr_patch:
-	/* These two instructions patched by inherit_prom_mappings().  */
-	sethi		%hi(0), %g5
-	or		%g5, %lo(0), %g5
-
-	/* Behave as if we are at TL0.  */
-	wrpr		%g0, 1, %tl
-	rdpr		%tpc, %g4	/* Find original faulting iaddr */
-	srlx		%g4, 13, %g4	/* Throw out context bits */
-	sllx		%g4, 13, %g4	/* g4 has vpn + ctx0 now */
-
-	/* Restore previous TAG_ACCESS.  */
-	mov		TLB_SFSR, %g1
-	stxa		%g4, [%g1 + %g1] ASI_IMMU
-
-	/* Get PMD offset.  */
-	srlx		%g4, 23, %g6
-	and		%g6, 0x7ff, %g6
-	sllx		%g6, 2, %g6
-
-	/* Load PMD, is it valid?  */
-	lduwa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
-	brz,pn		%g5, longpath
-	 sllx		%g5, 11, %g5
-
-	/* Get PTE offset.  */
-	srlx		%g4, 13, %g6
-	and		%g6, 0x3ff, %g6
-	sllx		%g6, 3, %g6
-
-	/* Load PTE.  */
-	ldxa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
-	brgez,pn	%g5, longpath
-	 nop
-
-	/* TLB load and return from trap.  */
-	stxa		%g5, [%g0] ASI_ITLB_DATA_IN
-	retry
-
-	.globl		obp_daddr_patch
-obp_daddr_patch:
-	/* These two instructions patched by inherit_prom_mappings().  */
-	sethi		%hi(0), %g5
-	or		%g5, %lo(0), %g5
-
-	/* Get PMD offset.  */
-	srlx		%g4, 23, %g6
-	and		%g6, 0x7ff, %g6
-	sllx		%g6, 2, %g6
-
-	/* Load PMD, is it valid?  */
-	lduwa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
-	brz,pn		%g5, longpath
-	 sllx		%g5, 11, %g5
-
-	/* Get PTE offset.  */
-	srlx		%g4, 13, %g6
-	and		%g6, 0x3ff, %g6
-	sllx		%g6, 3, %g6
-
-	/* Load PTE.  */
-	ldxa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
-	brgez,pn	%g5, longpath
-	 nop
-
-	/* TLB load and return from trap.  */
-	stxa		%g5, [%g0] ASI_DTLB_DATA_IN
-	retry
-
-/*
- * On a first level data miss, check whether this is to the OBP range (note
- * that such accesses can be made by prom, as well as by kernel using
- * prom_getproperty on "address"), and if so, do not use vpte access ...
- * rather, use information saved during inherit_prom_mappings() using 8k
- * pagesize.
- */
-	.align		32
-kvmap:
-	sethi		%hi(MODULES_VADDR), %g5
-	cmp		%g4, %g5
-	blu,pn		%xcc, longpath
-	 mov		(VMALLOC_END >> 24), %g5
-	sllx		%g5, 24, %g5
-	cmp		%g4, %g5
-	bgeu,pn		%xcc, longpath
-	 nop
-
-kvmap_check_obp:
-	sethi		%hi(LOW_OBP_ADDRESS), %g5
-	cmp		%g4, %g5
-	blu,pn		%xcc, kvmap_vmalloc_addr
-	 mov		0x1, %g5
-	sllx		%g5, 32, %g5
-	cmp		%g4, %g5
-	blu,pn		%xcc, obp_daddr_patch
-	 nop
-
-kvmap_vmalloc_addr:
-	/* If we get here, a vmalloc addr was accessed, load kernel VPTE.  */
-	ldxa		[%g3 + %g6] ASI_N, %g5
-	brgez,pn	%g5, longpath
-	 nop
-
-	/* PTE is valid, load into TLB and return from trap.  */
-	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
-	retry
-
 	/* This is trivial with the new code... */
 	.globl		do_fpdis
 do_fpdis:
@@ -525,14 +372,13 @@ cheetah_plus_patch_fpdis:
 	 *
 	 * DATA 0: [low 32-bits]  Address of function to call, jmp to this
 	 *         [high 32-bits] MMU Context Argument 0, place in %g5
-	 * DATA 1: Address Argument 1, place in %g6
+	 * DATA 1: Address Argument 1, place in %g1
 	 * DATA 2: Address Argument 2, place in %g7
 	 *
 	 * With this method we can do most of the cross-call tlb/cache
 	 * flushing very quickly.
 	 *
-	 * Current CPU's IRQ worklist table is locked into %g1,
-	 * don't touch.
+	 * Current CPU's IRQ worklist table is locked into %g6, don't touch.
 	 */
 	.text
 	.align		32
@@ -1006,13 +852,14 @@ cheetah_plus_dcpe_trap_vector:
 	nop
 
 do_cheetah_plus_data_parity:
-	ba,pt		%xcc, etrap
+	rdpr		%pil, %g2
+	wrpr		%g0, 15, %pil
+	ba,pt		%xcc, etrap_irq
 	 rd		%pc, %g7
 	mov		0x0, %o0
 	call		cheetah_plus_parity_error
 	 add		%sp, PTREGS_OFF, %o1
-	ba,pt		%xcc, rtrap
-	 clr		%l6
+	ba,a,pt		%xcc, rtrap_irq
 
 cheetah_plus_dcpe_trap_vector_tl1:
 	membar		#Sync
@@ -1036,13 +883,14 @@ cheetah_plus_icpe_trap_vector:
 	nop
 
 do_cheetah_plus_insn_parity:
-	ba,pt		%xcc, etrap
+	rdpr		%pil, %g2
+	wrpr		%g0, 15, %pil
+	ba,pt		%xcc, etrap_irq
 	 rd		%pc, %g7
 	mov		0x1, %o0
 	call		cheetah_plus_parity_error
 	 add		%sp, PTREGS_OFF, %o1
-	ba,pt		%xcc, rtrap
-	 clr		%l6
+	ba,a,pt		%xcc, rtrap_irq
 
 cheetah_plus_icpe_trap_vector_tl1:
 	membar		#Sync
@@ -1075,6 +923,10 @@ do_dcpe_tl1:
 	 nop
 	wrpr		%g1, %tl		! Restore original trap level
 do_dcpe_tl1_nonfatal:	/* Ok we may use interrupt globals safely. */
+	sethi		%hi(dcache_parity_tl1_occurred), %g2
+	lduw		[%g2 + %lo(dcache_parity_tl1_occurred)], %g1
+	add		%g1, 1, %g1
+	stw		%g1, [%g2 + %lo(dcache_parity_tl1_occurred)]
 	/* Reset D-cache parity */
 	sethi		%hi(1 << 16), %g1	! D-cache size
 	mov		(1 << 5), %g2		! D-cache line size
@@ -1121,6 +973,10 @@ do_icpe_tl1:
 	 nop
 	wrpr		%g1, %tl		! Restore original trap level
 do_icpe_tl1_nonfatal:	/* Ok we may use interrupt globals safely. */
+	sethi		%hi(icache_parity_tl1_occurred), %g2
+	lduw		[%g2 + %lo(icache_parity_tl1_occurred)], %g1
+	add		%g1, 1, %g1
+	stw		%g1, [%g2 + %lo(icache_parity_tl1_occurred)]
 	/* Flush I-cache */
 	sethi		%hi(1 << 15), %g1	! I-cache size
 	mov		(1 << 5), %g2		! I-cache line size
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 1fa06c4e3bdb8da1c5496dc14fa5547a69fe79f0..ecc748fb9ad74b70cb6295dc4fde6fe7bc5376b5 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -80,15 +80,165 @@ sparc_ramdisk_image64:
 	.xword	0
 	.word	_end
 
-	/* We must be careful, 32-bit OpenBOOT will get confused if it
-	 * tries to save away a register window to a 64-bit kernel
-	 * stack address.  Flush all windows, disable interrupts,
-	 * remap if necessary, jump onto kernel trap table, then kernel
-	 * stack, or else we die.
+	/* PROM cif handler code address is in %o4.  */
+sparc64_boot:
+1:	rd	%pc, %g7
+	set	1b, %g1
+	cmp	%g1, %g7
+	be,pn	%xcc, sparc64_boot_after_remap
+	 mov	%o4, %l7
+
+	/* We need to remap the kernel.  Use position-independent
+	 * code to remap us to KERNBASE.
 	 *
-	 * PROM entry point is on %o4
+	 * SILO can invoke us with 32-bit address masking enabled,
+	 * so make sure that's clear.
 	 */
-sparc64_boot:
+	rdpr	%pstate, %g1
+	andn	%g1, PSTATE_AM, %g1
+	wrpr	%g1, 0x0, %pstate
+	ba,a,pt	%xcc, 1f
+
+	.globl	prom_finddev_name, prom_chosen_path
+	.globl	prom_getprop_name, prom_mmu_name
+	.globl	prom_callmethod_name, prom_translate_name
+	.globl	prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
+	.globl	prom_boot_mapped_pc, prom_boot_mapping_mode
+	.globl	prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
+prom_finddev_name:
+	.asciz	"finddevice"
+prom_chosen_path:
+	.asciz	"/chosen"
+prom_getprop_name:
+	.asciz	"getprop"
+prom_mmu_name:
+	.asciz	"mmu"
+prom_callmethod_name:
+	.asciz	"call-method"
+prom_translate_name:
+	.asciz	"translate"
+prom_map_name:
+	.asciz	"map"
+prom_unmap_name:
+	.asciz	"unmap"
+	.align	4
+prom_mmu_ihandle_cache:
+	.word	0
+prom_boot_mapped_pc:
+	.word	0
+prom_boot_mapping_mode:
+	.word	0
+	.align	8
+prom_boot_mapping_phys_high:
+	.xword	0
+prom_boot_mapping_phys_low:
+	.xword	0
+1:
+	rd	%pc, %l0
+	mov	(1b - prom_finddev_name), %l1
+	mov	(1b - prom_chosen_path), %l2
+	mov	(1b - prom_boot_mapped_pc), %l3
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%l0, %l3, %l3
+	stw	%l0, [%l3]
+	sub	%sp, (192 + 128), %sp
+
+	/* chosen_node = prom_finddevice("/chosen") */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "finddevice"
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 1
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l2, [%sp + 2047 + 128 + 0x18]	! arg1, "/chosen"
+	stx	%g0, [%sp + 2047 + 128 + 0x20]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	ldx	[%sp + 2047 + 128 + 0x20], %l4	! chosen device node
+
+	mov	(1b - prom_getprop_name), %l1
+	mov	(1b - prom_mmu_name), %l2
+	mov	(1b - prom_mmu_ihandle_cache), %l5
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%l0, %l5, %l5
+
+	/* prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu") */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "getprop"
+	mov	4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 4
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l4, [%sp + 2047 + 128 + 0x18]	! arg1, chosen_node
+	stx	%l2, [%sp + 2047 + 128 + 0x20]	! arg2, "mmu"
+	stx	%l5, [%sp + 2047 + 128 + 0x28]	! arg3, &prom_mmu_ihandle_cache
+	mov	4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x30]	! arg4, sizeof(arg3)
+	stx	%g0, [%sp + 2047 + 128 + 0x38]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	mov	(1b - prom_callmethod_name), %l1
+	mov	(1b - prom_translate_name), %l2
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	lduw	[%l5], %l5			! prom_mmu_ihandle_cache
+
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "call-method"
+	mov	3, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 3
+	mov	5, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 5
+	stx	%l2, [%sp + 2047 + 128 + 0x18]	! arg1: "translate"
+	stx	%l5, [%sp + 2047 + 128 + 0x20]	! arg2: prom_mmu_ihandle_cache
+	srlx	%l0, 22, %l3
+	sllx	%l3, 22, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x28]	! arg3: vaddr, our PC
+	stx	%g0, [%sp + 2047 + 128 + 0x30]	! res1
+	stx	%g0, [%sp + 2047 + 128 + 0x38]	! res2
+	stx	%g0, [%sp + 2047 + 128 + 0x40]	! res3
+	stx	%g0, [%sp + 2047 + 128 + 0x48]	! res4
+	stx	%g0, [%sp + 2047 + 128 + 0x50]	! res5
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	ldx	[%sp + 2047 + 128 + 0x40], %l1	! translation mode
+	mov	(1b - prom_boot_mapping_mode), %l4
+	sub	%l0, %l4, %l4
+	stw	%l1, [%l4]
+	mov	(1b - prom_boot_mapping_phys_high), %l4
+	sub	%l0, %l4, %l4
+	ldx	[%sp + 2047 + 128 + 0x48], %l2	! physaddr high
+	stx	%l2, [%l4 + 0x0]
+	ldx	[%sp + 2047 + 128 + 0x50], %l3	! physaddr low
+	stx	%l3, [%l4 + 0x8]
+
+	/* Leave service as-is, "call-method" */
+	mov	7, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 7
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	mov	(1b - prom_map_name), %l3
+	sub	%l0, %l3, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x18]	! arg1: "map"
+	/* Leave arg2 as-is, prom_mmu_ihandle_cache */
+	mov	-1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x28]	! arg3: mode (-1 default)
+	sethi	%hi(8 * 1024 * 1024), %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x30]	! arg4: size (8MB)
+	sethi	%hi(KERNBASE), %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x38]	! arg5: vaddr (KERNBASE)
+	stx	%g0, [%sp + 2047 + 128 + 0x40]	! arg6: empty
+	mov	(1b - prom_boot_mapping_phys_low), %l3
+	sub	%l0, %l3, %l3
+	ldx	[%l3], %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x48]	! arg7: phys addr
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	add	%sp, (192 + 128), %sp
+
+sparc64_boot_after_remap:
 	BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
 	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
 	ba,pt	%xcc, spitfire_boot
@@ -125,185 +275,7 @@ cheetah_generic_boot:
 	stxa	%g0, [%g3] ASI_IMMU
 	membar	#Sync
 
-	wrpr    %g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate
-	wr	%g0, 0, %fprs
-
-	/* Just like for Spitfire, we probe itlb-2 for a mapping which
-	 * matches our current %pc.  We take the physical address in
-	 * that mapping and use it to make our own.
-	 */
-
-	/* %g5 holds the tlb data */
-        sethi   %uhi(_PAGE_VALID | _PAGE_SZ4MB), %g5
-        sllx    %g5, 32, %g5
-        or      %g5, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W | _PAGE_G), %g5
-
-	/* Put PADDR tlb data mask into %g3. */
-	sethi	%uhi(_PAGE_PADDR), %g3
-	or	%g3, %ulo(_PAGE_PADDR), %g3
-	sllx	%g3, 32, %g3
-	sethi	%hi(_PAGE_PADDR), %g7
-	or	%g7, %lo(_PAGE_PADDR), %g7
-	or	%g3, %g7, %g3
-
-	set	2 << 16, %l0		/* TLB entry walker. */
-	set	0x1fff, %l2		/* Page mask. */
-	rd	%pc, %l3
-	andn	%l3, %l2, %g2		/* vaddr comparator */
-
-1:	ldxa	[%l0] ASI_ITLB_TAG_READ, %g1
-	membar	#Sync
-	andn	%g1, %l2, %g1
-	cmp	%g1, %g2
-	be,pn	%xcc, cheetah_got_tlbentry
-	 nop
-	and	%l0, (127 << 3), %g1
-	cmp	%g1, (127 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-	/* Search the small TLB.  OBP never maps us like that but
-	 * newer SILO can.
-	 */
-	clr	%l0
-
-1:	ldxa	[%l0] ASI_ITLB_TAG_READ, %g1
-	membar	#Sync
-	andn	%g1, %l2, %g1
-	cmp	%g1, %g2
-	be,pn	%xcc, cheetah_got_tlbentry
-	 nop
-	cmp	%l0, (15 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-	/* BUG() if we get here... */
-	ta	0x5
-
-cheetah_got_tlbentry:
-	ldxa	[%l0] ASI_ITLB_DATA_ACCESS, %g0
-	ldxa	[%l0] ASI_ITLB_DATA_ACCESS, %g1
-	membar	#Sync
-	and	%g1, %g3, %g1
-	set	0x5fff, %l0
-	andn	%g1, %l0, %g1
-	or	%g5, %g1, %g5
-
-	/* Clear out any KERNBASE area entries. */
-	set	2 << 16, %l0
-	sethi	%hi(KERNBASE), %g3
-	sethi	%hi(KERNBASE<<1), %g7
-	mov	TLB_TAG_ACCESS, %l7
-
-	/* First, check ITLB */
-1:	ldxa	[%l0] ASI_ITLB_TAG_READ, %g1
-	membar	#Sync
-	andn	%g1, %l2, %g1
-	cmp	%g1, %g3
-	blu,pn	%xcc, 2f
-	 cmp	%g1, %g7
-	bgeu,pn	%xcc, 2f
-	 nop
-	stxa	%g0, [%l7] ASI_IMMU
-	membar	#Sync
-	stxa	%g0, [%l0] ASI_ITLB_DATA_ACCESS
-	membar	#Sync
-
-2:	and	%l0, (127 << 3), %g1
-	cmp	%g1, (127 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-	/* Next, check DTLB */
-	set	2 << 16, %l0
-1:	ldxa	[%l0] ASI_DTLB_TAG_READ, %g1
-	membar	#Sync
-	andn	%g1, %l2, %g1
-	cmp	%g1, %g3
-	blu,pn	%xcc, 2f
-	 cmp	%g1, %g7
-	bgeu,pn	%xcc, 2f
-	 nop
-	stxa	%g0, [%l7] ASI_DMMU
-	membar	#Sync
-	stxa	%g0, [%l0] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	
-2:	and	%l0, (511 << 3), %g1
-	cmp	%g1, (511 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-	/* On Cheetah+, have to check second DTLB.  */
-	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,l0,2f)
-	ba,pt	%xcc, 9f
-	 nop
-
-2:	set	3 << 16, %l0
-1:	ldxa	[%l0] ASI_DTLB_TAG_READ, %g1
-	membar	#Sync
-	andn	%g1, %l2, %g1
-	cmp	%g1, %g3
-	blu,pn	%xcc, 2f
-	 cmp	%g1, %g7
-	bgeu,pn	%xcc, 2f
-	 nop
-	stxa	%g0, [%l7] ASI_DMMU
-	membar	#Sync
-	stxa	%g0, [%l0] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	
-2:	and	%l0, (511 << 3), %g1
-	cmp	%g1, (511 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-9:
-
-	/* Now lock the TTE we created into ITLB-0 and DTLB-0,
-	 * entry 15 (and maybe 14 too).
-	 */
-	sethi	%hi(KERNBASE), %g3
-	set	(0 << 16) | (15 << 3), %g7
-	stxa	%g3, [%l7] ASI_DMMU
-	membar	#Sync
-	stxa	%g5, [%g7] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	stxa	%g3, [%l7] ASI_IMMU
-	membar	#Sync
-	stxa	%g5, [%g7] ASI_ITLB_DATA_ACCESS
-	membar	#Sync
-	flush	%g3
-	membar	#Sync
-	sethi	%hi(_end), %g3			/* Check for bigkernel case */
-	or	%g3, %lo(_end), %g3
-	srl	%g3, 23, %g3			/* Check if _end > 8M */
-	brz,pt	%g3, 1f
-	 sethi	%hi(KERNBASE), %g3		/* Restore for fixup code below */
-	sethi	%hi(0x400000), %g3
-	or	%g3, %lo(0x400000), %g3
-	add	%g5, %g3, %g5			/* New tte data */
-	andn	%g5, (_PAGE_G), %g5
-	sethi	%hi(KERNBASE+0x400000), %g3
-	or	%g3, %lo(KERNBASE+0x400000), %g3
-	set	(0 << 16) | (14 << 3), %g7
-	stxa	%g3, [%l7] ASI_DMMU
-	membar	#Sync
-	stxa	%g5, [%g7] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	stxa	%g3, [%l7] ASI_IMMU
-	membar	#Sync
-	stxa	%g5, [%g7] ASI_ITLB_DATA_ACCESS
-	membar	#Sync
-	flush	%g3
-	membar	#Sync
-	sethi	%hi(KERNBASE), %g3		/* Restore for fixup code below */
-	ba,pt	%xcc, 1f
-	 nop
-
-1:	set	sun4u_init, %g2
-	jmpl    %g2 + %g0, %g0
-	 nop
+	ba,a,pt	%xcc, jump_to_sun4u_init
 
 spitfire_boot:
 	/* Typically PROM has already enabled both MMU's and both on-chip
@@ -313,6 +285,7 @@ spitfire_boot:
 	stxa	%g1, [%g0] ASI_LSU_CONTROL
 	membar	#Sync
 
+jump_to_sun4u_init:
 	/*
 	 * Make sure we are in privileged mode, have address masking,
          * using the ordinary globals and have enabled floating
@@ -324,151 +297,6 @@ spitfire_boot:
 	wrpr    %g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate
 	wr	%g0, 0, %fprs
 
-spitfire_create_mappings:
-	/* %g5 holds the tlb data */
-        sethi   %uhi(_PAGE_VALID | _PAGE_SZ4MB), %g5
-        sllx    %g5, 32, %g5
-        or      %g5, (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W | _PAGE_G), %g5
-
-	/* Base of physical memory cannot reliably be assumed to be
-	 * at 0x0!  Figure out where it happens to be. -DaveM
-	 */
-
-	/* Put PADDR tlb data mask into %g3. */
-	sethi	%uhi(_PAGE_PADDR_SF), %g3
-	or	%g3, %ulo(_PAGE_PADDR_SF), %g3
-	sllx	%g3, 32, %g3
-	sethi	%hi(_PAGE_PADDR_SF), %g7
-	or	%g7, %lo(_PAGE_PADDR_SF), %g7
-	or	%g3, %g7, %g3
-
-	/* Walk through entire ITLB, looking for entry which maps
-	 * our %pc currently, stick PADDR from there into %g5 tlb data.
-	 */
-	clr	%l0			/* TLB entry walker. */
-	set	0x1fff, %l2		/* Page mask. */
-	rd	%pc, %l3
-	andn	%l3, %l2, %g2		/* vaddr comparator */
-1:
-	/* Yes, the nops seem to be necessary for now, don't ask me why. -DaveM */
-	ldxa	[%l0] ASI_ITLB_TAG_READ, %g1
-	nop
-	nop
-	nop
-	andn	%g1, %l2, %g1		/* Get vaddr */
-	cmp	%g1, %g2
-	be,a,pn	%xcc, spitfire_got_tlbentry
-	 ldxa	[%l0] ASI_ITLB_DATA_ACCESS, %g1
-	cmp	%l0, (63 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-	/* BUG() if we get here... */
-	ta	0x5
-
-spitfire_got_tlbentry:
-	/* Nops here again, perhaps Cheetah/Blackbird are better behaved... */
-	nop
-	nop
-	nop
-	and	%g1, %g3, %g1		/* Mask to just get paddr bits.       */
-	set	0x5fff, %l3		/* Mask offset to get phys base.      */
-	andn	%g1, %l3, %g1
-
-	/* NOTE: We hold on to %g1 paddr base as we need it below to lock
-	 * NOTE: the PROM cif code into the TLB.
-	 */
-
-	or	%g5, %g1, %g5		/* Or it into TAG being built.        */
-
-	clr	%l0			/* TLB entry walker. */
-	sethi	%hi(KERNBASE), %g3	/* 4M lower limit */
-	sethi	%hi(KERNBASE<<1), %g7	/* 8M upper limit */
-	mov	TLB_TAG_ACCESS, %l7
-1:
-	/* Yes, the nops seem to be necessary for now, don't ask me why. -DaveM */
-	ldxa	[%l0] ASI_ITLB_TAG_READ, %g1
-	nop
-	nop
-	nop
-	andn	%g1, %l2, %g1		/* Get vaddr */
-	cmp	%g1, %g3
-	blu,pn	%xcc, 2f
-	 cmp	%g1, %g7
-	bgeu,pn	%xcc, 2f
-	 nop
-	stxa	%g0, [%l7] ASI_IMMU
-	stxa	%g0, [%l0] ASI_ITLB_DATA_ACCESS
-	membar	#Sync
-2:
-	cmp	%l0, (63 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-	nop; nop; nop
-
-	clr	%l0			/* TLB entry walker. */
-1:
-	/* Yes, the nops seem to be necessary for now, don't ask me why. -DaveM */
-	ldxa	[%l0] ASI_DTLB_TAG_READ, %g1
-	nop
-	nop
-	nop
-	andn	%g1, %l2, %g1		/* Get vaddr */
-	cmp	%g1, %g3
-	blu,pn	%xcc, 2f
-	 cmp	%g1, %g7
-	bgeu,pn	%xcc, 2f
-	 nop
-	stxa	%g0, [%l7] ASI_DMMU
-	stxa	%g0, [%l0] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-2:
-	cmp	%l0, (63 << 3)
-	blu,pt	%xcc, 1b
-	 add	%l0, (1 << 3), %l0
-
-	nop; nop; nop
-
-
-	/* PROM never puts any TLB entries into the MMU with the lock bit
-	 * set.  So we gladly use tlb entry 63 for KERNBASE. And maybe 62 too.
-	 */
-
-	sethi	%hi(KERNBASE), %g3
-	mov	(63 << 3), %g7
-	stxa	%g3, [%l7] ASI_DMMU		/* KERNBASE into TLB TAG	*/
-	stxa	%g5, [%g7] ASI_DTLB_DATA_ACCESS	/* TTE into TLB DATA		*/
-	membar	#Sync
-	stxa	%g3, [%l7] ASI_IMMU		/* KERNBASE into TLB TAG	*/
-	stxa	%g5, [%g7] ASI_ITLB_DATA_ACCESS	/* TTE into TLB DATA		*/
-	membar	#Sync
-	flush	%g3
-	membar	#Sync
-	sethi	%hi(_end), %g3			/* Check for bigkernel case */
-	or	%g3, %lo(_end), %g3
-	srl	%g3, 23, %g3			/* Check if _end > 8M */
-	brz,pt	%g3, 2f
-	 sethi	%hi(KERNBASE), %g3		/* Restore for fixup code below */
-	sethi	%hi(0x400000), %g3
-	or	%g3, %lo(0x400000), %g3
-	add	%g5, %g3, %g5			/* New tte data */
-	andn	%g5, (_PAGE_G), %g5
-	sethi	%hi(KERNBASE+0x400000), %g3
-	or	%g3, %lo(KERNBASE+0x400000), %g3
-	mov	(62 << 3), %g7
-	stxa	%g3, [%l7] ASI_DMMU
-	stxa	%g5, [%g7] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-	stxa	%g3, [%l7] ASI_IMMU
-	stxa	%g5, [%g7] ASI_ITLB_DATA_ACCESS
-	membar	#Sync
-	flush	%g3
-	membar	#Sync
-	sethi	%hi(KERNBASE), %g3		/* Restore for fixup code below */
-2:	ba,pt	%xcc, 1f
-	 nop
-1:
 	set	sun4u_init, %g2
 	jmpl    %g2 + %g0, %g0
 	 nop
@@ -483,38 +311,12 @@ sun4u_init:
 	stxa	%g0, [%g7] ASI_DMMU
 	membar	#Sync
 
-	/* We are now safely (we hope) in Nucleus context (0), rewrite
-	 * the KERNBASE TTE's so they no longer have the global bit set.
-	 * Don't forget to setup TAG_ACCESS first 8-)
-	 */
-	mov	TLB_TAG_ACCESS, %g2
-	stxa	%g3, [%g2] ASI_IMMU
-	stxa	%g3, [%g2] ASI_DMMU
-	membar	#Sync
-
 	BRANCH_IF_ANY_CHEETAH(g1,g7,cheetah_tlb_fixup)
 
 	ba,pt	%xcc, spitfire_tlb_fixup
 	 nop
 
 cheetah_tlb_fixup:
-	set	(0 << 16) | (15 << 3), %g7
-	ldxa	[%g7] ASI_ITLB_DATA_ACCESS, %g0
-	ldxa	[%g7] ASI_ITLB_DATA_ACCESS, %g1
-	andn	%g1, (_PAGE_G), %g1
-	stxa	%g1, [%g7] ASI_ITLB_DATA_ACCESS
-	membar	#Sync
-
-	ldxa	[%g7] ASI_DTLB_DATA_ACCESS, %g0
-	ldxa	[%g7] ASI_DTLB_DATA_ACCESS, %g1
-	andn	%g1, (_PAGE_G), %g1
-	stxa	%g1, [%g7] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-
-	/* Kill instruction prefetch queues. */
-	flush	%g3
-	membar	#Sync
-
 	mov	2, %g2		/* Set TLB type to cheetah+. */
 	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
 
@@ -551,21 +353,6 @@ cheetah_tlb_fixup:
 	 nop
 
 spitfire_tlb_fixup:
-	mov	(63 << 3), %g7
-	ldxa	[%g7] ASI_ITLB_DATA_ACCESS, %g1
-	andn	%g1, (_PAGE_G), %g1
-	stxa	%g1, [%g7] ASI_ITLB_DATA_ACCESS
-	membar	#Sync
-
-	ldxa	[%g7] ASI_DTLB_DATA_ACCESS, %g1
-	andn	%g1, (_PAGE_G), %g1
-	stxa	%g1, [%g7] ASI_DTLB_DATA_ACCESS
-	membar	#Sync
-
-	/* Kill instruction prefetch queues. */
-	flush	%g3
-	membar	#Sync
-
 	/* Set TLB type to spitfire. */
 	mov	0, %g2
 	sethi	%hi(tlb_type), %g1
@@ -578,24 +365,6 @@ tlb_fixup_done:
 	mov	%sp, %l6
 	mov	%o4, %l7
 
-#if 0	/* We don't do it like this anymore, but for historical hack value
-	 * I leave this snippet here to show how crazy we can be sometimes. 8-)
-	 */
-
-	/* Setup "Linux Current Register", thanks Sun 8-) */
-	wr	%g0, 0x1, %pcr
-
-	/* Blackbird errata workaround.  See commentary in
-	 * smp.c:smp_percpu_timer_interrupt() for more
-	 * information.
-	 */
-	ba,pt	%xcc, 99f
-	 nop
-	.align	64
-99:	wr	%g6, %g0, %pic
-	rd	%pic, %g0
-#endif
-
 	wr	%g0, ASI_P, %asi
 	mov	1, %g1
 	sllx	%g1, THREAD_SHIFT, %g1
@@ -756,12 +525,7 @@ bootup_user_stack_end:
 
 #include "ttable.S"
 #include "systbls.S"
-
-	.align	1024
-	.globl	swapper_pg_dir
-swapper_pg_dir:
-	.word	0
-
+#include "ktlb.S"
 #include "etrap.S"
 #include "rtrap.S"
 #include "winfixup.S"
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
new file mode 100644
index 0000000000000000000000000000000000000000..7796b37f478cc6c07806f7e920dabb60a1289db4
--- /dev/null
+++ b/arch/sparc64/kernel/ktlb.S
@@ -0,0 +1,198 @@
+/* arch/sparc64/kernel/ktlb.S: Kernel mapping TLB miss handling.
+ *
+ * Copyright (C) 1995, 1997, 2005 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 1996 Eddie C. Dost        (ecd@brainaid.de)
+ * Copyright (C) 1996 Miguel de Icaza      (miguel@nuclecu.unam.mx)
+ * Copyright (C) 1996,98,99 Jakub Jelinek  (jj@sunsite.mff.cuni.cz)
+*/
+
+#include <linux/config.h>
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+	.text
+	.align		32
+
+/*
+ * On a second level vpte miss, check whether the original fault is to the OBP 
+ * range (note that this is only possible for instruction miss, data misses to
+ * obp range do not use vpte). If so, go back directly to the faulting address.
+ * This is because we want to read the tpc, otherwise we have no way of knowing
+ * the 8k aligned faulting address if we are using >8k kernel pagesize. This
+ * also ensures no vpte range addresses are dropped into tlb while obp is
+ * executing (see inherit_locked_prom_mappings() rant).
+ */
+sparc64_vpte_nucleus:
+	/* Note that kvmap below has verified that the address is
+	 * in the range MODULES_VADDR --> VMALLOC_END already.  So
+	 * here we need only check if it is an OBP address or not.
+	 */
+	sethi		%hi(LOW_OBP_ADDRESS), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kern_vpte
+	 mov		0x1, %g5
+	sllx		%g5, 32, %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, vpte_insn_obp
+	 nop
+
+	/* Load the kernel PGD value stored in swapper_pgd_zero.  */
+kern_vpte:
+	sethi		%hi(swapper_pgd_zero), %g5
+	lduw		[%g5 + %lo(swapper_pgd_zero)], %g5
+
+	/* With kernel PGD in %g5, branch back into dtlb_backend.  */
+	ba,pt		%xcc, sparc64_kpte_continue
+	 andn		%g1, 0x3, %g1	/* Finish PMD offset adjustment.  */
+
+vpte_noent:
+	/* Restore previous TAG_ACCESS, %g5 is zero, and we will
+	 * skip over the trap instruction so that the top level
+	 * TLB miss handler will think this %g5 value is just an
+	 * invalid PTE, thus branching to full fault processing.
+	 */
+	mov		TLB_SFSR, %g1
+	stxa		%g4, [%g1 + %g1] ASI_DMMU
+	done
+
+vpte_insn_obp:
+	sethi		%hi(prom_pmd_phys), %g5
+	ldx		[%g5 + %lo(prom_pmd_phys)], %g5
+
+	/* Behave as if we are at TL0.  */
+	wrpr		%g0, 1, %tl
+	rdpr		%tpc, %g4	/* Find original faulting iaddr */
+	srlx		%g4, 13, %g4	/* Throw out context bits */
+	sllx		%g4, 13, %g4	/* g4 has vpn + ctx0 now */
+
+	/* Restore previous TAG_ACCESS.  */
+	mov		TLB_SFSR, %g1
+	stxa		%g4, [%g1 + %g1] ASI_IMMU
+
+	/* Get PMD offset.  */
+	srlx		%g4, 23, %g6
+	and		%g6, 0x7ff, %g6
+	sllx		%g6, 2, %g6
+
+	/* Load PMD, is it valid?  */
+	lduwa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
+	brz,pn		%g5, longpath
+	 sllx		%g5, 11, %g5
+
+	/* Get PTE offset.  */
+	srlx		%g4, 13, %g6
+	and		%g6, 0x3ff, %g6
+	sllx		%g6, 3, %g6
+
+	/* Load PTE.  */
+	ldxa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
+	brgez,pn	%g5, longpath
+	 nop
+
+	/* TLB load and return from trap.  */
+	stxa		%g5, [%g0] ASI_ITLB_DATA_IN
+	retry
+
+kvmap_do_obp:
+	sethi		%hi(prom_pmd_phys), %g5
+	ldx		[%g5 + %lo(prom_pmd_phys)], %g5
+
+	/* Get PMD offset.  */
+	srlx		%g4, 23, %g6
+	and		%g6, 0x7ff, %g6
+	sllx		%g6, 2, %g6
+
+	/* Load PMD, is it valid?  */
+	lduwa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
+	brz,pn		%g5, longpath
+	 sllx		%g5, 11, %g5
+
+	/* Get PTE offset.  */
+	srlx		%g4, 13, %g6
+	and		%g6, 0x3ff, %g6
+	sllx		%g6, 3, %g6
+
+	/* Load PTE.  */
+	ldxa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
+	brgez,pn	%g5, longpath
+	 nop
+
+	/* TLB load and return from trap.  */
+	stxa		%g5, [%g0] ASI_DTLB_DATA_IN
+	retry
+
+/*
+ * On a first level data miss, check whether this is to the OBP range (note
+ * that such accesses can be made by prom, as well as by kernel using
+ * prom_getproperty on "address"), and if so, do not use vpte access ...
+ * rather, use information saved during inherit_prom_mappings() using 8k
+ * pagesize.
+ */
+	.align		32
+kvmap:
+	brgez,pn	%g4, kvmap_nonlinear
+	 nop
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	.globl		kvmap_linear_patch
+kvmap_linear_patch:
+#endif
+	ba,pt		%xcc, kvmap_load
+	 xor		%g2, %g4, %g5
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	sethi		%hi(swapper_pg_dir), %g5
+	or		%g5, %lo(swapper_pg_dir), %g5
+	sllx		%g4, 64 - (PGDIR_SHIFT + PGDIR_BITS), %g6
+	srlx		%g6, 64 - PAGE_SHIFT, %g6
+	andn		%g6, 0x3, %g6
+	lduw		[%g5 + %g6], %g5
+	brz,pn		%g5, longpath
+	 sllx		%g4, 64 - (PMD_SHIFT + PMD_BITS), %g6
+	srlx		%g6, 64 - PAGE_SHIFT, %g6
+	sllx		%g5, 11, %g5
+	andn		%g6, 0x3, %g6
+	lduwa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
+	brz,pn		%g5, longpath
+	 sllx		%g4, 64 - PMD_SHIFT, %g6
+	srlx		%g6, 64 - PAGE_SHIFT, %g6
+	sllx		%g5, 11, %g5
+	andn		%g6, 0x7, %g6
+	ldxa		[%g5 + %g6] ASI_PHYS_USE_EC, %g5
+	brz,pn		%g5, longpath
+	 nop
+	ba,a,pt		%xcc, kvmap_load
+#endif
+
+kvmap_nonlinear:
+	sethi		%hi(MODULES_VADDR), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, longpath
+	 mov		(VMALLOC_END >> 24), %g5
+	sllx		%g5, 24, %g5
+	cmp		%g4, %g5
+	bgeu,pn		%xcc, longpath
+	 nop
+
+kvmap_check_obp:
+	sethi		%hi(LOW_OBP_ADDRESS), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_vmalloc_addr
+	 mov		0x1, %g5
+	sllx		%g5, 32, %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_do_obp
+	 nop
+
+kvmap_vmalloc_addr:
+	/* If we get here, a vmalloc addr was accessed, load kernel VPTE.  */
+	ldxa		[%g3 + %g6] ASI_N, %g5
+	brgez,pn	%g5, longpath
+	 nop
+
+kvmap_load:
+	/* PTE is valid, load into TLB and return from trap.  */
+	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
+	retry
diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c
index 331382e1a75d40d80680206b31b1c3eb26f129d3..cae5b61fe2f0ed76e9afbad05121fb08605c15e7 100644
--- a/arch/sparc64/kernel/pci_schizo.c
+++ b/arch/sparc64/kernel/pci_schizo.c
@@ -330,7 +330,7 @@ static int schizo_ino_to_pil(struct pci_dev *pdev, unsigned int ino)
 static void tomatillo_wsync_handler(struct ino_bucket *bucket, void *_arg1, void *_arg2)
 {
 	unsigned long sync_reg = (unsigned long) _arg2;
-	u64 mask = 1 << (__irq_ino(__irq(bucket)) & IMAP_INO);
+	u64 mask = 1UL << (__irq_ino(__irq(bucket)) & IMAP_INO);
 	u64 val;
 	int limit;
 
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index ddbed3341a232770b7c929291fc4f959862d9a17..8e8baf2354df6789ef3878f97e0d45765d29e4d3 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -496,7 +496,6 @@ extern void paging_init(void);
 
 void __init setup_arch(char **cmdline_p)
 {
-	unsigned long highest_paddr;
 	int i;
 
 	/* Initialize PROM console and command line. */
@@ -519,11 +518,7 @@ void __init setup_arch(char **cmdline_p)
 	idprom_init();
 	(void) prom_probe_memory();
 
-	/* In paging_init() we tip off this value to see if we need
-	 * to change init_mm.pgd to point to the real alias mapping.
-	 */
 	phys_base = 0xffffffffffffffffUL;
-	highest_paddr = 0UL;
 	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
 		unsigned long top;
 
@@ -531,25 +526,10 @@ void __init setup_arch(char **cmdline_p)
 			phys_base = sp_banks[i].base_addr;
 		top = sp_banks[i].base_addr +
 			sp_banks[i].num_bytes;
-		if (highest_paddr < top)
-			highest_paddr = top;
 	}
 	pfn_base = phys_base >> PAGE_SHIFT;
 
-	switch (tlb_type) {
-	default:
-	case spitfire:
-		kern_base = spitfire_get_itlb_data(sparc64_highest_locked_tlbent());
-		kern_base &= _PAGE_PADDR_SF;
-		break;
-
-	case cheetah:
-	case cheetah_plus:
-		kern_base = cheetah_get_litlb_data(sparc64_highest_locked_tlbent());
-		kern_base &= _PAGE_PADDR;
-		break;
-	};
-
+	kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
 	kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
 
 	if (!root_flags)
@@ -625,6 +605,9 @@ extern void smp_info(struct seq_file *);
 extern void smp_bogo(struct seq_file *);
 extern void mmu_info(struct seq_file *);
 
+unsigned int dcache_parity_tl1_occurred;
+unsigned int icache_parity_tl1_occurred;
+
 static int show_cpuinfo(struct seq_file *m, void *__unused)
 {
 	seq_printf(m, 
@@ -635,6 +618,8 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
 		   "type\t\t: sun4u\n"
 		   "ncpus probed\t: %ld\n"
 		   "ncpus active\t: %ld\n"
+		   "D$ parity tl1\t: %u\n"
+		   "I$ parity tl1\t: %u\n"
 #ifndef CONFIG_SMP
 		   "Cpu0Bogo\t: %lu.%02lu\n"
 		   "Cpu0ClkTck\t: %016lx\n"
@@ -647,7 +632,9 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
 		   (prom_prev >> 8) & 0xff,
 		   prom_prev & 0xff,
 		   (long)num_possible_cpus(),
-		   (long)num_online_cpus()
+		   (long)num_online_cpus(),
+		   dcache_parity_tl1_occurred,
+		   icache_parity_tl1_occurred
 #ifndef CONFIG_SMP
 		   , cpu_data(0).udelay_val/(500000/HZ),
 		   (cpu_data(0).udelay_val/(5000/HZ)) % 100,
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index b4fc6a5462b2192065052a1693d194e375066910..590df5a16f5a2d11a56c138b3e8e43d2017f9dd9 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -93,6 +93,27 @@ void __init smp_store_cpu_info(int id)
 	cpu_data(id).pte_cache[1]		= NULL;
 	cpu_data(id).pgd_cache			= NULL;
 	cpu_data(id).idle_volume		= 1;
+
+	cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",
+						      16 * 1024);
+	cpu_data(id).dcache_line_size =
+		prom_getintdefault(cpu_node, "dcache-line-size", 32);
+	cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size",
+						      16 * 1024);
+	cpu_data(id).icache_line_size =
+		prom_getintdefault(cpu_node, "icache-line-size", 32);
+	cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size",
+						      4 * 1024 * 1024);
+	cpu_data(id).ecache_line_size =
+		prom_getintdefault(cpu_node, "ecache-line-size", 64);
+	printk("CPU[%d]: Caches "
+	       "D[sz(%d):line_sz(%d)] "
+	       "I[sz(%d):line_sz(%d)] "
+	       "E[sz(%d):line_sz(%d)]\n",
+	       id,
+	       cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,
+	       cpu_data(id).icache_size, cpu_data(id).icache_line_size,
+	       cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);
 }
 
 static void smp_setup_percpu_timer(void);
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 3a145fc39cf2d4b4b619bbb1038e50bd383f2bd1..89f2fcfcd662e1b6773fca85030dcfe1cb547caf 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -119,8 +119,8 @@ startup_continue:
 	sethi		%hi(itlb_load), %g2
 	or		%g2, %lo(itlb_load), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x18]
-	sethi		%hi(mmu_ihandle_cache), %g2
-	lduw		[%g2 + %lo(mmu_ihandle_cache)], %g2
+	sethi		%hi(prom_mmu_ihandle_cache), %g2
+	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x20]
 	sethi		%hi(KERNBASE), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x28]
@@ -156,8 +156,8 @@ startup_continue:
 	sethi		%hi(itlb_load), %g2
 	or		%g2, %lo(itlb_load), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x18]
-	sethi		%hi(mmu_ihandle_cache), %g2
-	lduw		[%g2 + %lo(mmu_ihandle_cache)], %g2
+	sethi		%hi(prom_mmu_ihandle_cache), %g2
+	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x20]
 	sethi		%hi(KERNBASE + 0x400000), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x28]
@@ -190,8 +190,8 @@ do_dtlb:
 	sethi		%hi(dtlb_load), %g2
 	or		%g2, %lo(dtlb_load), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x18]
-	sethi		%hi(mmu_ihandle_cache), %g2
-	lduw		[%g2 + %lo(mmu_ihandle_cache)], %g2
+	sethi		%hi(prom_mmu_ihandle_cache), %g2
+	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x20]
 	sethi		%hi(KERNBASE), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x28]
@@ -228,8 +228,8 @@ do_dtlb:
 	sethi		%hi(dtlb_load), %g2
 	or		%g2, %lo(dtlb_load), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x18]
-	sethi		%hi(mmu_ihandle_cache), %g2
-	lduw		[%g2 + %lo(mmu_ihandle_cache)], %g2
+	sethi		%hi(prom_mmu_ihandle_cache), %g2
+	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x20]
 	sethi		%hi(KERNBASE + 0x400000), %g2
 	stx		%g2, [%sp + 2047 + 128 + 0x28]
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index b280b2ef674f4a80b57cfaf36dec0140be727a66..f8e7005fede9ab8c5df7a9b6fa73f14216dca017 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -869,14 +869,19 @@ static void cheetah_flush_ecache_line(unsigned long physaddr)
  */
 static void __cheetah_flush_icache(void)
 {
-	unsigned long i;
+	unsigned int icache_size, icache_line_size;
+	unsigned long addr;
+
+	icache_size = local_cpu_data().icache_size;
+	icache_line_size = local_cpu_data().icache_line_size;
 
 	/* Clear the valid bits in all the tags. */
-	for (i = 0; i < (1 << 15); i += (1 << 5)) {
+	for (addr = 0; addr < icache_size; addr += icache_line_size) {
 		__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
 				     "membar #Sync"
 				     : /* no outputs */
-				     : "r" (i | (2 << 3)), "i" (ASI_IC_TAG));
+				     : "r" (addr | (2 << 3)),
+				       "i" (ASI_IC_TAG));
 	}
 }
 
@@ -904,13 +909,17 @@ static void cheetah_flush_icache(void)
 
 static void cheetah_flush_dcache(void)
 {
-	unsigned long i;
+	unsigned int dcache_size, dcache_line_size;
+	unsigned long addr;
+
+	dcache_size = local_cpu_data().dcache_size;
+	dcache_line_size = local_cpu_data().dcache_line_size;
 
-	for (i = 0; i < (1 << 16); i += (1 << 5)) {
+	for (addr = 0; addr < dcache_size; addr += dcache_line_size) {
 		__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
 				     "membar #Sync"
 				     : /* no outputs */
-				     : "r" (i), "i" (ASI_DCACHE_TAG));
+				     : "r" (addr), "i" (ASI_DCACHE_TAG));
 	}
 }
 
@@ -921,24 +930,29 @@ static void cheetah_flush_dcache(void)
  */
 static void cheetah_plus_zap_dcache_parity(void)
 {
-	unsigned long i;
+	unsigned int dcache_size, dcache_line_size;
+	unsigned long addr;
+
+	dcache_size = local_cpu_data().dcache_size;
+	dcache_line_size = local_cpu_data().dcache_line_size;
 
-	for (i = 0; i < (1 << 16); i += (1 << 5)) {
-		unsigned long tag = (i >> 14);
-		unsigned long j;
+	for (addr = 0; addr < dcache_size; addr += dcache_line_size) {
+		unsigned long tag = (addr >> 14);
+		unsigned long line;
 
 		__asm__ __volatile__("membar	#Sync\n\t"
 				     "stxa	%0, [%1] %2\n\t"
 				     "membar	#Sync"
 				     : /* no outputs */
-				     : "r" (tag), "r" (i),
+				     : "r" (tag), "r" (addr),
 				       "i" (ASI_DCACHE_UTAG));
-		for (j = i; j < i + (1 << 5); j += (1 << 3))
+		for (line = addr; line < addr + dcache_line_size; line += 8)
 			__asm__ __volatile__("membar	#Sync\n\t"
 					     "stxa	%%g0, [%0] %1\n\t"
 					     "membar	#Sync"
 					     : /* no outputs */
-					     : "r" (j), "i" (ASI_DCACHE_DATA));
+					     : "r" (line),
+					       "i" (ASI_DCACHE_DATA));
 	}
 }
 
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index f47d0be39378a2a7603e4470d0bb5962c1e2dc9e..2af0cf0a86409fbc288f61061ebd953c19d91950 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -9,8 +9,7 @@ ENTRY(_start)
 jiffies = jiffies_64;
 SECTIONS
 {
-  swapper_pmd_dir = 0x0000000000402000;
-  empty_pg_dir = 0x0000000000403000;
+  swapper_low_pmd_dir = 0x0000000000402000;
   . = 0x4000;
   .text 0x0000000000404000 :
   {
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index fdb1ebb308c95bad153283b87e8665b177e0df2d..9f6ca624892d78ccdbd1af9cbefa499f7191ce4e 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/kprobes.h>
+#include <linux/cache.h>
 
 #include <asm/head.h>
 #include <asm/system.h>
@@ -42,22 +43,13 @@ extern void device_scan(void);
 
 struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
 
-unsigned long *sparc64_valid_addr_bitmap;
+unsigned long *sparc64_valid_addr_bitmap __read_mostly;
 
 /* Ugly, but necessary... -DaveM */
-unsigned long phys_base;
-unsigned long kern_base;
-unsigned long kern_size;
-unsigned long pfn_base;
-
-/* This is even uglier. We have a problem where the kernel may not be
- * located at phys_base. However, initial __alloc_bootmem() calls need to
- * be adjusted to be within the 4-8Megs that the kernel is mapped to, else
- * those page mappings wont work. Things are ok after inherit_prom_mappings
- * is called though. Dave says he'll clean this up some other time.
- * -- BenC
- */
-static unsigned long bootmap_base;
+unsigned long phys_base __read_mostly;
+unsigned long kern_base __read_mostly;
+unsigned long kern_size __read_mostly;
+unsigned long pfn_base __read_mostly;
 
 /* get_new_mmu_context() uses "cache + 1".  */
 DEFINE_SPINLOCK(ctx_alloc_lock);
@@ -73,7 +65,7 @@ extern unsigned long sparc_ramdisk_image64;
 extern unsigned int sparc_ramdisk_image;
 extern unsigned int sparc_ramdisk_size;
 
-struct page *mem_map_zero;
+struct page *mem_map_zero __read_mostly;
 
 int bigkernel = 0;
 
@@ -179,8 +171,6 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
 			     : "g1", "g7");
 }
 
-extern void __update_mmu_cache(unsigned long mmu_context_hw, unsigned long address, pte_t pte, int code);
-
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
 	struct page *page;
@@ -207,10 +197,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
 
 		put_cpu();
 	}
-
-	if (get_thread_fault_code())
-		__update_mmu_cache(CTX_NRBITS(vma->vm_mm->context),
-				   address, pte, get_thread_fault_code());
 }
 
 void flush_dcache_page(struct page *page)
@@ -309,6 +295,7 @@ struct linux_prom_translation {
 	unsigned long size;
 	unsigned long data;
 };
+static struct linux_prom_translation prom_trans[512] __initdata;
 
 extern unsigned long prom_boot_page;
 extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle);
@@ -318,14 +305,63 @@ extern void register_prom_callbacks(void);
 /* Exported for SMP bootup purposes. */
 unsigned long kern_locked_tte_data;
 
-void __init early_pgtable_allocfail(char *type)
+/* Exported for kernel TLB miss handling in ktlb.S */
+unsigned long prom_pmd_phys __read_mostly;
+unsigned int swapper_pgd_zero __read_mostly;
+
+/* Allocate a power-of-2 sized chunk just past the end of the kernel image
+ * by growing kern_size.  Return the physical address of the chunk.
+ */
+static inline unsigned long early_alloc_phys(unsigned long size)
+{
+	unsigned long base;
+
+	BUILD_BUG_ON(size & (size - 1));	/* NOTE(review): size is a parameter; confirm this check still fires for non-constant expressions */
+
+	kern_size = (kern_size + (size - 1)) & ~(size - 1);	/* round kern_size up to a multiple of size */
+	base = kern_base + kern_size;
+	kern_size += size;	/* the chunk is now counted as part of the kernel image */
+
+	return base;
+}
+
+static inline unsigned long load_phys32(unsigned long pa)
+{	/* Return the 32-bit word at address pa, read through ASI_PHYS_USE_EC. */
+	unsigned long val;
+
+	__asm__ __volatile__("lduwa	[%1] %2, %0"	/* unsigned word load from an alternate space */
+			     : "=&r" (val)
+			     : "r" (pa), "i" (ASI_PHYS_USE_EC));
+
+	return val;
+}
+
+static inline unsigned long load_phys64(unsigned long pa)
+{	/* Return the 64-bit value at address pa, read through ASI_PHYS_USE_EC. */
+	unsigned long val;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"	/* 64-bit load from an alternate space */
+			     : "=&r" (val)
+			     : "r" (pa), "i" (ASI_PHYS_USE_EC));
+
+	return val;
+}
+
+static inline void store_phys32(unsigned long pa, unsigned long val)
+{	/* Store the low 32 bits of val at address pa, written through ASI_PHYS_USE_EC. */
+	__asm__ __volatile__("stwa	%0, [%1] %2"	/* word store to an alternate space */
+			     : /* no outputs */
+			     : "r" (val), "r" (pa), "i" (ASI_PHYS_USE_EC));
+}
+
+static inline void store_phys64(unsigned long pa, unsigned long val)
 {
-	prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type);
-	prom_halt();
+	__asm__ __volatile__("stxa	%0, [%1] %2"
+			     : /* no outputs */
+			     : "r" (val), "r" (pa), "i" (ASI_PHYS_USE_EC));
 }
 
 #define BASE_PAGE_SIZE 8192
-static pmd_t *prompmd;
 
 /*
  * Translate PROM's mapping we capture at boot time into physical address.
@@ -333,278 +369,172 @@ static pmd_t *prompmd;
  */
 unsigned long prom_virt_to_phys(unsigned long promva, int *error)
 {
-	pmd_t *pmdp = prompmd + ((promva >> 23) & 0x7ff);
-	pte_t *ptep;
+	unsigned long pmd_phys = (prom_pmd_phys +
+				  ((promva >> 23) & 0x7ff) * sizeof(pmd_t));
+	unsigned long pte_phys;
+	pmd_t pmd_ent;
+	pte_t pte_ent;
 	unsigned long base;
 
-	if (pmd_none(*pmdp)) {
+	pmd_val(pmd_ent) = load_phys32(pmd_phys);
+	if (pmd_none(pmd_ent)) {
 		if (error)
 			*error = 1;
-		return(0);
+		return 0;
 	}
-	ptep = (pte_t *)__pmd_page(*pmdp) + ((promva >> 13) & 0x3ff);
-	if (!pte_present(*ptep)) {
+
+	pte_phys = (unsigned long)pmd_val(pmd_ent) << 11UL;
+	pte_phys += ((promva >> 13) & 0x3ff) * sizeof(pte_t);
+	pte_val(pte_ent) = load_phys64(pte_phys);
+	if (!pte_present(pte_ent)) {
 		if (error)
 			*error = 1;
-		return(0);
+		return 0;
 	}
 	if (error) {
 		*error = 0;
-		return(pte_val(*ptep));
+		return pte_val(pte_ent);
 	}
-	base = pte_val(*ptep) & _PAGE_PADDR;
-	return(base + (promva & (BASE_PAGE_SIZE - 1)));
+	base = pte_val(pte_ent) & _PAGE_PADDR;
+	return (base + (promva & (BASE_PAGE_SIZE - 1)));
 }
 
-static void inherit_prom_mappings(void)
+/* The obp translations are saved based on 8k pagesize, since obp can
+ * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
+ * HI_OBP_ADDRESS range are handled in entry.S and do not use the vpte
+ * scheme (also, see rant in inherit_locked_prom_mappings()).
+ */
+static void __init build_obp_range(unsigned long start, unsigned long end, unsigned long data)
 {
-	struct linux_prom_translation *trans;
-	unsigned long phys_page, tte_vaddr, tte_data;
-	void (*remap_func)(unsigned long, unsigned long, int);
-	pmd_t *pmdp;
-	pte_t *ptep;
-	int node, n, i, tsz;
-	extern unsigned int obp_iaddr_patch[2], obp_daddr_patch[2];
+	unsigned long vaddr;
 
-	node = prom_finddevice("/virtual-memory");
-	n = prom_getproplen(node, "translations");
-	if (n == 0 || n == -1) {
-		prom_printf("Couldn't get translation property\n");
-		prom_halt();
-	}
-	n += 5 * sizeof(struct linux_prom_translation);
-	for (tsz = 1; tsz < n; tsz <<= 1)
-		/* empty */;
-	trans = __alloc_bootmem(tsz, SMP_CACHE_BYTES, bootmap_base);
-	if (trans == NULL) {
-		prom_printf("inherit_prom_mappings: Cannot alloc translations.\n");
-		prom_halt();
-	}
-	memset(trans, 0, tsz);
+	for (vaddr = start; vaddr < end; vaddr += BASE_PAGE_SIZE) {
+		unsigned long val, pte_phys, pmd_phys;
+		pmd_t pmd_ent;
+		int i;
 
-	if ((n = prom_getproperty(node, "translations", (char *)trans, tsz)) == -1) {
-		prom_printf("Couldn't get translation property\n");
-		prom_halt();
-	}
-	n = n / sizeof(*trans);
+		pmd_phys = (prom_pmd_phys +
+			    (((vaddr >> 23) & 0x7ff) * sizeof(pmd_t)));
+		pmd_val(pmd_ent) = load_phys32(pmd_phys);
+		if (pmd_none(pmd_ent)) {
+			pte_phys = early_alloc_phys(BASE_PAGE_SIZE);
 
-	/*
-	 * The obp translations are saved based on 8k pagesize, since obp can
-	 * use a mixture of pagesizes. Misses to the 0xf0000000 - 0x100000000,
-	 * ie obp range, are handled in entry.S and do not use the vpte scheme
-	 * (see rant in inherit_locked_prom_mappings()).
-	 */
-#define OBP_PMD_SIZE 2048
-	prompmd = __alloc_bootmem(OBP_PMD_SIZE, OBP_PMD_SIZE, bootmap_base);
-	if (prompmd == NULL)
-		early_pgtable_allocfail("pmd");
-	memset(prompmd, 0, OBP_PMD_SIZE);
-	for (i = 0; i < n; i++) {
-		unsigned long vaddr;
-
-		if (trans[i].virt >= LOW_OBP_ADDRESS && trans[i].virt < HI_OBP_ADDRESS) {
-			for (vaddr = trans[i].virt;
-			     ((vaddr < trans[i].virt + trans[i].size) && 
-			     (vaddr < HI_OBP_ADDRESS));
-			     vaddr += BASE_PAGE_SIZE) {
-				unsigned long val;
-
-				pmdp = prompmd + ((vaddr >> 23) & 0x7ff);
-				if (pmd_none(*pmdp)) {
-					ptep = __alloc_bootmem(BASE_PAGE_SIZE,
-							       BASE_PAGE_SIZE,
-							       bootmap_base);
-					if (ptep == NULL)
-						early_pgtable_allocfail("pte");
-					memset(ptep, 0, BASE_PAGE_SIZE);
-					pmd_set(pmdp, ptep);
-				}
-				ptep = (pte_t *)__pmd_page(*pmdp) +
-						((vaddr >> 13) & 0x3ff);
+			for (i = 0; i < BASE_PAGE_SIZE / sizeof(pte_t); i++)
+				store_phys64(pte_phys+i*sizeof(pte_t),0);
 
-				val = trans[i].data;
+			pmd_val(pmd_ent) = pte_phys >> 11UL;
+			store_phys32(pmd_phys, pmd_val(pmd_ent));
+		}
 
-				/* Clear diag TTE bits. */
-				if (tlb_type == spitfire)
-					val &= ~0x0003fe0000000000UL;
+		pte_phys = (unsigned long)pmd_val(pmd_ent) << 11UL;
+		pte_phys += (((vaddr >> 13) & 0x3ff) * sizeof(pte_t));
 
-				set_pte_at(&init_mm, vaddr,
-					   ptep, __pte(val | _PAGE_MODIFIED));
-				trans[i].data += BASE_PAGE_SIZE;
-			}
-		}
-	}
-	phys_page = __pa(prompmd);
-	obp_iaddr_patch[0] |= (phys_page >> 10);
-	obp_iaddr_patch[1] |= (phys_page & 0x3ff);
-	flushi((long)&obp_iaddr_patch[0]);
-	obp_daddr_patch[0] |= (phys_page >> 10);
-	obp_daddr_patch[1] |= (phys_page & 0x3ff);
-	flushi((long)&obp_daddr_patch[0]);
+		val = data;
 
-	/* Now fixup OBP's idea about where we really are mapped. */
-	prom_printf("Remapping the kernel... ");
+		/* Clear diag TTE bits. */
+		if (tlb_type == spitfire)
+			val &= ~0x0003fe0000000000UL;
 
-	/* Spitfire Errata #32 workaround */
-	/* NOTE: Using plain zero for the context value is
-	 *       correct here, we are not using the Linux trap
-	 *       tables yet so we should not use the special
-	 *       UltraSPARC-III+ page size encodings yet.
-	 */
-	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-			     "flush	%%g6"
-			     : /* No outputs */
-			     : "r" (0), "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
-	switch (tlb_type) {
-	default:
-	case spitfire:
-		phys_page = spitfire_get_dtlb_data(sparc64_highest_locked_tlbent());
-		break;
-
-	case cheetah:
-	case cheetah_plus:
-		phys_page = cheetah_get_litlb_data(sparc64_highest_locked_tlbent());
-		break;
-	};
-
-	phys_page &= _PAGE_PADDR;
-	phys_page += ((unsigned long)&prom_boot_page -
-		      (unsigned long)KERNBASE);
+		store_phys64(pte_phys, val | _PAGE_MODIFIED);
 
-	if (tlb_type == spitfire) {
-		/* Lock this into i/d tlb entry 59 */
-		__asm__ __volatile__(
-			"stxa	%%g0, [%2] %3\n\t"
-			"stxa	%0, [%1] %4\n\t"
-			"membar	#Sync\n\t"
-			"flush	%%g6\n\t"
-			"stxa	%%g0, [%2] %5\n\t"
-			"stxa	%0, [%1] %6\n\t"
-			"membar	#Sync\n\t"
-			"flush	%%g6"
-			: : "r" (phys_page | _PAGE_VALID | _PAGE_SZ8K | _PAGE_CP |
-				 _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W),
-			"r" (59 << 3), "r" (TLB_TAG_ACCESS),
-			"i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS),
-			"i" (ASI_IMMU), "i" (ASI_ITLB_DATA_ACCESS)
-			: "memory");
-	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-		/* Lock this into i/d tlb-0 entry 11 */
-		__asm__ __volatile__(
-			"stxa	%%g0, [%2] %3\n\t"
-			"stxa	%0, [%1] %4\n\t"
-			"membar	#Sync\n\t"
-			"flush	%%g6\n\t"
-			"stxa	%%g0, [%2] %5\n\t"
-			"stxa	%0, [%1] %6\n\t"
-			"membar	#Sync\n\t"
-			"flush	%%g6"
-			: : "r" (phys_page | _PAGE_VALID | _PAGE_SZ8K | _PAGE_CP |
-				 _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W),
-			"r" ((0 << 16) | (11 << 3)), "r" (TLB_TAG_ACCESS),
-			"i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS),
-			"i" (ASI_IMMU), "i" (ASI_ITLB_DATA_ACCESS)
-			: "memory");
-	} else {
-		/* Implement me :-) */
-		BUG();
+		data += BASE_PAGE_SIZE;
 	}
+}
 
-	tte_vaddr = (unsigned long) KERNBASE;
+static inline int in_obp_range(unsigned long vaddr)
+{	/* Non-zero when vaddr lies in the half-open range [LOW_OBP_ADDRESS, HI_OBP_ADDRESS). */
+	return (vaddr >= LOW_OBP_ADDRESS &&
+		vaddr < HI_OBP_ADDRESS);
+}
 
-	/* Spitfire Errata #32 workaround */
-	/* NOTE: Using plain zero for the context value is
-	 *       correct here, we are not using the Linux trap
-	 *       tables yet so we should not use the special
-	 *       UltraSPARC-III+ page size encodings yet.
-	 */
-	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-			     "flush	%%g6"
-			     : /* No outputs */
-			     : "r" (0),
-			     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
-	if (tlb_type == spitfire)
-		tte_data = spitfire_get_dtlb_data(sparc64_highest_locked_tlbent());
-	else
-		tte_data = cheetah_get_ldtlb_data(sparc64_highest_locked_tlbent());
+#define OBP_PMD_SIZE 2048
+static void __init build_obp_pgtable(int prom_trans_ents)
+{
+	unsigned long i;
 
-	kern_locked_tte_data = tte_data;
+	prom_pmd_phys = early_alloc_phys(OBP_PMD_SIZE);
+	for (i = 0; i < OBP_PMD_SIZE; i += 4)
+		store_phys32(prom_pmd_phys + i, 0);
+
+	for (i = 0; i < prom_trans_ents; i++) {
+		unsigned long start, end;
 
-	remap_func = (void *)  ((unsigned long) &prom_remap -
-				(unsigned long) &prom_boot_page);
+		if (!in_obp_range(prom_trans[i].virt))
+			continue;
 
+		start = prom_trans[i].virt;
+		end = start + prom_trans[i].size;
+		if (end > HI_OBP_ADDRESS)
+			end = HI_OBP_ADDRESS;
 
-	/* Spitfire Errata #32 workaround */
-	/* NOTE: Using plain zero for the context value is
-	 *       correct here, we are not using the Linux trap
-	 *       tables yet so we should not use the special
-	 *       UltraSPARC-III+ page size encodings yet.
-	 */
-	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
-			     "flush	%%g6"
-			     : /* No outputs */
-			     : "r" (0),
-			     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
-	remap_func((tlb_type == spitfire ?
-		    (spitfire_get_dtlb_data(sparc64_highest_locked_tlbent()) & _PAGE_PADDR) :
-		    (cheetah_get_litlb_data(sparc64_highest_locked_tlbent()) & _PAGE_PADDR)),
-		   (unsigned long) KERNBASE,
-		   prom_get_mmu_ihandle());
-
-	if (bigkernel)
-		remap_func(((tte_data + 0x400000) & _PAGE_PADDR),
-			(unsigned long) KERNBASE + 0x400000, prom_get_mmu_ihandle());
-
-	/* Flush out that temporary mapping. */
-	spitfire_flush_dtlb_nucleus_page(0x0);
-	spitfire_flush_itlb_nucleus_page(0x0);
-
-	/* Now lock us back into the TLBs via OBP. */
-	prom_dtlb_load(sparc64_highest_locked_tlbent(), tte_data, tte_vaddr);
-	prom_itlb_load(sparc64_highest_locked_tlbent(), tte_data, tte_vaddr);
-	if (bigkernel) {
-		prom_dtlb_load(sparc64_highest_locked_tlbent()-1, tte_data + 0x400000, 
-								tte_vaddr + 0x400000);
-		prom_itlb_load(sparc64_highest_locked_tlbent()-1, tte_data + 0x400000, 
-								tte_vaddr + 0x400000);
+		build_obp_range(start, end, prom_trans[i].data);
 	}
+}
 
-	/* Re-read translations property. */
-	if ((n = prom_getproperty(node, "translations", (char *)trans, tsz)) == -1) {
-		prom_printf("Couldn't get translation property\n");
+/* Read the OBP "translations" property of /virtual-memory into
+ * 'prom_trans[]', halting on any failure.  Return the number of entries.
+ */
+static int __init read_obp_translations(void)
+{
+	int n, node;
+
+	node = prom_finddevice("/virtual-memory");
+	n = prom_getproplen(node, "translations");	/* property length in bytes */
+	if (unlikely(n == 0 || n == -1)) {
+		prom_printf("prom_mappings: Couldn't get size.\n");
+		prom_halt();
+	}
+	if (unlikely(n > sizeof(prom_trans))) {	/* property must fit the static buffer */
+		prom_printf("prom_mappings: Size %d is too big.\n", n);	/* n is an int, so %d rather than %Zd */
 		prom_halt();
 	}
-	n = n / sizeof(*trans);
 
-	for (i = 0; i < n; i++) {
-		unsigned long vaddr = trans[i].virt;
-		unsigned long size = trans[i].size;
+	if ((n = prom_getproperty(node, "translations",
+				  (char *)&prom_trans[0],
+				  sizeof(prom_trans))) == -1) {
+		prom_printf("prom_mappings: Couldn't get property.\n");
+		prom_halt();
+	}
+	n = n / sizeof(struct linux_prom_translation);	/* bytes -> number of entries */
+	return n;
+}
 
-		if (vaddr < 0xf0000000UL) {
-			unsigned long avoid_start = (unsigned long) KERNBASE;
-			unsigned long avoid_end = avoid_start + (4 * 1024 * 1024);
+static void __init remap_kernel(void)
+{
+	unsigned long phys_page, tte_vaddr, tte_data;
+	int tlb_ent = sparc64_highest_locked_tlbent();
 
-			if (bigkernel)
-				avoid_end += (4 * 1024 * 1024);
-			if (vaddr < avoid_start) {
-				unsigned long top = vaddr + size;
+	tte_vaddr = (unsigned long) KERNBASE;
+	phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
+	tte_data = (phys_page | (_PAGE_VALID | _PAGE_SZ4MB |
+				 _PAGE_CP | _PAGE_CV | _PAGE_P |
+				 _PAGE_L | _PAGE_W));
 
-				if (top > avoid_start)
-					top = avoid_start;
-				prom_unmap(top - vaddr, vaddr);
-			}
-			if ((vaddr + size) > avoid_end) {
-				unsigned long bottom = vaddr;
+	kern_locked_tte_data = tte_data;
 
-				if (bottom < avoid_end)
-					bottom = avoid_end;
-				prom_unmap((vaddr + size) - bottom, bottom);
-			}
-		}
+	/* Now lock us into the TLBs via OBP. */
+	prom_dtlb_load(tlb_ent, tte_data, tte_vaddr);
+	prom_itlb_load(tlb_ent, tte_data, tte_vaddr);
+	if (bigkernel) {
+		prom_dtlb_load(tlb_ent - 1,
+			       tte_data + 0x400000, 
+			       tte_vaddr + 0x400000);
+		prom_itlb_load(tlb_ent - 1,
+			       tte_data + 0x400000, 
+			       tte_vaddr + 0x400000);
 	}
+}
+
+static void __init inherit_prom_mappings(void)
+{
+	int n;
+
+	n = read_obp_translations();
+	build_obp_pgtable(n);
+
+	/* Now fixup OBP's idea about where we really are mapped. */
+	prom_printf("Remapping the kernel... ");
+	remap_kernel();
 
 	prom_printf("done.\n");
 
@@ -1347,8 +1277,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 #endif
 	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn);
 
-	bootmap_base = bootmap_pfn << PAGE_SHIFT;
-
 	/* Now register the available physical memory with the
 	 * allocator.
 	 */
@@ -1398,120 +1326,142 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 	return end_pfn;
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot)
+{	/* Write PTEs for physical [pstart, pend) at PAGE_OFFSET + paddr using prot; return bytes of page tables allocated. */
+	unsigned long vstart = PAGE_OFFSET + pstart;
+	unsigned long vend = PAGE_OFFSET + pend;
+	unsigned long alloc_bytes = 0UL;
+
+	if ((vstart & ~PAGE_MASK) || (vend & ~PAGE_MASK)) {	/* both ends must be page aligned */
+		prom_printf("kernel_map: Unaligned sp_banks[%lx:%lx]\n",
+			    vstart, vend);
+		prom_halt();
+	}
+
+	while (vstart < vend) {	/* one iteration per PMD-sized stretch of the range */
+		unsigned long this_end, paddr = __pa(vstart);
+		pgd_t *pgd = pgd_offset_k(vstart);
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		pud = pud_offset(pgd, vstart);
+		if (pud_none(*pud)) {	/* no pmd table yet: allocate one page for it */
+			pmd_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);	/* NOTE(review): result is not checked for NULL */
+			alloc_bytes += PAGE_SIZE;
+			pud_populate(&init_mm, pud, new);
+		}
+
+		pmd = pmd_offset(pud, vstart);
+		if (!pmd_present(*pmd)) {	/* no pte table yet: allocate one page for it */
+			pte_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);	/* NOTE(review): also reachable from kernel_map_pages(), which is not __init -- confirm tables always exist by then */
+			alloc_bytes += PAGE_SIZE;
+			pmd_populate_kernel(&init_mm, pmd, new);
+		}
+
+		pte = pte_offset_kernel(pmd, vstart);
+		this_end = (vstart + PMD_SIZE) & PMD_MASK;	/* next PMD boundary */
+		if (this_end > vend)
+			this_end = vend;
+
+		while (vstart < this_end) {	/* fill consecutive PTEs within this pte table */
+			pte_val(*pte) = (paddr | pgprot_val(prot));
+
+			vstart += PAGE_SIZE;
+			paddr += PAGE_SIZE;
+			pte++;
+		}
+	}
+
+	return alloc_bytes;
+}
+
+extern struct linux_mlist_p1275 *prom_ptot_ptr;
+extern unsigned int kvmap_linear_patch[1];
+
+static void __init kernel_physical_mapping_init(void)
+{	/* Build PAGE_KERNEL page tables for every region on the prom_ptot_ptr list, then patch kvmap to use them. */
+	struct linux_mlist_p1275 *p = prom_ptot_ptr;
+	unsigned long mem_alloced = 0UL;
+
+	while (p) {	/* walk the list through ->theres_more */
+		unsigned long phys_start, phys_end;
+
+		phys_start = p->start_adr;
+		phys_end = phys_start + p->num_bytes;
+		mem_alloced += kernel_map_range(phys_start, phys_end,
+						PAGE_KERNEL);
+
+		p = p->theres_more;
+	}
+
+	printk("Allocated %ld bytes for kernel page tables.\n",
+	       mem_alloced);
+
+	kvmap_linear_patch[0] = 0x01000000; /* nop: overwrites the instruction at the kvmap_linear_patch label */
+	flushi(&kvmap_linear_patch[0]);
+
+	__flush_tlb_all();
+}
+
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{	/* Rewrite the PTEs of numpages pages starting at page: PAGE_KERNEL if enable, otherwise __pgprot(0). */
+	unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
+	unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);
+
+	kernel_map_range(phys_start, phys_end,
+			 (enable ? PAGE_KERNEL : __pgprot(0)));
+
+	/* We should send an IPI and flush the TLBs of all cpus, but that
+	 * can deadlock, so only the current cpu is flushed here.
+	 */
+	__flush_tlb_kernel_range(PAGE_OFFSET + phys_start,
+				 PAGE_OFFSET + phys_end);
+}
+#endif
+
 /* paging_init() sets up the page tables */
 
 extern void cheetah_ecache_flush_init(void);
 
 static unsigned long last_valid_pfn;
+pgd_t swapper_pg_dir[2048];
 
 void __init paging_init(void)
 {
-	extern pmd_t swapper_pmd_dir[1024];
-	extern unsigned int sparc64_vpte_patchme1[1];
-	extern unsigned int sparc64_vpte_patchme2[1];
-	unsigned long alias_base = kern_base + PAGE_OFFSET;
-	unsigned long second_alias_page = 0;
-	unsigned long pt, flags, end_pfn, pages_avail;
-	unsigned long shift = alias_base - ((unsigned long)KERNBASE);
+	unsigned long end_pfn, pages_avail, shift;
 	unsigned long real_end;
 
 	set_bit(0, mmu_context_bmap);
 
+	shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
+
 	real_end = (unsigned long)_end;
 	if ((real_end > ((unsigned long)KERNBASE + 0x400000)))
 		bigkernel = 1;
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (sparc_ramdisk_image || sparc_ramdisk_image64)
-		real_end = (PAGE_ALIGN(real_end) + PAGE_ALIGN(sparc_ramdisk_size));
-#endif
-
-	/* We assume physical memory starts at some 4mb multiple,
-	 * if this were not true we wouldn't boot up to this point
-	 * anyways.
-	 */
-	pt  = kern_base | _PAGE_VALID | _PAGE_SZ4MB;
-	pt |= _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W;
-	local_irq_save(flags);
-	if (tlb_type == spitfire) {
-		__asm__ __volatile__(
-	"	stxa	%1, [%0] %3\n"
-	"	stxa	%2, [%5] %4\n"
-	"	membar	#Sync\n"
-	"	flush	%%g6\n"
-	"	nop\n"
-	"	nop\n"
-	"	nop\n"
-		: /* No outputs */
-		: "r" (TLB_TAG_ACCESS), "r" (alias_base), "r" (pt),
-		  "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), "r" (61 << 3)
-		: "memory");
-		if (real_end >= KERNBASE + 0x340000) {
-			second_alias_page = alias_base + 0x400000;
-			__asm__ __volatile__(
-		"	stxa	%1, [%0] %3\n"
-		"	stxa	%2, [%5] %4\n"
-		"	membar	#Sync\n"
-		"	flush	%%g6\n"
-		"	nop\n"
-		"	nop\n"
-		"	nop\n"
-			: /* No outputs */
-			: "r" (TLB_TAG_ACCESS), "r" (second_alias_page), "r" (pt + 0x400000),
-			  "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), "r" (60 << 3)
-			: "memory");
-		}
-	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-		__asm__ __volatile__(
-	"	stxa	%1, [%0] %3\n"
-	"	stxa	%2, [%5] %4\n"
-	"	membar	#Sync\n"
-	"	flush	%%g6\n"
-	"	nop\n"
-	"	nop\n"
-	"	nop\n"
-		: /* No outputs */
-		: "r" (TLB_TAG_ACCESS), "r" (alias_base), "r" (pt),
-		  "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), "r" ((0<<16) | (13<<3))
-		: "memory");
-		if (real_end >= KERNBASE + 0x340000) {
-			second_alias_page = alias_base + 0x400000;
-			__asm__ __volatile__(
-		"	stxa	%1, [%0] %3\n"
-		"	stxa	%2, [%5] %4\n"
-		"	membar	#Sync\n"
-		"	flush	%%g6\n"
-		"	nop\n"
-		"	nop\n"
-		"	nop\n"
-			: /* No outputs */
-			: "r" (TLB_TAG_ACCESS), "r" (second_alias_page), "r" (pt + 0x400000),
-			  "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), "r" ((0<<16) | (12<<3))
-			: "memory");
-		}
+	if ((real_end > ((unsigned long)KERNBASE + 0x800000))) {
+		prom_printf("paging_init: Kernel > 8MB, too large.\n");
+		prom_halt();
 	}
-	local_irq_restore(flags);
-	
-	/* Now set kernel pgd to upper alias so physical page computations
+
+	/* Set kernel pgd to upper alias so physical page computations
 	 * work.
 	 */
 	init_mm.pgd += ((shift) / (sizeof(pgd_t)));
 	
-	memset(swapper_pmd_dir, 0, sizeof(swapper_pmd_dir));
+	memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir));
 
 	/* Now can init the kernel/bad page tables. */
 	pud_set(pud_offset(&swapper_pg_dir[0], 0),
-		swapper_pmd_dir + (shift / sizeof(pgd_t)));
+		swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
 	
-	sparc64_vpte_patchme1[0] |=
-		(((unsigned long)pgd_val(init_mm.pgd[0])) >> 10);
-	sparc64_vpte_patchme2[0] |=
-		(((unsigned long)pgd_val(init_mm.pgd[0])) & 0x3ff);
-	flushi((long)&sparc64_vpte_patchme1[0]);
+	swapper_pgd_zero = pgd_val(swapper_pg_dir[0]);
 	
-	/* Setup bootmem... */
-	pages_avail = 0;
-	last_valid_pfn = end_pfn = bootmem_init(&pages_avail);
-
 	/* Inherit non-locked OBP mappings. */
 	inherit_prom_mappings();
 	
@@ -1527,13 +1477,16 @@ void __init paging_init(void)
 
 	inherit_locked_prom_mappings(1);
 
-	/* We only created DTLB mapping of this stuff. */
-	spitfire_flush_dtlb_nucleus_page(alias_base);
-	if (second_alias_page)
-		spitfire_flush_dtlb_nucleus_page(second_alias_page);
-
 	__flush_tlb_all();
 
+	/* Setup bootmem... */
+	pages_avail = 0;
+	last_valid_pfn = end_pfn = bootmem_init(&pages_avail);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	kernel_physical_mapping_init();
+#endif
+
 	{
 		unsigned long zones_size[MAX_NR_ZONES];
 		unsigned long zholes_size[MAX_NR_ZONES];
@@ -1695,8 +1648,7 @@ void __init mem_init(void)
 
 	i = last_valid_pfn >> ((22 - PAGE_SHIFT) + 6);
 	i += 1;
-	sparc64_valid_addr_bitmap = (unsigned long *)
-		__alloc_bootmem(i << 3, SMP_CACHE_BYTES, bootmap_base);
+	sparc64_valid_addr_bitmap = (unsigned long *) alloc_bootmem(i << 3);
 	if (sparc64_valid_addr_bitmap == NULL) {
 		prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
 		prom_halt();
@@ -1749,7 +1701,7 @@ void __init mem_init(void)
 		cheetah_ecache_flush_init();
 }
 
-void free_initmem (void)
+void free_initmem(void)
 {
 	unsigned long addr, initend;
 
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index b2ee9b53227f5746b19351dc20ea873231831a58..058b8126c1a72793d939d72b92440f56d806fa59 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -144,42 +144,29 @@ __flush_icache_page:	/* %o0 = phys_page */
 
 #define DTAG_MASK 0x3
 
+	/* This routine is Spitfire specific so the hardcoded
+	 * D-cache size and line-size are OK.
+	 */
 	.align		64
 	.globl		__flush_dcache_page
 __flush_dcache_page:	/* %o0=kaddr, %o1=flush_icache */
 	sethi		%uhi(PAGE_OFFSET), %g1
 	sllx		%g1, 32, %g1
-	sub		%o0, %g1, %o0
-	clr		%o4
-	srlx		%o0, 11, %o0
-	sethi		%hi(1 << 14), %o2
-1:	ldxa		[%o4] ASI_DCACHE_TAG, %o3	! LSU	Group
-	add		%o4, (1 << 5), %o4		! IEU0
-	ldxa		[%o4] ASI_DCACHE_TAG, %g1	! LSU	Group
-	add		%o4, (1 << 5), %o4		! IEU0
-	ldxa		[%o4] ASI_DCACHE_TAG, %g2	! LSU	Group	o3 available
-	add		%o4, (1 << 5), %o4		! IEU0
-	andn		%o3, DTAG_MASK, %o3		! IEU1
-	ldxa		[%o4] ASI_DCACHE_TAG, %g3	! LSU	Group
-	add		%o4, (1 << 5), %o4		! IEU0
-	andn		%g1, DTAG_MASK, %g1		! IEU1
-	cmp		%o0, %o3			! IEU1	Group
-	be,a,pn		%xcc, dflush1			! CTI
-	 sub		%o4, (4 << 5), %o4		! IEU0	(Group)
-	cmp		%o0, %g1			! IEU1	Group
-	andn		%g2, DTAG_MASK, %g2		! IEU0
-	be,a,pn		%xcc, dflush2			! CTI
-	 sub		%o4, (3 << 5), %o4		! IEU0	(Group)
-	cmp		%o0, %g2			! IEU1	Group
-	andn		%g3, DTAG_MASK, %g3		! IEU0
-	be,a,pn		%xcc, dflush3			! CTI
-	 sub		%o4, (2 << 5), %o4		! IEU0	(Group)
-	cmp		%o0, %g3			! IEU1	Group
-	be,a,pn		%xcc, dflush4			! CTI
-	 sub		%o4, (1 << 5), %o4		! IEU0
-2:	cmp		%o4, %o2			! IEU1	Group
-	bne,pt		%xcc, 1b			! CTI
-	 nop						! IEU0
+	sub		%o0, %g1, %o0			! physical address
+	srlx		%o0, 11, %o0			! make D-cache TAG
+	sethi		%hi(1 << 14), %o2		! D-cache size
+	sub		%o2, (1 << 5), %o2		! Start at last D-cache line
+1:	ldxa		[%o2] ASI_DCACHE_TAG, %o3	! load D-cache TAG
+	andcc		%o3, DTAG_MASK, %g0		! Valid?
+	be,pn		%xcc, 2f			! Nope, branch
+	 andn		%o3, DTAG_MASK, %o3		! Clear valid bits
+	cmp		%o3, %o0			! TAG match?
+	bne,pt		%xcc, 2f			! Nope, branch
+	 nop
+	stxa		%g0, [%o2] ASI_DCACHE_TAG	! Invalidate TAG
+	membar		#Sync
+2:	brnz,pt		%o2, 1b
+	 sub		%o2, (1 << 5), %o2		! Previous line (delay slot)
 
 	/* The I-cache does not snoop local stores so we
 	 * better flush that too when necessary.
@@ -189,48 +176,9 @@ __flush_dcache_page:	/* %o0=kaddr, %o1=flush_icache */
 	retl
 	 nop
 
-dflush1:stxa		%g0, [%o4] ASI_DCACHE_TAG
-	add		%o4, (1 << 5), %o4
-dflush2:stxa		%g0, [%o4] ASI_DCACHE_TAG
-	add		%o4, (1 << 5), %o4
-dflush3:stxa		%g0, [%o4] ASI_DCACHE_TAG
-	add		%o4, (1 << 5), %o4
-dflush4:stxa		%g0, [%o4] ASI_DCACHE_TAG
-	add		%o4, (1 << 5), %o4
-	membar		#Sync
-	ba,pt		%xcc, 2b
-	 nop
 #endif /* DCACHE_ALIASING_POSSIBLE */
 
-	.previous .text
-	.align		32
-__prefill_dtlb:
-	rdpr		%pstate, %g7
-	wrpr		%g7, PSTATE_IE, %pstate
-	mov		TLB_TAG_ACCESS, %g1
-	stxa		%o5, [%g1] ASI_DMMU
-	stxa		%o2, [%g0] ASI_DTLB_DATA_IN
-	flush		%g6
-	retl
-	 wrpr		%g7, %pstate
-__prefill_itlb:
-	rdpr		%pstate, %g7
-	wrpr		%g7, PSTATE_IE, %pstate
-	mov		TLB_TAG_ACCESS, %g1
-	stxa		%o5, [%g1] ASI_IMMU
-	stxa		%o2, [%g0] ASI_ITLB_DATA_IN
-	flush		%g6
-	retl
-	 wrpr		%g7, %pstate
-
-	.globl		__update_mmu_cache
-__update_mmu_cache:	/* %o0=hw_context, %o1=address, %o2=pte, %o3=fault_code */
-	srlx		%o1, PAGE_SHIFT, %o1
-	andcc		%o3, FAULT_CODE_DTLB, %g0
-	sllx		%o1, PAGE_SHIFT, %o5
-	bne,pt		%xcc, __prefill_dtlb
-	 or		%o5, %o0, %o5
-	ba,a,pt		%xcc, __prefill_itlb
+	.previous
 
 	/* Cheetah specific versions, patched at boot time. */
 __cheetah_flush_tlb_mm: /* 18 insns */
@@ -283,7 +231,7 @@ __cheetah_flush_tlb_pending:	/* 26 insns */
 	 wrpr		%g7, 0x0, %pstate
 
 #ifdef DCACHE_ALIASING_POSSIBLE
-flush_dcpage_cheetah: /* 11 insns */
+__cheetah_flush_dcache_page: /* 11 insns */
 	sethi		%uhi(PAGE_OFFSET), %g1
 	sllx		%g1, 32, %g1
 	sub		%o0, %g1, %o0
@@ -329,8 +277,8 @@ cheetah_patch_cachetlbops:
 #ifdef DCACHE_ALIASING_POSSIBLE
 	sethi		%hi(__flush_dcache_page), %o0
 	or		%o0, %lo(__flush_dcache_page), %o0
-	sethi		%hi(flush_dcpage_cheetah), %o1
-	or		%o1, %lo(flush_dcpage_cheetah), %o1
+	sethi		%hi(__cheetah_flush_dcache_page), %o1
+	or		%o1, %lo(__cheetah_flush_dcache_page), %o1
 	call		cheetah_patch_one
 	 mov		11, %o2
 #endif /* DCACHE_ALIASING_POSSIBLE */
diff --git a/arch/sparc64/prom/Makefile b/arch/sparc64/prom/Makefile
index 8f2420d9e9e60a6ece3af6a07cb722db50ceed1f..c7898a5ee456ba0838afb6fc567551fdde3489b1 100644
--- a/arch/sparc64/prom/Makefile
+++ b/arch/sparc64/prom/Makefile
@@ -7,4 +7,4 @@ EXTRA_AFLAGS := -ansi
 EXTRA_CFLAGS := -Werror
 
 lib-y   := bootstr.o devops.o init.o memory.o misc.o \
-	   tree.o console.o printf.o p1275.o map.o cif.o
+	   tree.o console.o printf.o p1275.o cif.o
diff --git a/arch/sparc64/prom/console.c b/arch/sparc64/prom/console.c
index 028a53fcb1ec20e949cc925302327c9d03028d62..eae5db8dda56468ecaee4db7dfcf8fd01e60f6df 100644
--- a/arch/sparc64/prom/console.c
+++ b/arch/sparc64/prom/console.c
@@ -67,7 +67,7 @@ prom_putchar(char c)
 }
 
 void
-prom_puts(char *s, int len)
+prom_puts(const char *s, int len)
 {
 	p1275_cmd("write", P1275_ARG(1,P1275_ARG_IN_BUF)|
 			   P1275_INOUT(3,1),
diff --git a/arch/sparc64/prom/devops.c b/arch/sparc64/prom/devops.c
index 2c99b21b6981fac5ecf50b6479787b7192ce72f3..4641839eb39a9184966f14d07c30ab5131f1e706 100644
--- a/arch/sparc64/prom/devops.c
+++ b/arch/sparc64/prom/devops.c
@@ -16,7 +16,7 @@
  * Returns 0 on failure.
  */
 int
-prom_devopen(char *dstr)
+prom_devopen(const char *dstr)
 {
 	return p1275_cmd ("open", P1275_ARG(0,P1275_ARG_IN_STRING)|
 				  P1275_INOUT(1,1),
diff --git a/arch/sparc64/prom/init.c b/arch/sparc64/prom/init.c
index 817faae058cd4116740f9a53f163d47a9beca422..8b4b622d09098aaf439bb40bcd47aea8da8d81fe 100644
--- a/arch/sparc64/prom/init.c
+++ b/arch/sparc64/prom/init.c
@@ -46,7 +46,7 @@ void __init prom_init(void *cif_handler, void *cif_stack)
 	if((prom_root_node == 0) || (prom_root_node == -1))
 		prom_halt();
 
-	prom_chosen_node = prom_finddevice("/chosen");
+	prom_chosen_node = prom_finddevice(prom_chosen_path);
 	if (!prom_chosen_node || prom_chosen_node == -1)
 		prom_halt();
 
diff --git a/arch/sparc64/prom/map.S b/arch/sparc64/prom/map.S
deleted file mode 100644
index 21b3f9c99ea7787d63ae0646166c6bc2290589d0..0000000000000000000000000000000000000000
--- a/arch/sparc64/prom/map.S
+++ /dev/null
@@ -1,72 +0,0 @@
-/* $Id: map.S,v 1.2 1999/11/19 05:53:02 davem Exp $
- * map.S: Tricky coding required to fixup the kernel OBP maps
- *	  properly.
- *
- * Copyright (C) 1999 David S. Miller (davem@redhat.com)
- */
-
-	.text
-	.align	8192
-	.globl	prom_boot_page
-prom_boot_page:
-call_method:
-	.asciz	"call-method"
-	.align	8
-map:
-	.asciz	"map"
-	.align	8
-
-	/* When we are invoked, our caller has remapped us to
-	 * page zero, therefore we must use PC relative addressing
-	 * for everything after we begin performing the unmap/map
-	 * calls.
-	 */
-	.globl	prom_remap
-prom_remap:	/* %o0 = physpage, %o1 = virtpage, %o2 = mmu_ihandle */
-	rd	%pc, %g1
-	srl	%o2, 0, %o2			! kill sign extension
-	sethi	%hi(p1275buf), %g2
-	or	%g2, %lo(p1275buf), %g2
-	ldx	[%g2 + 0x10], %g3		! prom_cif_stack
-	save	%g3, -(192 + 128), %sp
-	ldx	[%g2 + 0x08], %l0		! prom_cif_handler
-	mov	%g6, %i3
-	mov	%g4, %i4
-	mov	%g5, %i5
-	flushw
-
-	sethi	%hi(prom_remap - call_method), %g7
-	or	%g7, %lo(prom_remap - call_method), %g7
-	sub	%g1, %g7, %l2			! call-method string
-	sethi	%hi(prom_remap - map), %g7
-	or	%g7, %lo(prom_remap - map), %g7
-	sub	%g1, %g7, %l4			! map string
-
-	/* OK, map the 4MB region we really live at. */
-	stx	%l2, [%sp + 2047 + 128 + 0x00]	! call-method
-	mov	7, %l5
-	stx	%l5, [%sp + 2047 + 128 + 0x08]	! num_args
-	mov	1, %l5
-	stx	%l5, [%sp + 2047 + 128 + 0x10]	! num_rets
-	stx	%l4, [%sp + 2047 + 128 + 0x18]	! map
-	stx	%i2, [%sp + 2047 + 128 + 0x20]	! mmu_ihandle
-	mov	-1, %l5
-	stx	%l5, [%sp + 2047 + 128 + 0x28]	! mode == default
-	sethi	%hi(4 * 1024 * 1024), %l5
-	stx	%l5, [%sp + 2047 + 128 + 0x30]	! size
-	stx	%i1, [%sp + 2047 + 128 + 0x38]	! vaddr
-	stx	%g0, [%sp + 2047 + 128 + 0x40]	! filler
-	stx	%i0, [%sp + 2047 + 128 + 0x48]	! paddr
-	call	%l0
-	 add	%sp, (2047 + 128), %o0		! argument array
-
-	/* Restore hard-coded globals. */
-	mov	%i3, %g6
-	mov	%i4, %g4
-	mov	%i5, %g5
-
-	/* Wheee.... we are done. */
-	ret
-	restore
-
-	.align	8192
diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c
index 19c44e97e9eef88def0a838c4c1fd3f2991dda9b..9b895faf077b8ab02c7206d1356b7fabac6874d2 100644
--- a/arch/sparc64/prom/misc.c
+++ b/arch/sparc64/prom/misc.c
@@ -17,14 +17,14 @@
 #include <asm/system.h>
 
 /* Reset and reboot the machine with the command 'bcommand'. */
-void prom_reboot(char *bcommand)
+void prom_reboot(const char *bcommand)
 {
 	p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) |
 		  P1275_INOUT(1, 0), bcommand);
 }
 
 /* Forth evaluate the expression contained in 'fstring'. */
-void prom_feval(char *fstring)
+void prom_feval(const char *fstring)
 {
 	if (!fstring || fstring[0] == 0)
 		return;
@@ -148,21 +148,19 @@ void prom_set_trap_table(unsigned long tba)
 	p1275_cmd("SUNW,set-trap-table", P1275_INOUT(1, 0), tba);
 }
 
-int mmu_ihandle_cache = 0;
-
 int prom_get_mmu_ihandle(void)
 {
 	int node, ret;
 
-	if (mmu_ihandle_cache != 0)
-		return mmu_ihandle_cache;
+	if (prom_mmu_ihandle_cache != 0)
+		return prom_mmu_ihandle_cache;
 
-	node = prom_finddevice("/chosen");
-	ret = prom_getint(node, "mmu");
+	node = prom_finddevice(prom_chosen_path);
+	ret = prom_getint(node, prom_mmu_name);
 	if (ret == -1 || ret == 0)
-		mmu_ihandle_cache = -1;
+		prom_mmu_ihandle_cache = -1;
 	else
-		mmu_ihandle_cache = ret;
+		prom_mmu_ihandle_cache = ret;
 
 	return ret;
 }
@@ -190,7 +188,7 @@ long prom_itlb_load(unsigned long index,
 		    unsigned long tte_data,
 		    unsigned long vaddr)
 {
-	return p1275_cmd("call-method",
+	return p1275_cmd(prom_callmethod_name,
 			 (P1275_ARG(0, P1275_ARG_IN_STRING) |
 			  P1275_ARG(2, P1275_ARG_IN_64B) |
 			  P1275_ARG(3, P1275_ARG_IN_64B) |
@@ -207,7 +205,7 @@ long prom_dtlb_load(unsigned long index,
 		    unsigned long tte_data,
 		    unsigned long vaddr)
 {
-	return p1275_cmd("call-method",
+	return p1275_cmd(prom_callmethod_name,
 			 (P1275_ARG(0, P1275_ARG_IN_STRING) |
 			  P1275_ARG(2, P1275_ARG_IN_64B) |
 			  P1275_ARG(3, P1275_ARG_IN_64B) |
@@ -223,13 +221,13 @@ long prom_dtlb_load(unsigned long index,
 int prom_map(int mode, unsigned long size,
 	     unsigned long vaddr, unsigned long paddr)
 {
-	int ret = p1275_cmd("call-method",
+	int ret = p1275_cmd(prom_callmethod_name,
 			    (P1275_ARG(0, P1275_ARG_IN_STRING) |
 			     P1275_ARG(3, P1275_ARG_IN_64B) |
 			     P1275_ARG(4, P1275_ARG_IN_64B) |
 			     P1275_ARG(6, P1275_ARG_IN_64B) |
 			     P1275_INOUT(7, 1)),
-			    "map",
+			    prom_map_name,
 			    prom_get_mmu_ihandle(),
 			    mode,
 			    size,
@@ -244,12 +242,12 @@ int prom_map(int mode, unsigned long size,
 
 void prom_unmap(unsigned long size, unsigned long vaddr)
 {
-	p1275_cmd("call-method",
+	p1275_cmd(prom_callmethod_name,
 		  (P1275_ARG(0, P1275_ARG_IN_STRING) |
 		   P1275_ARG(2, P1275_ARG_IN_64B) |
 		   P1275_ARG(3, P1275_ARG_IN_64B) |
 		   P1275_INOUT(4, 0)),
-		  "unmap",
+		  prom_unmap_name,
 		  prom_get_mmu_ihandle(),
 		  size,
 		  vaddr);
@@ -258,7 +256,7 @@ void prom_unmap(unsigned long size, unsigned long vaddr)
 /* Set aside physical memory which is not touched or modified
  * across soft resets.
  */
-unsigned long prom_retain(char *name,
+unsigned long prom_retain(const char *name,
 			  unsigned long pa_low, unsigned long pa_high,
 			  long size, long align)
 {
@@ -290,7 +288,7 @@ int prom_getunumber(int syndrome_code,
 		    unsigned long phys_addr,
 		    char *buf, int buflen)
 {
-	return p1275_cmd("call-method",
+	return p1275_cmd(prom_callmethod_name,
 			 (P1275_ARG(0, P1275_ARG_IN_STRING)	|
 			  P1275_ARG(3, P1275_ARG_OUT_BUF)	|
 			  P1275_ARG(6, P1275_ARG_IN_64B)	|
diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c
index 59fe38bba39e8d47ad56d627cad54f6730b5a86d..a5a7c5712028b2b3401d908baa3b739ca2e0117c 100644
--- a/arch/sparc64/prom/p1275.c
+++ b/arch/sparc64/prom/p1275.c
@@ -46,7 +46,7 @@ static inline unsigned long spitfire_get_primary_context(void)
  */
 DEFINE_SPINLOCK(prom_entry_lock);
 
-long p1275_cmd (char *service, long fmt, ...)
+long p1275_cmd(const char *service, long fmt, ...)
 {
 	char *p, *q;
 	unsigned long flags;
diff --git a/arch/sparc64/prom/printf.c b/arch/sparc64/prom/printf.c
index a6df82cafa0d1f87b19dd0905d3af251feb80b55..660943ee4c2ac7e431822dc1f99cf9e8bac294e7 100644
--- a/arch/sparc64/prom/printf.c
+++ b/arch/sparc64/prom/printf.c
@@ -34,7 +34,7 @@ prom_write(const char *buf, unsigned int n)
 }
 
 void
-prom_printf(char *fmt, ...)
+prom_printf(const char *fmt, ...)
 {
 	va_list args;
 	int i;
diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c
index ccf73258ebf7079c4642c09d0ec82d2108c3ee4a..b1ff9e87dcc6a339b9387f5ae56378adc6274be4 100644
--- a/arch/sparc64/prom/tree.c
+++ b/arch/sparc64/prom/tree.c
@@ -69,7 +69,7 @@ prom_getsibling(int node)
  * Return -1 on error.
  */
 __inline__ int
-prom_getproplen(int node, char *prop)
+prom_getproplen(int node, const char *prop)
 {
 	if((!node) || (!prop)) return -1;
 	return p1275_cmd ("getproplen", 
@@ -83,20 +83,20 @@ prom_getproplen(int node, char *prop)
  * was successful the length will be returned, else -1 is returned.
  */
 __inline__ int
-prom_getproperty(int node, char *prop, char *buffer, int bufsize)
+prom_getproperty(int node, const char *prop, char *buffer, int bufsize)
 {
 	int plen;
 
 	plen = prom_getproplen(node, prop);
-	if((plen > bufsize) || (plen == 0) || (plen == -1))
+	if ((plen > bufsize) || (plen == 0) || (plen == -1)) {
 		return -1;
-	else {
+	} else {
 		/* Ok, things seem all right. */
-		return p1275_cmd ("getprop", 
-				  P1275_ARG(1,P1275_ARG_IN_STRING)|
-				  P1275_ARG(2,P1275_ARG_OUT_BUF)|
-				  P1275_INOUT(4, 1), 
-				  node, prop, buffer, P1275_SIZE(plen));
+		return p1275_cmd(prom_getprop_name, 
+				 P1275_ARG(1,P1275_ARG_IN_STRING)|
+				 P1275_ARG(2,P1275_ARG_OUT_BUF)|
+				 P1275_INOUT(4, 1), 
+				 node, prop, buffer, P1275_SIZE(plen));
 	}
 }
 
@@ -104,7 +104,7 @@ prom_getproperty(int node, char *prop, char *buffer, int bufsize)
  * on failure.
  */
 __inline__ int
-prom_getint(int node, char *prop)
+prom_getint(int node, const char *prop)
 {
 	int intprop;
 
@@ -119,7 +119,7 @@ prom_getint(int node, char *prop)
  */
 
 int
-prom_getintdefault(int node, char *property, int deflt)
+prom_getintdefault(int node, const char *property, int deflt)
 {
 	int retval;
 
@@ -131,7 +131,7 @@ prom_getintdefault(int node, char *property, int deflt)
 
 /* Acquire a boolean property, 1=TRUE 0=FALSE. */
 int
-prom_getbool(int node, char *prop)
+prom_getbool(int node, const char *prop)
 {
 	int retval;
 
@@ -145,7 +145,7 @@ prom_getbool(int node, char *prop)
  * buffer.
  */
 void
-prom_getstring(int node, char *prop, char *user_buf, int ubuf_size)
+prom_getstring(int node, const char *prop, char *user_buf, int ubuf_size)
 {
 	int len;
 
@@ -160,7 +160,7 @@ prom_getstring(int node, char *prop, char *user_buf, int ubuf_size)
  * YES = 1   NO = 0
  */
 int
-prom_nodematch(int node, char *name)
+prom_nodematch(int node, const char *name)
 {
 	char namebuf[128];
 	prom_getproperty(node, "name", namebuf, sizeof(namebuf));
@@ -172,7 +172,7 @@ prom_nodematch(int node, char *name)
  * 'nodename'.  Return node if successful, zero if not.
  */
 int
-prom_searchsiblings(int node_start, char *nodename)
+prom_searchsiblings(int node_start, const char *nodename)
 {
 
 	int thisnode, error;
@@ -294,7 +294,7 @@ prom_firstprop(int node, char *buffer)
  * property types for this node.
  */
 __inline__ char *
-prom_nextprop(int node, char *oprop, char *buffer)
+prom_nextprop(int node, const char *oprop, char *buffer)
 {
 	char buf[32];
 
@@ -314,15 +314,17 @@ prom_nextprop(int node, char *oprop, char *buffer)
 }
 
 int
-prom_finddevice(char *name)
+prom_finddevice(const char *name)
 {
-	if(!name) return 0;
-	return p1275_cmd ("finddevice", P1275_ARG(0,P1275_ARG_IN_STRING)|
-				        P1275_INOUT(1, 1), 
-				        name);
+	if (!name)
+		return 0;
+	return p1275_cmd(prom_finddev_name,
+			 P1275_ARG(0,P1275_ARG_IN_STRING)|
+			 P1275_INOUT(1, 1), 
+			 name);
 }
 
-int prom_node_has_property(int node, char *prop)
+int prom_node_has_property(int node, const char *prop)
 {
 	char buf [32];
         
@@ -339,7 +341,7 @@ int prom_node_has_property(int node, char *prop)
  * of 'size' bytes.  Return the number of bytes the prom accepted.
  */
 int
-prom_setprop(int node, char *pname, char *value, int size)
+prom_setprop(int node, const char *pname, char *value, int size)
 {
 	if(size == 0) return 0;
 	if((pname == 0) || (value == 0)) return 0;
@@ -364,7 +366,7 @@ prom_inst2pkg(int inst)
  * FIXME: Should work for v0 as well
  */
 int
-prom_pathtoinode(char *path)
+prom_pathtoinode(const char *path)
 {
 	int node, inst;
 
diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h
index ededd2659eab7b0a3dc99e4e36bae5e7e70750c7..b3f61659ba81531a4acc77e9f21acbd918fc660c 100644
--- a/include/asm-sparc64/cacheflush.h
+++ b/include/asm-sparc64/cacheflush.h
@@ -66,6 +66,11 @@ extern void flush_ptrace_access(struct vm_area_struct *, struct page *,
 #define flush_cache_vmap(start, end)		do { } while (0)
 #define flush_cache_vunmap(start, end)		do { } while (0)
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+/* Internal helper for DEBUG_PAGEALLOC (page unmap-after-free debugging). */
+void kernel_map_pages(struct page *page, int numpages, int enable);
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _SPARC64_CACHEFLUSH_H */
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index 9a3a81f1cc58ab7ba06c7e4290fc851a44ed570c..74de79dca9152ca44ce0f4ef006e459834d28278 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -22,6 +22,16 @@ typedef struct {
 	unsigned int	__pad1;
 	unsigned long	*pte_cache[2];
 	unsigned long	*pgd_cache;
+
+	/* Dcache line 3, rarely used */
+	unsigned int	dcache_size;
+	unsigned int	dcache_line_size;
+	unsigned int	icache_size;
+	unsigned int	icache_line_size;
+	unsigned int	ecache_size;
+	unsigned int	ecache_line_size;
+	unsigned int	__pad2;
+	unsigned int	__pad3;
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h
index a432d9e7daaa31dbe9730be407051da85cc6b68d..c628189b6c89d6dbc70d59668ac8b2a87dd0f6ba 100644
--- a/include/asm-sparc64/oplib.h
+++ b/include/asm-sparc64/oplib.h
@@ -38,6 +38,20 @@ extern int prom_stdin, prom_stdout;
  */
 extern int prom_chosen_node;
 
+/* Helper values and strings in arch/sparc64/kernel/head.S */
+extern const char prom_finddev_name[];
+extern const char prom_chosen_path[];
+extern const char prom_getprop_name[];
+extern const char prom_mmu_name[];
+extern const char prom_callmethod_name[];
+extern const char prom_translate_name[];
+extern const char prom_map_name[];
+extern const char prom_unmap_name[];
+extern int prom_mmu_ihandle_cache;
+extern unsigned int prom_boot_mapped_pc;
+extern unsigned int prom_boot_mapping_mode;
+extern unsigned long prom_boot_mapping_phys_high, prom_boot_mapping_phys_low;
+
 struct linux_mlist_p1275 {
 	struct linux_mlist_p1275 *theres_more;
 	unsigned long start_adr;
@@ -68,7 +82,7 @@ extern char *prom_getbootargs(void);
  * of the string is different on V0 vs. V2->higher proms.  The caller must
  * know what he/she is doing!  Returns the device descriptor, an int.
  */
-extern int prom_devopen(char *device_string);
+extern int prom_devopen(const char *device_string);
 
 /* Close a previously opened device described by the passed integer
  * descriptor.
@@ -98,10 +112,10 @@ extern struct linux_mem_p1275 *prom_meminfo(void);
 /* Miscellaneous routines, don't really fit in any category per se. */
 
 /* Reboot the machine with the command line passed. */
-extern void prom_reboot(char *boot_command);
+extern void prom_reboot(const char *boot_command);
 
 /* Evaluate the forth string passed. */
-extern void prom_feval(char *forth_string);
+extern void prom_feval(const char *forth_string);
 
 /* Enter the prom, with possibility of continuation with the 'go'
  * command in newer proms.
@@ -154,7 +168,7 @@ extern char prom_getchar(void);
 extern void prom_putchar(char character);
 
 /* Prom's internal routines, don't use in kernel/boot code. */
-extern void prom_printf(char *fmt, ...);
+extern void prom_printf(const char *fmt, ...);
 extern void prom_write(const char *buf, unsigned int len);
 
 /* Query for input device type */
@@ -215,7 +229,7 @@ extern int prom_getunumber(int syndrome_code,
 			   char *buf, int buflen);
 
 /* Retain physical memory to the caller across soft resets. */
-extern unsigned long prom_retain(char *name,
+extern unsigned long prom_retain(const char *name,
 				 unsigned long pa_low, unsigned long pa_high,
 				 long size, long align);
 
@@ -269,28 +283,28 @@ extern int prom_getsibling(int node);
 /* Get the length, at the passed node, of the given property type.
  * Returns -1 on error (ie. no such property at this node).
  */
-extern int prom_getproplen(int thisnode, char *property);
+extern int prom_getproplen(int thisnode, const char *property);
 
 /* Fetch the requested property using the given buffer.  Returns
  * the number of bytes the prom put into your buffer or -1 on error.
  */
-extern int prom_getproperty(int thisnode, char *property,
+extern int prom_getproperty(int thisnode, const char *property,
 			    char *prop_buffer, int propbuf_size);
 
 /* Acquire an integer property. */
-extern int prom_getint(int node, char *property);
+extern int prom_getint(int node, const char *property);
 
 /* Acquire an integer property, with a default value. */
-extern int prom_getintdefault(int node, char *property, int defval);
+extern int prom_getintdefault(int node, const char *property, int defval);
 
 /* Acquire a boolean property, 0=FALSE 1=TRUE. */
-extern int prom_getbool(int node, char *prop);
+extern int prom_getbool(int node, const char *prop);
 
 /* Acquire a string property, null string on error. */
-extern void prom_getstring(int node, char *prop, char *buf, int bufsize);
+extern void prom_getstring(int node, const char *prop, char *buf, int bufsize);
 
 /* Does the passed node have the given "name"? YES=1 NO=0 */
-extern int prom_nodematch(int thisnode, char *name);
+extern int prom_nodematch(int thisnode, const char *name);
 
 /* Puts in buffer a prom name in the form name@x,y or name (x for which_io 
  * and y for first regs phys address
@@ -300,7 +314,7 @@ extern int prom_getname(int node, char *buf, int buflen);
 /* Search all siblings starting at the passed node for "name" matching
  * the given string.  Returns the node on success, zero on failure.
  */
-extern int prom_searchsiblings(int node_start, char *name);
+extern int prom_searchsiblings(int node_start, const char *name);
 
 /* Return the first property type, as a string, for the given node.
  * Returns a null string on error. Buffer should be at least 32B long.
@@ -310,21 +324,21 @@ extern char *prom_firstprop(int node, char *buffer);
 /* Returns the next property after the passed property for the given
  * node.  Returns null string on failure. Buffer should be at least 32B long.
  */
-extern char *prom_nextprop(int node, char *prev_property, char *buffer);
+extern char *prom_nextprop(int node, const char *prev_property, char *buffer);
 
 /* Returns 1 if the specified node has given property. */
-extern int prom_node_has_property(int node, char *property);
+extern int prom_node_has_property(int node, const char *property);
 
 /* Returns phandle of the path specified */
-extern int prom_finddevice(char *name);
+extern int prom_finddevice(const char *name);
 
 /* Set the indicated property at the given node with the passed value.
  * Returns the number of bytes of your value that the prom took.
  */
-extern int prom_setprop(int node, char *prop_name, char *prop_value,
+extern int prom_setprop(int node, const char *prop_name, char *prop_value,
 			int value_size);
 			
-extern int prom_pathtoinode(char *path);
+extern int prom_pathtoinode(const char *path);
 extern int prom_inst2pkg(int);
 
 /* CPU probing helpers.  */
@@ -334,7 +348,7 @@ int cpu_find_by_mid(int mid, int *prom_node);
 /* Client interface level routines. */
 extern void prom_set_trap_table(unsigned long tba);
 
-extern long p1275_cmd (char *, long, ...);
+extern long p1275_cmd(const char *, long, ...);
 				   
 
 #if 0
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index a297f6144f0fa940d36bd674c0a3b181261213e7..43cbb089cde2dfc08e500086b5c251b2d766eea2 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -60,13 +60,13 @@
  * table can map
  */
 #define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3))
-#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_SIZE	(_AC(1,UL) << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
 #define PMD_BITS	(PAGE_SHIFT - 2)
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
 #define PGDIR_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_SIZE	(_AC(1,UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 #define PGDIR_BITS	(PAGE_SHIFT - 2)
 
@@ -336,7 +336,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *p
 #define pte_clear(mm,addr,ptep)		\
 	set_pte_at((mm), (addr), (ptep), __pte(0UL))
 
-extern pgd_t swapper_pg_dir[1];
+extern pgd_t swapper_pg_dir[2048];
+extern pmd_t swapper_low_pmd_dir[2048];
 
 /* These do nothing with the way I have things setup. */
 #define mmu_lockarea(vaddr, len)		(vaddr)