cpu/intel/slot_1: Switch to different CAR setup

This moves the CAR stack under variable MTRRs and removes the
old CAR code, which used complex fixed MTRRs and placed the
stack in low memory.

Change-Id: I75ec842ae3b6771cc3f7ff652adbe386c03b9a5f
Signed-off-by: Kyösti Mälkki <kyosti.malkki@gmail.com>
Signed-off-by: Keith Hui <buurin@gmail.com>
Reviewed-on: https://review.coreboot.org/26586
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
diff --git a/src/cpu/intel/car/cache_as_ram.inc b/src/cpu/intel/car/cache_as_ram.inc
deleted file mode 100644
index d208cee..0000000
--- a/src/cpu/intel/car/cache_as_ram.inc
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * This file is part of the coreboot project.
- *
- * Copyright (C) 2000, 2007 Ronald G. Minnich <rminnich@gmail.com>
- * Copyright (C) 2005 Eswar Nallusamy, LANL
- * Copyright (C) 2005 Tyan (written by Yinghai Lu for Tyan)
- * Copyright (C) 2007-2010 coresystems GmbH
- * Copyright (C) 2007 Carl-Daniel Hailfinger
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <cpu/x86/mtrr.h>
-#include <cpu/x86/cache.h>
-#include <cpu/x86/lapic_def.h>
-#include <cpu/x86/post_code.h>
-
-	/* Save the BIST result. */
-	movl	%eax, %ebp
-
-CacheAsRam:
-	/* Set the default memory type and enable fixed and variable MTRRs. */
-	movl	$MTRR_DEF_TYPE_MSR, %ecx
-	xorl	%edx, %edx
-	movl	$(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN), %eax
-	wrmsr
-
-	/* Clear all MTRRs. */
-	xorl	%edx, %edx
-	movl	$all_mtrr_msrs, %esi
-
-clear_fixed_var_mtrr:
-	lodsl	(%esi), %eax
-	testl	%eax, %eax
-	jz	clear_fixed_var_mtrr_out
-
-	movl	%eax, %ecx
-	xorl	%eax, %eax
-	wrmsr
-
-	jmp	clear_fixed_var_mtrr
-
-all_mtrr_msrs:
-	/* fixed MTRR MSRs */
-	.long	MTRR_FIX_64K_00000
-	.long	MTRR_FIX_16K_80000
-	.long	MTRR_FIX_16K_A0000
-	.long	MTRR_FIX_4K_C0000
-	.long	MTRR_FIX_4K_C8000
-	.long	MTRR_FIX_4K_D0000
-	.long	MTRR_FIX_4K_D8000
-	.long	MTRR_FIX_4K_E0000
-	.long	MTRR_FIX_4K_E8000
-	.long	MTRR_FIX_4K_F0000
-	.long	MTRR_FIX_4K_F8000
-
-	/* var MTRR MSRs */
-	.long	MTRR_PHYS_BASE(0)
-	.long	MTRR_PHYS_MASK(0)
-	.long	MTRR_PHYS_BASE(1)
-	.long	MTRR_PHYS_MASK(1)
-	.long	MTRR_PHYS_BASE(2)
-	.long	MTRR_PHYS_MASK(2)
-	.long	MTRR_PHYS_BASE(3)
-	.long	MTRR_PHYS_MASK(3)
-	.long	MTRR_PHYS_BASE(4)
-	.long	MTRR_PHYS_MASK(4)
-	.long	MTRR_PHYS_BASE(5)
-	.long	MTRR_PHYS_MASK(5)
-	.long	MTRR_PHYS_BASE(6)
-	.long	MTRR_PHYS_MASK(6)
-	.long	MTRR_PHYS_BASE(7)
-	.long	MTRR_PHYS_MASK(7)
-
-	.long	0x000 /* NULL, end of table */
-
-clear_fixed_var_mtrr_out:
-
-/*
- * 0x06 is the WB IO type for a given 4k segment.
- * segs is the number of 4k segments in the area of the particular
- *      register we want to use for CAR.
- * reg  is the register where the IO type should be stored.
- */
-.macro extractmask segs, reg
-.if \segs <= 0
-	/*
-	 * The xorl here is superfluous because at the point of first execution
-	 * of this macro, %eax and %edx are cleared. Later invocations of this
-	 * macro will have a monotonically increasing segs parameter.
-	 */
-	xorl \reg, \reg
-.elseif \segs == 1
-	movl	$0x06000000, \reg /* WB IO type */
-.elseif \segs == 2
-	movl	$0x06060000, \reg /* WB IO type */
-.elseif \segs == 3
-	movl	$0x06060600, \reg /* WB IO type */
-.elseif \segs >= 4
-	movl	$0x06060606, \reg /* WB IO type */
-.endif
-.endm
-
-/*
- * carsize is the cache size in bytes we want to use for CAR.
- * windowoffset is the 32k-aligned window into CAR size.
- */
-.macro simplemask carsize, windowoffset
-	.set gas_bug_workaround,(((\carsize - \windowoffset) >> 12) - 4)
-	extractmask gas_bug_workaround, %eax
-	.set gas_bug_workaround,(((\carsize - \windowoffset) >> 12))
-	extractmask gas_bug_workaround, %edx
-	/*
-	 * Without the gas bug workaround, the entire macro would consist
-	 * only of the two lines below:
-	 *   extractmask (((\carsize - \windowoffset) >> 12) - 4), %eax
-	 *   extractmask (((\carsize - \windowoffset) >> 12)), %edx
-	 */
-.endm
-
-#if CONFIG_DCACHE_RAM_SIZE > 0x10000
-#error Invalid CAR size, must be at most 64k.
-#endif
-#if CONFIG_DCACHE_RAM_SIZE < 0x1000
-#error Invalid CAR size, must be at least 4k. This is a processor limitation.
-#endif
-#if (CONFIG_DCACHE_RAM_SIZE & (0x1000 - 1))
-#error Invalid CAR size, is not a multiple of 4k. This is a processor limitation.
-#endif
-
-#if CONFIG_DCACHE_RAM_SIZE > 0x8000
-	/* Enable caching for 32K-64K using fixed MTRR. */
-	movl	$MTRR_FIX_4K_C0000, %ecx
-	simplemask CONFIG_DCACHE_RAM_SIZE, 0x8000
-	wrmsr
-#endif
-
-	/* Enable caching for 0-32K using fixed MTRR. */
-	movl	$MTRR_FIX_4K_C8000, %ecx
-	simplemask CONFIG_DCACHE_RAM_SIZE, 0
-	wrmsr
-
-	/* Enable cache for our code in Flash because we do XIP here. */
-	movl	$MTRR_PHYS_BASE(1), %ecx
-	xorl	%edx, %edx
-	/*
-	 * IMPORTANT: The following calculation _must_ be done at runtime. See
-	 * https://www.coreboot.org/pipermail/coreboot/2010-October/060855.html
-	 */
-	movl	$copy_and_run, %eax
-	andl	$(~(CONFIG_XIP_ROM_SIZE - 1)), %eax
-	orl	$MTRR_TYPE_WRPROT, %eax
-	wrmsr
-
-	movl	$MTRR_PHYS_MASK(1), %ecx
-	movl	$0x0000000f, %edx
-	movl	$(~(CONFIG_XIP_ROM_SIZE - 1) | MTRR_PHYS_MASK_VALID), %eax
-	wrmsr
-
-	/* Enable cache. */
-	movl	%cr0, %eax
-	andl	$(~(CR0_CacheDisable | CR0_NoWriteThrough)), %eax
-	movl	%eax, %cr0
-
-	/* Read the CAR region. This will also fill up the cache.
-	 * IMPORTANT: This step is mandatory.
-	 */
-	movl	$CONFIG_DCACHE_RAM_BASE, %esi
-	cld
-	movl	$(CONFIG_DCACHE_RAM_SIZE >> 2), %ecx
-	rep	lodsl
-
-	/* Clear the CAR region. */
-	movl	$CONFIG_DCACHE_RAM_BASE, %edi
-	movl	$(CONFIG_DCACHE_RAM_SIZE >> 2), %ecx
-	xorl	%eax, %eax
-	rep	stosl
-
-	movl	$(CONFIG_DCACHE_RAM_BASE + CONFIG_DCACHE_RAM_SIZE), %eax
-	movl	%eax, %esp
-lout:
-	/* Restore the BIST result. */
-	movl	%ebp, %eax
-
-	pushl	%eax  /* BIST */
-	call	romstage_main
-
-	/* Setup stack as indicated by return value from romstage_main(). */
-	movl	%eax, %esp
-
-	/* We don't need CAR from now on. */
-
-	/* Disable cache. */
-	movl	%cr0, %eax
-	orl	$CR0_CacheDisable, %eax
-	movl	%eax, %cr0
-
-	/* Clear the fixed MTRR we used. */
-	movl	$MTRR_FIX_4K_C8000, %ecx
-	xorl	%edx, %edx
-	xorl	%eax, %eax
-	wrmsr
-
-#if CONFIG_DCACHE_RAM_SIZE > 0x8000
-	movl	$MTRR_FIX_4K_C0000, %ecx
-	wrmsr
-#endif
-
-	/*
-	 * Enable variable and disable fixed MTRRs.
-	 * Default memory type will be UC.
-	 */
-	movl	$MTRR_DEF_TYPE_MSR, %ecx
-	xorl	%edx, %edx
-	movl	$MTRR_DEF_TYPE_EN, %eax
-	wrmsr
-
-	/* Enable cache. */
-	movl	%cr0, %eax
-	andl	$(~(CR0_CacheDisable | CR0_NoWriteThrough)), %eax
-	movl	%eax, %cr0
-
-__main:
-	post_code(POST_PREPARE_RAMSTAGE)
-	cld			/* Clear direction flag. */
-	call	copy_and_run
-
-.Lhlt:
-	post_code(POST_DEAD_CODE)
-	hlt
-	jmp	.Lhlt
diff --git a/src/cpu/intel/car/p3/cache_as_ram.S b/src/cpu/intel/car/p3/cache_as_ram.S
index e716caf..6f5076f 100644
--- a/src/cpu/intel/car/p3/cache_as_ram.S
+++ b/src/cpu/intel/car/p3/cache_as_ram.S
@@ -19,11 +19,6 @@
 #include <cpu/x86/mtrr.h>
 #include <cpu/x86/cache.h>
 #include <cpu/x86/post_code.h>
-#include <cpu/x86/lapic_def.h>
-
-/* Macro to access Local APIC registers at default base. */
-#define LAPIC(x)		$(LAPIC_DEFAULT_BASE | LAPIC_ ## x)
-#define START_IPI_VECTOR	((CONFIG_AP_SIPI_VECTOR >> 12) & 0xff)
 
 #define CACHE_AS_RAM_SIZE CONFIG_DCACHE_RAM_SIZE
 #define CACHE_AS_RAM_BASE CONFIG_DCACHE_RAM_BASE
@@ -34,14 +29,7 @@
 cache_as_ram:
 	post_code(0x20)
 
-	movl	$LAPIC_BASE_MSR, %ecx
-	rdmsr
-	andl	$LAPIC_BASE_MSR_BOOTSTRAP_PROCESSOR, %eax
-	jz	ap_init
-
-	/* Zero out all fixed range and variable range MTRRs.
-	 * For hyper-threaded CPUs these are shared.
-	 */
+	/* Zero out all fixed range and variable range MTRRs. */
 	movl	$mtrr_table, %esi
 	movl	$((mtrr_table_end - mtrr_table) >> 1), %edi
 	xorl	%eax, %eax
@@ -64,22 +52,7 @@
 
 	post_code(0x22)
 
-	/* Determine CPU_ADDR_BITS and load PHYSMASK high
-	 * word to %edx.
-	 */
-	movl	$0x80000000, %eax
-	cpuid
-	cmpl	$0x80000008, %eax
-	jc	addrsize_no_MSR
-	movl	$0x80000008, %eax
-	cpuid
-	movb	%al, %cl
-	sub	$32, %cl
-	movl	$1, %edx
-	shl	%cl, %edx
-	subl	$1, %edx
-	jmp	addrsize_set_high
-addrsize_no_MSR:
+	/* Determine CPU_ADDR_BITS and load PHYSMASK high word to %edx. */
 	movl	$1, %eax
 	cpuid
 	andl	$(1 << 6 | 1 << 17), %edx	/* PAE or PSE36 */
@@ -87,141 +60,13 @@
 	movl	$0x0f, %edx
 
 	/* Preload high word of address mask (in %edx) for Variable
-	 * MTRRs 0 and 1 and enable local APIC at default base.
-	 */
+	   MTRRs 0 and 1. */
 addrsize_set_high:
 	xorl	%eax, %eax
 	movl	$MTRR_PHYS_MASK(0), %ecx
 	wrmsr
 	movl	$MTRR_PHYS_MASK(1), %ecx
 	wrmsr
-	movl	$LAPIC_BASE_MSR, %ecx
-	not	%edx
-	movl	%edx, %ebx
-	rdmsr
-	andl	%ebx, %edx
-	andl	$(~LAPIC_BASE_MSR_ADDR_MASK), %eax
-	orl	$(LAPIC_DEFAULT_BASE | LAPIC_BASE_MSR_ENABLE), %eax
-	wrmsr
-
-bsp_init:
-
-	post_code(0x23)
-
-	/* Send INIT IPI to all excluding ourself. */
-	movl	LAPIC(ICR), %edi
-	movl	$(LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | LAPIC_DM_INIT), %eax
-1:	movl	%eax, (%edi)
-	movl	$0x30, %ecx
-2:	pause
-	dec	%ecx
-	jnz	2b
-	movl	(%edi), %ecx
-	andl	$LAPIC_ICR_BUSY, %ecx
-	jnz	1b
-
-	post_code(0x24)
-
-	movl	$1, %eax
-	cpuid
-	btl	$28, %edx
-	jnc	sipi_complete
-	bswapl	%ebx
-	movzx	%bh, %edi
-	cmpb	$1, %bh
-	jbe	sipi_complete	/* only one LAPIC ID in package */
-
-	movl	$0, %eax
-	cpuid
-	movb	$1, %bl
-	cmpl	$4, %eax
-	jb	cores_counted
-	movl	$4, %eax
-	movl	$0, %ecx
-	cpuid
-	shr	$26, %eax
-	movb	%al, %bl
-	inc	%bl
-
-cores_counted:
-	movl	%edi, %eax
-	divb	%bl
-	cmpb	$1, %al
-	jbe	sipi_complete	/* only LAPIC ID of a core */
-
-	/* For a hyper-threading processor, cache must not be disabled
-	 * on an AP on the same physical package with the BSP.
-	 */
-
-hyper_threading_cpu:
-
-	/* delay 10 ms */
-	movl	$10000, %ecx
-1:	inb	$0x80, %al
-	dec	%ecx
-	jnz	1b
-
-	post_code(0x25)
-
-	/* Send Start IPI to all excluding ourself. */
-	movl	LAPIC(ICR), %edi
-	movl	$(LAPIC_DEST_ALLBUT | LAPIC_DM_STARTUP | START_IPI_VECTOR), %eax
-1:	movl	%eax, (%edi)
-	movl	$0x30, %ecx
-2:	pause
-	dec	%ecx
-	jnz	2b
-	movl	(%edi), %ecx
-	andl	$LAPIC_ICR_BUSY, %ecx
-	jnz	1b
-
-	/* delay 250 us */
-	movl	$250, %ecx
-1:	inb	$0x80, %al
-	dec	%ecx
-	jnz	1b
-
-	post_code(0x26)
-
-	/* Wait for sibling CPU to start. */
-1:	movl	$(MTRR_PHYS_BASE(0)), %ecx
-	rdmsr
-	andl	%eax, %eax
-	jnz	sipi_complete
-
-	movl	$0x30, %ecx
-2:	pause
-	dec	%ecx
-	jnz	2b
-	jmp	1b
-
-
-ap_init:
-	post_code(0x27)
-
-	/* Do not disable cache (so BSP can enable it). */
-	movl	%cr0, %eax
-	andl	$(~(CR0_CacheDisable | CR0_NoWriteThrough)), %eax
-	movl	%eax, %cr0
-
-	post_code(0x28)
-
-	/* MTRR registers are shared between HT siblings. */
-	movl	$(MTRR_PHYS_BASE(0)), %ecx
-	movl	$(1 << 12), %eax
-	xorl	%edx, %edx
-	wrmsr
-
-	post_code(0x29)
-
-ap_halt:
-	cli
-1:	hlt
-	jmp	1b
-
-
-
-sipi_complete:
 
 	post_code(0x2a)
 
@@ -245,41 +90,6 @@
 	orl	$MTRR_DEF_TYPE_EN, %eax
 	wrmsr
 
-	/* Enable L2 cache Write-Back (WBINVD and FLUSH#).
-	 *
-	 * MSR is set when DisplayFamily_DisplayModel is one of:
-	 * 06_0x, 06_17, 06_1C
-	 *
-	 * Description says this bit enables use of WBINVD and FLUSH#.
-	 * Should this be set only after the system bus and/or memory
-	 * controller can successfully handle write cycles?
-	 */
-
-#define EAX_FAMILY(a)	(a << 8)	/* for family <= 0fH */
-#define EAX_MODEL(a)	(((a & 0xf0) << 12) | ((a & 0xf) << 4))
-
-	movl	$1, %eax
-	cpuid
-	movl	%eax, %ebx
-	andl	$EAX_FAMILY(0x0f), %eax
-	cmpl	$EAX_FAMILY(0x06), %eax
-	jne	no_msr_11e
-	movl	%ebx, %eax
-	andl	$EAX_MODEL(0xff), %eax
-	cmpl	$EAX_MODEL(0x17), %eax
-	je	has_msr_11e
-	cmpl	$EAX_MODEL(0x1c), %eax
-	je	has_msr_11e
-	andl	$EAX_MODEL(0xf0), %eax
-	cmpl	$EAX_MODEL(0x00), %eax
-	jne	no_msr_11e
-has_msr_11e:
-	movl	$0x11e, %ecx
-	rdmsr
-	orl	$(1 << 8), %eax
-	wrmsr
-no_msr_11e:
-
 	post_code(0x2c)
 
 	/* Enable cache (CR0.CD = 0, CR0.NW = 0). */
@@ -288,11 +98,16 @@
 	invd
 	movl	%eax, %cr0
 
-	/* Clear the cache memory region. This will also fill up the cache. */
+	/* Read then clear the CAR region. This will also fill up the cache.
+	 * IMPORTANT: The read is mandatory.
+	 */
+	movl	$CACHE_AS_RAM_BASE, %esi
+	movl	%esi, %edi
 	cld
-	xorl	%eax, %eax
-	movl	$CACHE_AS_RAM_BASE, %edi
 	movl	$(CACHE_AS_RAM_SIZE >> 2), %ecx
+	rep	lodsl
+	movl	$(CACHE_AS_RAM_SIZE >> 2), %ecx
+	xorl	%eax, %eax
 	rep	stosl
 
 	post_code(0x2d)
diff --git a/src/cpu/intel/car/romstage_legacy.c b/src/cpu/intel/car/romstage_legacy.c
deleted file mode 100644
index 2de6691..0000000
--- a/src/cpu/intel/car/romstage_legacy.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * This file is part of the coreboot project.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <cpu/intel/romstage.h>
-
-asmlinkage void *romstage_main(unsigned long bist)
-{
-	mainboard_romstage_entry(bist);
-	return (void *)CONFIG_RAMTOP;
-}
diff --git a/src/cpu/intel/slot_1/Kconfig b/src/cpu/intel/slot_1/Kconfig
index f535a03..ab66632 100644
--- a/src/cpu/intel/slot_1/Kconfig
+++ b/src/cpu/intel/slot_1/Kconfig
@@ -28,7 +28,7 @@
 
 config DCACHE_RAM_BASE
 	hex
-	default 0xce000
+	default 0xfefc0000
 
 config DCACHE_RAM_SIZE
 	hex
diff --git a/src/cpu/intel/slot_1/Makefile.inc b/src/cpu/intel/slot_1/Makefile.inc
index ca7c154..9e34106 100644
--- a/src/cpu/intel/slot_1/Makefile.inc
+++ b/src/cpu/intel/slot_1/Makefile.inc
@@ -28,5 +28,5 @@
 subdirs-y += ../../x86/smm
 subdirs-y += ../microcode
 
-cpu_incs-y += $(src)/cpu/intel/car/cache_as_ram.inc
-romstage-y += ../car/romstage_legacy.c
+cpu_incs-y += $(src)/cpu/intel/car/p3/cache_as_ram.S
+romstage-y += ../car/romstage.c