Issues 51 and 52: set the MTRR for each AP before stopping it, and add support
for _RAMBASE above 1M and for placing pgtbl above 1M


git-svn-id: svn://svn.coreboot.org/coreboot/trunk@2142 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1
diff --git a/src/config/linuxbios_ram.ld b/src/config/linuxbios_ram.ld
index 6fa311f..a159ce0 100644
--- a/src/config/linuxbios_ram.ld
+++ b/src/config/linuxbios_ram.ld
@@ -15,6 +15,7 @@
 /*
  *	Written by Johan Rydberg, based on work by Daniel Kahlin.
  *      Rewritten by Eric Biederman
+ *  2005.12 yhlu add linuxbios_ram cross the vga font buffer handling
  */
 /*
  *	We use ELF as output format. So that we can
@@ -91,21 +92,25 @@
 	_stack = .;
 	.stack . : {
 		/* Reserve a stack for each possible cpu */
-		. = (CONFIG_MAX_CPUS * STACK_SIZE) ;
+		/* When VGA console or PCI ROM run is enabled, _RAMBASE is below 1M and CONFIG_LB_MEM_TOPK > 1024, only the BSP stack is reserved here; the AP stacks are placed after pgtbl in the 1M..CONFIG_LB_MEM_TOPK range */
+		. = ((CONFIG_CONSOLE_VGA || CONFIG_PCI_ROM_RUN)&&(_RAMBASE<0x100000)&&((CONFIG_LB_MEM_TOPK<<10)>0x100000) ) ? STACK_SIZE : (CONFIG_MAX_CPUS*STACK_SIZE);
 	}
 	_estack = .;
-	_heap = .;
-	.heap . : {
-		/* Reserve 256K for the heap */
-		. = HEAP_SIZE ;
-		. = ALIGN(4);
-	}
-	_eheap = .;
+        _heap = .;
+        .heap . : {
+                /* Reserve 256K for the heap */
+                . = HEAP_SIZE ;
+                . = ALIGN(4);
+        }
+        _eheap = .;
 	/* The ram segment
  	 * This is all address of the memory resident copy of linuxBIOS.
 	 */
-	_ram_seg = _text;
+	_ram_seg = _text; 
 	_eram_seg = _eheap;
+
+        _bogus = ASSERT( !((CONFIG_CONSOLE_VGA || CONFIG_PCI_ROM_RUN) && ((_ram_seg<0xa0000) && (_eram_seg>0xa0000))) , "please increase CONFIG_LB_MEM_TOPK and if still fail, try to set _RAMBASE more than 1M");
+	
 	/DISCARD/ : {
 		*(.comment)
 		*(.note)
diff --git a/src/cpu/amd/car/clear_1m_ram.c b/src/cpu/amd/car/clear_1m_ram.c
index d4c6600..c61610f 100644
--- a/src/cpu/amd/car/clear_1m_ram.c
+++ b/src/cpu/amd/car/clear_1m_ram.c
@@ -69,3 +69,43 @@
 #endif
         );
 }
+
+/* Be warned: this code is also run by cores other than node 0/core 0, and by node 0/core 0 itself after a cpu_reset. */
+static inline __attribute__((always_inline)) void set_1m_ram(void)
+{
+        __asm__ volatile (
+
+        /* disable caching: set CR0 bit 30 (CD) */
+        "movl   %%cr0, %%eax\n\t"
+        "orl    $(0x1<<30),%%eax\n\t"
+        "movl    %%eax, %%cr0\n\t"
+
+        /* variable MTRR 0 (MSRs 0x200/0x201): base 0, type 6 (write-back), mask covering CONFIG_LB_MEM_TOPK KB with bit 11 (0x800) set */
+        "movl    $0x200, %%ecx\n\t"
+        "xorl    %%edx, %%edx\n\t"
+        "movl     $(0 | 6), %%eax\n\t"
+//      "movl     $(0 | MTRR_TYPE_WRBACK), %%eax\n\t"
+        "wrmsr\n\t"
+
+        "movl    $0x201, %%ecx\n\t"
+        "movl    $0x0000000f, %%edx\n\t"
+#if CONFIG_USE_INIT
+        "movl    %%esi, %%eax\n\t"
+#else
+        "movl    $((~(( 0 + (CONFIG_LB_MEM_TOPK<<10) ) -1)) | 0x800), %%eax\n\t"
+#endif
+        "wrmsr\n\t"
+
+        /* re-enable caching: clear CR0 bits 30 and 29 (CD and NW) */
+        "movl    %%cr0, %%eax\n\t"
+        "andl    $0x9fffffff,%%eax\n\t"
+        "movl    %%eax, %%cr0\n\t"
+//      "invd\n\t" // Is the BSP done with mem init?
+        :
+        :
+#if CONFIG_USE_INIT
+        "S"((~(( 0 + (CONFIG_LB_MEM_TOPK<<10) ) -1)) | 0x800)
+#endif
+        );
+}
+
diff --git a/src/cpu/amd/car/post_cache_as_ram.c b/src/cpu/amd/car/post_cache_as_ram.c
index fec5905..345e4c3 100644
--- a/src/cpu/amd/car/post_cache_as_ram.c
+++ b/src/cpu/amd/car/post_cache_as_ram.c
@@ -48,13 +48,9 @@
         if(cpu_reset==0) { // cpu_reset don't need to clear it 
 		clear_1m_ram();
         }
-
-#if 0
-	int i;
-	for(i=0;i<0x800000;i++) {
-		outb(0x66, 0x80);
+	else {
+		set_1m_ram();
 	}
-#endif
 
         __asm__ volatile (
                 /* set new esp */ /* before _RAMBASE */
@@ -63,7 +59,7 @@
                 ::"a"( (DCACHE_RAM_BASE + DCACHE_RAM_SIZE)- _RAMBASE )
         );
 
-       {
+        {
                 unsigned new_cpu_reset;
 
                 /* get back cpu_reset from ebx */
diff --git a/src/cpu/amd/model_fxx/init_cpus.c b/src/cpu/amd/model_fxx/init_cpus.c
index 718a0f6..6a070d5 100644
--- a/src/cpu/amd/model_fxx/init_cpus.c
+++ b/src/cpu/amd/model_fxx/init_cpus.c
@@ -257,6 +257,7 @@
 			lapic_write(LAPIC_MSG_REG, (apicid<<24) | 0x44); // bsp can not check it before stop_this_cpu
 
 			disable_cache_as_ram(); // inline
+			set_1m_ram(); // inline
                         stop_this_cpu(); // inline, it will stop all cores except node0/core0 the bsp .... 
                 }
 
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
index 4e7e696..aa62e24 100644
--- a/src/cpu/x86/lapic/lapic_cpu_init.c
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -1,3 +1,8 @@
+/*
+	2005.12 yhlu add linuxbios_ram cross the vga font buffer handling
+	2005.12 yhlu add _RAMBASE above 1M support for SMP
+*/
+
 #include <cpu/x86/lapic.h>
 #include <delay.h>
 #include <string.h>
@@ -9,7 +14,6 @@
 #include <smp/spinlock.h>
 #include <cpu/cpu.h>
 
-
 #if CONFIG_SMP == 1
 
 /* This is a lot more paranoid now, since Linux can NOT handle
@@ -19,6 +23,32 @@
  * We actually handling that case by noting which cpus startup
  * and not telling anyone about the ones that dont.
  */ 
+static unsigned long get_valid_start_eip(unsigned long orig_start_eip)
+{
+	return orig_start_eip & 0xfffff; /* keep only the low 20 bits, i.e. an address below 1M */
+}
+/* Copy the AP startup code to its masked (below 1M) address; a no-op unless the image is linked at or above 1M. */
+static void copy_secondary_start_to_1m_below(void)
+{
+#if _RAMBASE >= 0x100000
+        extern char _secondary_start[];
+        extern char _secondary_start_end[];
+        unsigned long code_size;
+        unsigned long start_eip;
+
+        /* The 16 bit code in secondary.S needs an address with the bits above bit 19 masked off,
+         * so the code itself must also be copied to that address in the region below 1M.
+         */
+        start_eip = get_valid_start_eip((unsigned long)_secondary_start);
+        code_size = (unsigned long)_secondary_start_end - (unsigned long)_secondary_start;
+
+        /* memcpy takes pointers: convert the integer destination address explicitly */
+        memcpy((void *)start_eip, _secondary_start, code_size);
+
+        printk_debug("start_eip=0x%08lx, offset=0x%08lx, code_size=0x%08lx\n", start_eip, ((unsigned long)_secondary_start - start_eip), code_size);
+#endif
+}
+
 static int lapic_start_cpu(unsigned long apicid)
 {
 	int timeout;
@@ -87,8 +117,8 @@
 		return 0;
 	}
 
-	start_eip = (unsigned long)_secondary_start;
-	printk_spew("start_eip=0x%08lx\n", start_eip);
+	start_eip = get_valid_start_eip((unsigned long)_secondary_start);
+	printk_debug("start_eip=0x%08lx\n", start_eip);
        
 	num_starts = 2;
 
@@ -193,7 +223,25 @@
 	index = ++last_cpu_index;
 	
 	/* Find end of the new processors stack */
+#if (CONFIG_LB_MEM_TOPK>1024) && (_RAMBASE < 0x100000) && ((CONFIG_CONSOLE_VGA==1) || (CONFIG_PCI_ROM_RUN == 1))
+	if(index<1) { // only the bsp (index 0) keeps its stack in the low image
+		stack_end = ((unsigned long)_estack) - (STACK_SIZE*index) - sizeof(struct cpu_info);
+	} else {
+		// all APs use stacks placed after pgtbl, which starts at 1M; 20480 is the pgtbl size for every cpu
+		stack_end = 0x100000+(20480 + STACK_SIZE)*CONFIG_MAX_CPUS - (STACK_SIZE*index);
+#if (0x100000+(20480 + STACK_SIZE)*CONFIG_MAX_CPUS) > (CONFIG_LB_MEM_TOPK<<10)
+		#warning "We may need to increase CONFIG_LB_MEM_TOPK, it need to be more than (0x100000+(20480 + STACK_SIZE)*CONFIG_MAX_CPUS)\n"
+#endif /* the name must be CONFIG_MAX_CPUS: an undefined name evaluates to 0 in #if and silently disables the check */
+		if(stack_end > (CONFIG_LB_MEM_TOPK<<10)) {
+			printk_debug("start_cpu: Please increase the CONFIG_LB_MEM_TOPK more than %dK\n", stack_end>>10);
+			die("Can not go on\n");
+		}
+		stack_end -= sizeof(struct cpu_info); // struct cpu_info sits at the very top of the stack area
+	}
+#else
 	stack_end = ((unsigned long)_estack) - (STACK_SIZE*index) - sizeof(struct cpu_info);
+#endif
+
 	
 	/* Record the index and which cpu structure we are using */
 	info = (struct cpu_info *)stack_end;
@@ -251,6 +299,7 @@
 	int old_active_count, active_count;
 	device_t cpu;
 	/* Loop through the cpus once getting them started */
+
 	for(cpu = cpu_bus->children; cpu ; cpu = cpu->sibling) {
 		if (cpu->path.type != DEVICE_PATH_APIC) {
 			continue;
@@ -327,6 +376,8 @@
 	
 	/* Find the device structure for the boot cpu */
 	info->cpu = alloc_find_dev(cpu_bus, &cpu_path);
+
+	copy_secondary_start_to_1m_below(); // why here? In case some day we can start core1 in amd_sibling_init
 	
 	/* Initialize the bootstrap processor */
 	cpu_initialize();
diff --git a/src/cpu/x86/lapic/secondary.S b/src/cpu/x86/lapic/secondary.S
index b212f43..e531eed 100644
--- a/src/cpu/x86/lapic/secondary.S
+++ b/src/cpu/x86/lapic/secondary.S
@@ -3,7 +3,7 @@
 #include <cpu/x86/mtrr.h>
 #include <cpu/x86/lapic_def.h>
 	.text
-	.globl _secondary_start
+	.globl _secondary_start, _secondary_start_end
 	.balign 4096
 _secondary_start:
 	.code16
@@ -48,9 +48,9 @@
 1:	hlt
 	jmp	1b
 
-gdtaddr:
-	.word	gdt_limit	/* the table limit */
-	.long	gdt		/* we know the offset */
+	gdtaddr:
+	.word   gdt_limit	/* the table limit */
+	.long   gdt             /* we know the offset */
 
-
+_secondary_start_end:
 .code32
diff --git a/src/cpu/x86/pae/pgtbl.c b/src/cpu/x86/pae/pgtbl.c
index 8297281..53db758 100644
--- a/src/cpu/x86/pae/pgtbl.c
+++ b/src/cpu/x86/pae/pgtbl.c
@@ -1,3 +1,7 @@
+/*
+	2005.12 yhlu add linuxbios_ram cross the vga font buffer handling
+*/
+
 #include <console/console.h>
 #include <cpu/cpu.h>
 #include <cpu/x86/pae.h>
@@ -49,7 +53,25 @@
 		struct pde pd[2048];
 		struct pde pdp[512];
 	} __attribute__ ((packed));
+
+#if (CONFIG_LB_MEM_TOPK>1024) && (_RAMBASE<0x100000) && ((CONFIG_CONSOLE_VGA==1) || (CONFIG_PCI_ROM_RUN == 1))
+	/*
+	 pgtbl is too big to keep inside the image: for 8 way dual core with vga support it would push stack and heap across 0xa0000,
+	 and that region is needed as the vga font buffer. So place it at 1M instead. Please make sure to set CONFIG_LB_MEM_TOPK=2048 in MB Config
+	*/
+	struct pg_table *pgtbl = (struct pg_table *)0x100000; // 1M; explicit cast, an integer is not implicitly a pointer
+	/* 20480 below is presumably sizeof(struct pg_table) spelled out for the preprocessor - TODO confirm; an undefined name in #if evaluates to 0 */
+	unsigned x_end = 0x100000 + sizeof(struct pg_table) * CONFIG_MAX_CPUS;
+#if (0x100000+20480*CONFIG_MAX_CPUS) > (CONFIG_LB_MEM_TOPK<<10)
+                #warning "We may need to increase CONFIG_LB_MEM_TOPK, it need to be more than (0x100000+20480*CONFIG_MAX_CPUS)\n"
+#endif
+	if(x_end > (CONFIG_LB_MEM_TOPK<<10)) {
+                        printk_debug("map_2M_page: Please increase the CONFIG_LB_MEM_TOPK more than %dK\n", x_end>>10);
+                        die("Can not go on");
+	}
+#else
 	static struct pg_table pgtbl[CONFIG_MAX_CPUS] __attribute__ ((aligned(4096)));
+#endif
 	static unsigned long mapped_window[CONFIG_MAX_CPUS];
 	unsigned long index;
 	unsigned long window;