1201_ht_bus0_dev0_fidvid_core.diff
https://openbios.org/roundup/linuxbios/issue41
Lord have mercy upon us.




git-svn-id: svn://svn.coreboot.org/coreboot/trunk@2118 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1
diff --git a/src/arch/i386/Config.lb b/src/arch/i386/Config.lb
index e9b426c..ebb014f 100644
--- a/src/arch/i386/Config.lb
+++ b/src/arch/i386/Config.lb
@@ -31,17 +31,17 @@
 addaction clean "rm -f romimage payload.*"
 
 if CONFIG_USE_INIT
-makerule init.o
-        depends "$(INIT-OBJECTS)"
-        action  "$(LD) -melf_i386 -r -o init.pre.o $(INIT-OBJECTS)"
-        action  "$(OBJCOPY) --rename-section .text=.init.text --rename-section .data=.init.data --rename-section .rodata=.init.rodata --rename-section .rodata.str1.1=.init.rodata.str1.1 init.pre.o init.o"
-end
+	makerule init.o
+        	depends "$(INIT-OBJECTS)"
+	        action  "$(LD) -melf_i386 -r -o init.pre.o $(INIT-OBJECTS)"
+	        action  "$(OBJCOPY) --rename-section .text=.init.text --rename-section .data=.init.data --rename-section .rodata=.init.rodata --rename-section .rodata.str1.1=.init.rodata.str1.1 init.pre.o init.o"
+	end
 
-makerule linuxbios   
-	depends	"crt0.o init.o linuxbios_ram.rom ldscript.ld"
-	action	"$(CC) -nostdlib -nostartfiles -static -o $@ -T ldscript.ld crt0.o init.o"
-	action	"$(CROSS_COMPILE)nm -n linuxbios | sort > linuxbios.map"
-end
+	makerule linuxbios   
+		depends	"crt0.o init.o linuxbios_ram.rom ldscript.ld"
+		action	"$(CC) -nostdlib -nostartfiles -static -o $@ -T ldscript.ld crt0.o init.o"
+		action	"$(CROSS_COMPILE)nm -n linuxbios | sort > linuxbios.map"
+	end
 
 end
 
diff --git a/src/arch/i386/include/arch/hlt.h b/src/arch/i386/include/arch/hlt.h
index 23ff1aa..3b2acf1 100644
--- a/src/arch/i386/include/arch/hlt.h
+++ b/src/arch/i386/include/arch/hlt.h
@@ -7,7 +7,7 @@
 	__builtin_hlt();
 }
 #else
-static inline void hlt(void)
+static inline __attribute__((always_inline)) void hlt(void)
 {
 	asm("hlt");
 }
diff --git a/src/arch/i386/include/arch/romcc_io.h b/src/arch/i386/include/arch/romcc_io.h
index 1d3e603..c0c2250 100644
--- a/src/arch/i386/include/arch/romcc_io.h
+++ b/src/arch/i386/include/arch/romcc_io.h
@@ -117,7 +117,7 @@
 
 typedef unsigned device_t;
 
-static inline uint8_t pci_read_config8(device_t dev, unsigned where)
+static inline __attribute__((always_inline)) uint8_t pci_read_config8(device_t dev, unsigned where)
 {
 	unsigned addr;
 	addr = dev | where;
@@ -125,7 +125,7 @@
 	return inb(0xCFC + (addr & 3));
 }
 
-static inline uint16_t pci_read_config16(device_t dev, unsigned where)
+static inline __attribute__((always_inline)) uint16_t pci_read_config16(device_t dev, unsigned where)
 {
 	unsigned addr;
 	addr = dev | where;
@@ -133,7 +133,7 @@
 	return inw(0xCFC + (addr & 2));
 }
 
-static inline uint32_t pci_read_config32(device_t dev, unsigned where)
+static inline __attribute__((always_inline)) uint32_t pci_read_config32(device_t dev, unsigned where)
 {
 	unsigned addr;
 	addr = dev | where;
@@ -141,7 +141,7 @@
 	return inl(0xCFC);
 }
 
-static inline void pci_write_config8(device_t dev, unsigned where, uint8_t value)
+static inline __attribute__((always_inline)) void pci_write_config8(device_t dev, unsigned where, uint8_t value)
 {
 	unsigned addr;
 	addr = dev | where;
@@ -149,7 +149,7 @@
 	outb(value, 0xCFC + (addr & 3));
 }
 
-static inline void pci_write_config16(device_t dev, unsigned where, uint16_t value)
+static inline __attribute__((always_inline)) void pci_write_config16(device_t dev, unsigned where, uint16_t value)
 {
 	unsigned addr;
 	addr = dev | where;
@@ -157,7 +157,7 @@
 	outw(value, 0xCFC + (addr & 2));
 }
 
-static inline void pci_write_config32(device_t dev, unsigned where, uint32_t value)
+static inline __attribute__((always_inline)) void pci_write_config32(device_t dev, unsigned where, uint32_t value)
 {
 	unsigned addr;
 	addr = dev | where;
@@ -180,53 +180,53 @@
 
 
 /* Generic functions for pnp devices */
-static inline void pnp_write_config(device_t dev, uint8_t reg, uint8_t value)
+static inline __attribute__((always_inline)) void pnp_write_config(device_t dev, uint8_t reg, uint8_t value)
 {
 	unsigned port = dev >> 8;
 	outb(reg, port );
 	outb(value, port +1);
 }
 
-static inline uint8_t pnp_read_config(device_t dev, uint8_t reg)
+static inline __attribute__((always_inline)) uint8_t pnp_read_config(device_t dev, uint8_t reg)
 {
 	unsigned port = dev >> 8;
 	outb(reg, port);
 	return inb(port +1);
 }
 
-static inline void pnp_set_logical_device(device_t dev)
+static inline __attribute__((always_inline)) void pnp_set_logical_device(device_t dev)
 {
 	unsigned device = dev & 0xff;
 	pnp_write_config(dev, 0x07, device);
 }
 
-static inline void pnp_set_enable(device_t dev, int enable)
+static inline __attribute__((always_inline)) void pnp_set_enable(device_t dev, int enable)
 {
 	pnp_write_config(dev, 0x30, enable?0x1:0x0);
 }
 
-static inline int pnp_read_enable(device_t dev)
+static inline __attribute__((always_inline)) int pnp_read_enable(device_t dev)
 {
 	return !!pnp_read_config(dev, 0x30);
 }
 
-static inline void pnp_set_iobase(device_t dev, unsigned index, unsigned iobase)
+static inline __attribute__((always_inline)) void pnp_set_iobase(device_t dev, unsigned index, unsigned iobase)
 {
 	pnp_write_config(dev, index + 0, (iobase >> 8) & 0xff);
 	pnp_write_config(dev, index + 1, iobase & 0xff);
 }
 
-static inline uint16_t pnp_read_iobase(device_t dev, unsigned index)
+static inline __attribute__((always_inline)) uint16_t pnp_read_iobase(device_t dev, unsigned index)
 {
-	return (uint16_t)((pnp_read_config(dev, index) << 8) | pnp_read_config(dev, index + 1));
+	return ((uint16_t)(pnp_read_config(dev, index)) << 8) | pnp_read_config(dev, index + 1);
 }
 
-static inline void pnp_set_irq(device_t dev, unsigned index, unsigned irq)
+static inline __attribute__((always_inline)) void pnp_set_irq(device_t dev, unsigned index, unsigned irq)
 {
 	pnp_write_config(dev, index, irq);
 }
 
-static inline void pnp_set_drq(device_t dev, unsigned index, unsigned drq)
+static inline __attribute__((always_inline)) void pnp_set_drq(device_t dev, unsigned index, unsigned drq)
 {
 	pnp_write_config(dev, index, drq & 0xff);
 }
diff --git a/src/arch/i386/include/arch/smp/atomic.h b/src/arch/i386/include/arch/smp/atomic.h
index 7b68377..7061461 100644
--- a/src/arch/i386/include/arch/smp/atomic.h
+++ b/src/arch/i386/include/arch/smp/atomic.h
@@ -41,7 +41,7 @@
  * Atomically increments @v by 1.  Note that the guaranteed
  * useful range of an atomic_t is only 24 bits.
  */ 
-static __inline__ void atomic_inc(atomic_t *v)
+static __inline__ __attribute__((always_inline)) void atomic_inc(atomic_t *v)
 {
 	__asm__ __volatile__(
 		"lock ; incl %0"
@@ -56,7 +56,7 @@
  * Atomically decrements @v by 1.  Note that the guaranteed
  * useful range of an atomic_t is only 24 bits.
  */ 
-static __inline__ void atomic_dec(atomic_t *v)
+static __inline__ __attribute__((always_inline)) void atomic_dec(atomic_t *v)
 {
 	__asm__ __volatile__(
 		"lock ; decl %0"
diff --git a/src/arch/i386/include/arch/smp/spinlock.h b/src/arch/i386/include/arch/smp/spinlock.h
index 65ad8d0..7101545 100644
--- a/src/arch/i386/include/arch/smp/spinlock.h
+++ b/src/arch/i386/include/arch/smp/spinlock.h
@@ -40,14 +40,14 @@
 #define spin_unlock_string \
 	"movb $1,%0"
 
-static inline void spin_lock(spinlock_t *lock)
+static inline __attribute__((always_inline)) void spin_lock(spinlock_t *lock)
 {
 	__asm__ __volatile__(
 		spin_lock_string
 		:"=m" (lock->lock) : : "memory");
 }
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline __attribute__((always_inline)) void spin_unlock(spinlock_t *lock)
 {
 	__asm__ __volatile__(
 		spin_unlock_string
@@ -55,7 +55,7 @@
 }
 
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void cpu_relax(void)
+static inline __attribute__((always_inline)) void cpu_relax(void)
 {
 	__asm__ __volatile__("rep;nop": : :"memory");
 }
diff --git a/src/arch/i386/lib/c_start.S b/src/arch/i386/lib/c_start.S
index ce13d15..04b5a68 100644
--- a/src/arch/i386/lib/c_start.S
+++ b/src/arch/i386/lib/c_start.S
@@ -22,18 +22,20 @@
 	leal	_stack, %edi
 	movl	$_estack, %ecx
 	subl	%edi, %ecx
+	shrl	$2, %ecx   /* it is 32 bit align, right? */
 	xorl	%eax, %eax
 	rep
-	stosb
+	stosl
 
 	/** clear bss */
 	leal	_bss, %edi
 	movl	$_ebss, %ecx
 	subl	%edi, %ecx
 	jz	.Lnobss
+	shrl	$2, %ecx  /* it is 32 bit align, right? */
 	xorl	%eax, %eax
 	rep
-	stosb
+	stosl
 .Lnobss:
 
 	/* set new stack */
diff --git a/src/arch/i386/lib/console.c b/src/arch/i386/lib/console.c
index fd1b5d7..a1f5d6f 100644
--- a/src/arch/i386/lib/console.c
+++ b/src/arch/i386/lib/console.c
@@ -120,58 +120,8 @@
 static void print_spew_hex32(unsigned int value) { __console_tx_hex32(BIOS_SPEW, value); }
 static void print_spew(const char *str) { __console_tx_string(BIOS_SPEW, str); }
 
-/* Non inline versions.... */
-#if 0
-static void print_alert_char_(unsigned char value) NOINLINE   { print_alert_char(value); }
-static void print_alert_hex8_(unsigned char value) NOINLINE   { print_alert_hex8(value); }
-static void print_alert_hex16_(unsigned short value) NOINLINE { print_alert_hex16(value); }
-static void print_alert_hex32_(unsigned int value) NOINLINE   { print_alert_hex32(value); }
-static void print_alert_(const char *str) NOINLINE            { print_alert(str); }
-
-static void print_crit_char_(unsigned char value) NOINLINE   { print_crit_char(value); }
-static void print_crit_hex8_(unsigned char value) NOINLINE   { print_crit_hex8(value); }
-static void print_crit_hex16_(unsigned short value) NOINLINE { print_crit_hex16(value); }
-static void print_crit_hex32_(unsigned int value) NOINLINE   { print_crit_hex32(value); }
-static void print_crit_(const char *str) NOINLINE            { print_crit(str); }
-
-static void print_err_char_(unsigned char value) NOINLINE   { print_err_char(value); }
-static void print_err_hex8_(unsigned char value) NOINLINE   { print_err_hex8(value); }
-static void print_err_hex16_(unsigned short value) NOINLINE { print_err_hex16(value); }
-static void print_err_hex32_(unsigned int value) NOINLINE   { print_err_hex32(value); }
-static void print_err_(const char *str) NOINLINE            { print_err(str); }
-
-static void print_warning_char_(unsigned char value) NOINLINE   { print_warning_char(value); }
-static void print_warning_hex8_(unsigned char value) NOINLINE   { print_warning_hex8(value); }
-static void print_warning_hex16_(unsigned short value) NOINLINE { print_warning_hex16(value); }
-static void print_warning_hex32_(unsigned int value) NOINLINE   { print_warning_hex32(value); }
-static void print_warning_(const char *str) NOINLINE            { print_warning(str); }
-
-static void print_notice_char_(unsigned char value) NOINLINE   { print_notice_char(value); }
-static void print_notice_hex8_(unsigned char value) NOINLINE   { print_notice_hex8(value); }
-static void print_notice_hex16_(unsigned short value) NOINLINE { print_notice_hex16(value); }
-static void print_notice_hex32_(unsigned int value) NOINLINE   { print_notice_hex32(value); }
-static void print_notice_(const char *str) NOINLINE            { print_notice(str); }
-
-static void print_info_char_(unsigned char value) NOINLINE   { print_info_char(value); }
-static void print_info_hex8_(unsigned char value) NOINLINE   { print_info_hex8(value); }
-static void print_info_hex16_(unsigned short value) NOINLINE { print_info_hex16(value); }
-static void print_info_hex32_(unsigned int value) NOINLINE   { print_info_hex32(value); }
-static void print_info_(const char *str) NOINLINE            { print_info(str); }
-
-static void print_debug_char_(unsigned char value) NOINLINE   { print_debug_char(value); }
-static void print_debug_hex8_(unsigned char value) NOINLINE   { print_debug_hex8(value); }
-static void print_debug_hex16_(unsigned short value) NOINLINE { print_debug_hex16(value); }
-static void print_debug_hex32_(unsigned int value) NOINLINE   { print_debug_hex32(value); }
-static void print_debug_(const char *str) NOINLINE            { print_debug(str); }
-
-static void print_spew_char_(unsigned char value) NOINLINE   { print_spew_char(value); }
-static void print_spew_hex8_(unsigned char value) NOINLINE   { print_spew_hex8(value); }
-static void print_spew_hex16_(unsigned short value) NOINLINE { print_spew_hex16(value); }
-static void print_spew_hex32_(unsigned int value) NOINLINE   { print_spew_hex32(value); }
-static void print_spew_(const char *str) NOINLINE            { print_spew(str); }
-#endif
-
-#else
+#else  
+/* CONFIG_USE_INIT == 1 */
 
 extern int do_printk(int msg_level, const char *fmt, ...);
 
@@ -273,12 +223,13 @@
 #define print_spew_hex32(HEX)    printk_spew   ("%08x", (HEX))
 
 
-#endif /* CONFIG_USE_INIT == 0 */
+#endif /* CONFIG_USE_INIT */
 
 #ifndef LINUXBIOS_EXTRA_VERSION
 #define LINUXBIOS_EXTRA_VERSION ""
 #endif
 
+
 static void console_init(void)
 {
 	static const char console_test[] = 
diff --git a/src/config/Options.lb b/src/config/Options.lb
index 0717d38..857dce8 100644
--- a/src/config/Options.lb
+++ b/src/config/Options.lb
@@ -815,6 +815,30 @@
 	comment "CK804 device count from 0 or 1"
 end
 
+define HT_CHAIN_UNITID_BASE
+	default 1
+	export always
+	comment "this will be first hypertransport device's unitid base, if sb ht chain only has one ht device, it could be 0"
+end
+
+define HT_CHAIN_END_UNITID_BASE
+        default 0x20
+        export always
+        comment "this will be unit id of the end of hypertransport chain (usually the real SB) if it is smaller than HT_CHAIN_UNITID_BASE, it could be 0"
+end
+
+define SB_HT_CHAIN_UNITID_OFFSET_ONLY
+        default 1
+        export always
+        comment "this will decide whether to offset only the SB hypertransport chain"
+end
+
+define K8_SB_HT_CHAIN_ON_BUS0
+        default 0 
+        export always
+        comment "this will make SB hypertransport chain sit on bus 0"
+end
+
 define K8_HW_MEM_HOLE_SIZEK
         default 0
         export always
diff --git a/src/console/console.c b/src/console/console.c
index 7f97b5c..86ec26d 100644
--- a/src/console/console.c
+++ b/src/console/console.c
@@ -16,7 +16,7 @@
 	struct console_driver *driver;
 	if(get_option(&console_loglevel, "debug_level"))
 		console_loglevel=DEFAULT_CONSOLE_LOGLEVEL;
-
+	
 	for(driver = console_drivers; driver < econsole_drivers; driver++) {
 		if (!driver->init)
 			continue;
@@ -57,9 +57,10 @@
 	struct console_driver *driver;
 	if (!initialized)
 		return 0;
-	for(driver = console_drivers; driver < econsole_drivers; driver++)
+	for(driver = console_drivers; driver < econsole_drivers; driver++) {
 		if (driver->tst_byte)
 			break;
+	}
 	if (driver == econsole_drivers)
 		return 0;
 	while (!driver->tst_byte());
diff --git a/src/console/printk.c b/src/console/printk.c
index da330c9..01a52af 100644
--- a/src/console/printk.c
+++ b/src/console/printk.c
@@ -25,7 +25,7 @@
 void display(char*);
 extern int vtxprintf(void (*)(unsigned char), const char *, va_list);
 
-spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
 
 int do_printk(int msg_level, const char *fmt, ...)
 {
diff --git a/src/console/vga_console.c b/src/console/vga_console.c
index bdd56f3..e9756c8 100644
--- a/src/console/vga_console.c
+++ b/src/console/vga_console.c
@@ -15,11 +15,7 @@
 
 int vga_line, vga_col;
 
-#if CONFIG_CONSOLE_VGA == 1
-extern int vga_inited; // it will be changed in pci_rom.c
-#else
-int vga_inited = 0;
-#endif
+int vga_inited = 0; // it will be changed in pci_rom.c
 
 static int vga_console_inited = 0;
 
diff --git a/src/cpu/amd/car/clear_1m_ram.c b/src/cpu/amd/car/clear_1m_ram.c
index 85ba59c..80b215e 100644
--- a/src/cpu/amd/car/clear_1m_ram.c
+++ b/src/cpu/amd/car/clear_1m_ram.c
@@ -1,5 +1,7 @@
 /* by yhlu 6.2005 */
 /* be warned, this file will be used core 0/node 0 only */
+static inline __attribute__((always_inline)) void clear_1m_ram(void)
+{
         __asm__ volatile (
 
         /* disable cache */
@@ -51,3 +53,4 @@
 	"invd\n\t"
 
         );
+}
diff --git a/src/cpu/amd/car/copy_and_run.c b/src/cpu/amd/car/copy_and_run.c
index 89a864d..9c6508b 100644
--- a/src/cpu/amd/car/copy_and_run.c
+++ b/src/cpu/amd/car/copy_and_run.c
@@ -28,10 +28,18 @@
 #define GETBIT(bb, src, ilen) GETBIT_LE32(bb, src, ilen)
 #endif
 
+static inline void print_debug_cp_run(const char *strval, uint32_t val)
+{ /* Debug helper: print the label strval, then val as hex, then CRLF; uses printk_debug when CONFIG_USE_INIT is set, the print_debug_* helpers otherwise. */
+#if CONFIG_USE_INIT
+        printk_debug("%s%08x\r\n", strval, val);
+#else
+        print_debug(strval); print_debug_hex32(val); print_debug("\r\n");
+#endif
+}
+
 static void copy_and_run(unsigned cpu_reset)
 {
 	uint8_t *src, *dst; 
-	unsigned long dst_len;
         unsigned long ilen = 0, olen = 0, last_m_off =  1;
         uint32_t bb = 0;
         unsigned bc = 0;
@@ -44,9 +52,9 @@
 		"leal _iseg, %1\n\t"
 		"leal _eiseg, %2\n\t"
 		"subl %1, %2\n\t"
-		: "=a" (src), "=b" (dst), "=c" (dst_len)
+		: "=a" (src), "=b" (dst), "=c" (olen)
 	);
-	memcpy(src, dst, dst_len);
+	memcpy(src, dst, olen);
 #else 
 
         __asm__ volatile (
@@ -55,13 +63,10 @@
                 : "=a" (src) , "=b" (dst)
         );
 
-#if CONFIG_USE_INIT		
-	printk_debug("src=%08x\r\n",src); 
-	printk_debug("dst=%08x\r\n",dst);
-#else
-        print_debug("src="); print_debug_hex32(src); print_debug("\r\n");
-        print_debug("dst="); print_debug_hex32(dst); print_debug("\r\n");
-#endif
+	print_debug_cp_run("src=",(uint32_t)src); 
+	print_debug_cp_run("dst=",(uint32_t)dst);
+	
+//	dump_mem(src, src+0x100);
 
         for(;;) {
                 unsigned int m_off, m_len;
@@ -105,11 +110,9 @@
         }
 #endif
 //	dump_mem(dst, dst+0x100);
-#if CONFIG_USE_INIT
-	printk_debug("linxbios_ram.bin length = %08x\r\n", olen);
-#else
-	print_debug("linxbios_ram.bin length = "); print_debug_hex32(olen); print_debug("\r\n");
-#endif
+
+	print_debug_cp_run("linxbios_ram.bin length = ", olen);
+
 	print_debug("Jumping to LinuxBIOS.\r\n");
 
 	if(cpu_reset == 1 ) {
diff --git a/src/cpu/amd/car/disable_cache_as_ram.c b/src/cpu/amd/car/disable_cache_as_ram.c
index a699cae..06a558f 100644
--- a/src/cpu/amd/car/disable_cache_as_ram.c
+++ b/src/cpu/amd/car/disable_cache_as_ram.c
@@ -1,11 +1,12 @@
 /* by yhlu 6.2005 */
 /* be warned, this file will be used other cores and core 0 / node 0 */
+static inline __attribute__((always_inline)) void disable_cache_as_ram(void)
+{
         __asm__ volatile (
 	/* 
 	FIXME : backup stack in CACHE_AS_RAM into mmx and sse and after we get STACK up, we restore that.
 		It is only needed if we want to go back
 	*/
-	
         /* We don't need cache as ram for now on */
         /* disable cache */
         "movl    %cr0, %eax\n\t"
@@ -42,5 +43,5 @@
         "movl    %cr0, %eax\n\t"
         "andl    $0x9fffffff,%eax\n\t"
         "movl    %eax, %cr0\n\t"
-
         );
+}
diff --git a/src/cpu/amd/dualcore/Config.lb b/src/cpu/amd/dualcore/Config.lb
index dd8dd09..acc5d2e 100644
--- a/src/cpu/amd/dualcore/Config.lb
+++ b/src/cpu/amd/dualcore/Config.lb
@@ -1,5 +1 @@
-uses CONFIG_LOGICAL_CPUS
-
-if CONFIG_LOGICAL_CPUS
-	object amd_sibling.o
-end
+object amd_sibling.o
diff --git a/src/cpu/amd/dualcore/dualcore.c b/src/cpu/amd/dualcore/dualcore.c
index 3923891..e215842 100644
--- a/src/cpu/amd/dualcore/dualcore.c
+++ b/src/cpu/amd/dualcore/dualcore.c
@@ -1,31 +1,72 @@
 /* 2004.12 yhlu add dual core support */
 
+
+#ifndef SET_NB_CFG_54
+	#define SET_NB_CFG_54 1
+#endif
+
 #include "cpu/amd/dualcore/dualcore_id.c"
 
 static inline unsigned get_core_num_in_bsp(unsigned nodeid)
 {
-        return ((pci_read_config32(PCI_DEV(0, 0x18+nodeid, 3), 0xe8)>>12) & 3);
+	uint32_t dword;
+	dword = pci_read_config32(PCI_DEV(0, 0x18+nodeid, 3), 0xe8);
+	dword >>= 12;
+	dword &= 3;
+	return dword;
 }
 
-static inline uint8_t set_apicid_cpuid_lo(void) 
+#if SET_NB_CFG_54 == 1
+static inline uint8_t set_apicid_cpuid_lo(void)
 {
         if(is_cpu_pre_e0()) return 0; // pre_e0 can not be set
 
-
-        if(read_option(CMOS_VSTART_dual_core, CMOS_VLEN_dual_core, 0) != 0)  { // disable dual_core
-                return 0;
-        }
-
-                // set the NB_CFG[54]=1; why the OS will be happy with that ???
+        // set the NB_CFG[54]=1; why the OS will be happy with that ???
         msr_t msr;
         msr = rdmsr(NB_CFG_MSR);
         msr.hi |= (1<<(54-32)); // InitApicIdCpuIdLo
         wrmsr(NB_CFG_MSR, msr);
 
         return 1;
+}
+#else
 
+static inline void set_apicid_cpuid_lo(void) { }
+
+#endif
+/* Start the other core of node `nodeid` by setting bit 27 of function 3 reg 0x44, then bit 5 of function 0 reg 0x68, on PCI device 0:(0x18+nodeid). */
+static inline void real_start_other_core(unsigned nodeid)
+{
+	uint32_t dword;
+	// set PCI_DEV(0, 0x18+nodeid, 3), 0x44 bit 27 to redirect all MC4 accesses and error logging to core0
+	dword = pci_read_config32(PCI_DEV(0, 0x18+nodeid, 3), 0x44);
+	dword |= 1<<27; // NbMcaToMstCpuEn bit
+	pci_write_config32(PCI_DEV(0, 0x18+nodeid, 3), 0x44, dword);
+	// set PCI_DEV(0, 0x18+nodeid, 0), 0x68 bit 5 to start core1
+	dword = pci_read_config32(PCI_DEV(0, 0x18+nodeid, 0), 0x68);
+	dword |= 1<<5;
+	pci_write_config32(PCI_DEV(0, 0x18+nodeid, 0), 0x68, dword);
 }
 
+// It is running on core0 of node0: starts the other core on every node that reports one, unless the dual_core CMOS option reads non-zero.
+static inline void start_other_cores(void)
+{
+	unsigned nodes;
+	unsigned nodeid;
+
+        if(read_option(CMOS_VSTART_dual_core, CMOS_VLEN_dual_core, 0) != 0)  { // disable dual_core
+                return;
+        }
+
+        nodes = get_nodes();
+
+        for(nodeid=0; nodeid<nodes; nodeid++) {
+		if( get_core_num_in_bsp(nodeid) > 0) { // non-zero core-count field read from this node
+			real_start_other_core(nodeid);
+		}
+	}
+
+}
 #if USE_DCACHE_RAM == 0
 static void do_k8_init_and_stop_secondaries(void)
 {
@@ -62,7 +103,26 @@
 	pci_write_config32(dev_f0, 0x68, val);
 
 	/* Set the lapicid */
-	lapic_write(LAPIC_ID,(0x10 + id.coreid*0x10 + id.nodeid) << 24);
+        #if (ENABLE_APIC_EXT_ID == 1)
+                unsigned initial_apicid = get_initial_apicid();
+                #if LIFT_BSP_APIC_ID == 0
+                if( initial_apicid != 0 ) // other than bsp
+                #endif
+                {
+                                /* use initial apic id to lift it */
+                                uint32_t dword = lapic_read(LAPIC_ID);
+                                dword &= ~(0xff<<24);
+                                dword |= (((initial_apicid + APIC_ID_OFFSET) & 0xff)<<24);
+
+                                lapic_write(LAPIC_ID, dword);
+                }
+
+                #if LIFT_BSP_APIC_ID == 1
+                bsp_apicid += APIC_ID_OFFSET;
+                #endif
+
+        #endif
+
 
 	/* Remember the cpuid */
 	if (id.coreid == 0) {
diff --git a/src/cpu/amd/dualcore/dualcore_id.c b/src/cpu/amd/dualcore/dualcore_id.c
index a1a898a..3899697 100644
--- a/src/cpu/amd/dualcore/dualcore_id.c
+++ b/src/cpu/amd/dualcore/dualcore_id.c
@@ -1,26 +1,27 @@
 /* 2004.12 yhlu add dual core support */
 
 #include <arch/cpu.h>
+#include <cpu/amd/dualcore.h>
+#ifdef __ROMCC__
 #include <cpu/amd/model_fxx_msr.h>
+#endif
 
-static inline unsigned int read_nb_cfg_54(void)
+//called by bus_cpu_scan too
+unsigned int read_nb_cfg_54(void)
 {
         msr_t msr;
         msr = rdmsr(NB_CFG_MSR);
         return ( ( msr.hi >> (54-32)) & 1);
 }
 
-struct node_core_id {
-	unsigned nodeid:8;
-	unsigned coreid:8;
-};
-
-static inline unsigned get_initial_apicid(void)
+static inline unsigned get_initial_apicid(void) 
 {
 	return ((cpuid_ebx(1) >> 24) & 0xf);
 }
 
-static inline struct node_core_id get_node_core_id(unsigned nb_cfg_54) {
+//called by amd_siblings too
+struct node_core_id get_node_core_id(unsigned nb_cfg_54) 
+{
 	struct node_core_id id;
 	//    get the apicid via cpuid(1) ebx[27:24]
 	if( nb_cfg_54) {
@@ -45,6 +46,7 @@
 }
 
 static inline struct node_core_id get_node_core_id_x(void) {
-	return get_node_core_id( read_nb_cfg_54() ); 
+
+	return get_node_core_id( read_nb_cfg_54() ); // for pre_e0() nb_cfg_54 always be 0
 }
 
diff --git a/src/cpu/amd/model_fxx/apic_timer.c b/src/cpu/amd/model_fxx/apic_timer.c
index 5a81f91..8eeb32f 100644
--- a/src/cpu/amd/model_fxx/apic_timer.c
+++ b/src/cpu/amd/model_fxx/apic_timer.c
@@ -7,10 +7,13 @@
 {
 	/* Set the apic timer to no interrupts and periodic mode */
 	lapic_write(LAPIC_LVTT, (1 << 17)|(1<< 16)|(0 << 12)|(0 << 0));
+
 	/* Set the divider to 1, no divider */
 	lapic_write(LAPIC_TDCR, LAPIC_TDR_DIV_1);
+
 	/* Set the initial counter to 0xffffffff */
 	lapic_write(LAPIC_TMICT, 0xffffffff);
+
 }
 
 void udelay(unsigned usecs)
diff --git a/src/cpu/amd/model_fxx/fidvid.c b/src/cpu/amd/model_fxx/fidvid.c
new file mode 100644
index 0000000..8d1b84c
--- /dev/null
+++ b/src/cpu/amd/model_fxx/fidvid.c
@@ -0,0 +1,396 @@
+#if K8_SET_FIDVID == 1
+
+#define K8_SET_FIDVID_DEBUG 0 
+
+#define K8_SET_FIDVID_STORE_AP_APICID_AT_FIRST 1
+/* Debug helper: print str, then val in hex, then CRLF. The body is empty unless K8_SET_FIDVID_DEBUG == 1. */
+static inline void print_debug_fv(const char *str, unsigned val)
+{
+#if K8_SET_FIDVID_DEBUG == 1
+        #if CONFIG_USE_INIT==1
+        	printk_debug("%s%x\r\n", str, val);
+        #else
+                print_debug(str); print_debug_hex32(val); print_debug("\r\n");
+        #endif
+#endif
+}
+/* Debug helper: print str, then val as a two-digit hex byte, then CRLF. The body is empty unless K8_SET_FIDVID_DEBUG == 1. */
+static inline void print_debug_fv_8(const char *str, unsigned val)
+{
+#if K8_SET_FIDVID_DEBUG == 1
+        #if CONFIG_USE_INIT==1
+                printk_debug("%s%02x\r\n", str, val);
+        #else
+                print_debug(str); print_debug_hex8(val); print_debug("\r\n");
+        #endif
+#endif
+}
+/* Debug helper: print str, then val and val2 back to back as one 16-digit hex number (val is the high word), then CRLF. The body is empty unless K8_SET_FIDVID_DEBUG == 1. */
+static inline void print_debug_fv_64(const char *str, unsigned val, unsigned val2)
+{
+#if K8_SET_FIDVID_DEBUG == 1
+        #if CONFIG_USE_INIT==1
+                printk_debug("%s%08x%08x\r\n", str, val, val2); /* zero-pad both halves so the digits of val2 keep their position, as print_debug_hex32 does below */
+        #else
+                print_debug(str); print_debug_hex32(val); print_debug_hex32(val2); print_debug("\r\n");
+        #endif
+#endif
+}
+/* Program every node's northbridge (PCI devices 0:0x18 .. 0:0x18+nodes-1) so that FID/VID changes are allowed. */
+/* NOTE(review): the constants written to function 3 regs 0xd8/0xd4/0x80/0x84 are not explained here; check them against the CPU's BIOS/kernel developer guide. */
+static void enable_fid_change(void)
+{
+	uint32_t dword;
+	unsigned nodes;
+	int i;
+
+	nodes = ((pci_read_config32(PCI_DEV(0, 0x18, 0), 0x60)>>4) & 7) + 1; // node count = bits [6:4] of function 0 reg 0x60, plus one
+
+	for(i=0; i<nodes; i++) {
+		dword = pci_read_config32(PCI_DEV(0, 0x18+i, 3), 0xd8);
+		dword &= 0x8ff00000; // keep bit 31 and bits [27:20], clear the rest
+		dword |= (2<<28) | (0x02710);
+	        pci_write_config32(PCI_DEV(0, 0x18+i, 3), 0xd8, dword);
+
+		dword = 0x04e2a707;
+		pci_write_config32(PCI_DEV(0, 0x18+i, 3), 0xd4, dword);
+
+		dword = pci_read_config32(PCI_DEV(0, 0x18+i, 2), 0x94);
+		dword |= (1<<14);// disable the DRAM interface at first, it will be enabled by raminit again
+		pci_write_config32(PCI_DEV(0, 0x18+i, 2), 0x94, dword);
+
+                dword = 0x23070000; //enable FID/VID change
+//		dword = 0x00070000; //enable FID/VID change
+                pci_write_config32(PCI_DEV(0, 0x18+i, 3), 0x80, dword);
+
+                dword = 0x00132113;
+                pci_write_config32(PCI_DEV(0, 0x18+i, 3), 0x84, dword);
+
+	}
+}
+/* Step the running CPU to the fid/vid packed in fidvid (fid = bits [13:8], vid = bits [21:16]) and return the fid/vid actually reached in the same packing. apicid must equal the local APIC id, otherwise fidvid is returned untouched. showmessage enables the debug/failure messages. */
+static unsigned set_fidvid(unsigned apicid, unsigned fidvid, int showmessage)
+{
+	//for (cur, new) there is one <1600MHz x8 to find out next_fid 
+	static const uint8_t next_fid_a[] = {
+	/* x4  x5  x6  x7  x8  x9 x10 x11 x12 x13 x14 x15 */ // 0:x4, 2:x5....BASE=4, MIN=4, MAX=25, INC=2 result = (xX-BASE)*INC
+/* x4 */    0,  9,  9,  8,  9,  9,  9,  9,  9,  9,  9,  9,
+/* x5 */    9,  0, 11, 11,  9,  9, 10, 11, 11, 11, 11, 11, 
+/* x6 */   11, 11,  0, 13, 11, 11, 11, 11, 12, 13, 13, 13,
+/* x7 */   13, 13, 13,  0, 13, 13, 13, 13, 13, 13, 14, 15,
+/* x8 */    4,  9,  9,  9,  0,  9,  9,  9,  9,  9,  9,  9,
+/* x9 */    4,  5, 10, 10,  8,  0,  0,  0,  0,  0,  0,  0, 
+/*x10 */    9,  5, 11, 11,  9,  0,  0,  0,  0,  0,  0,  0, 
+/*x11 */   10,  5,  6, 12, 10,  0,  0,  0,  0,  0,  0,  0,
+/*x12 */   11, 11,  6, 13, 11,  0,  0,  0,  0,  0,  0,  0,
+/*x13 */   12, 12,  6,  7, 12,  0,  0,  0,  0,  0,  0,  0,
+/*x14 */   13, 13, 13,  7, 13,  0,  0,  0,  0,  0,  0,  0,
+/*x15 */   14, 14, 14,  7, 14,  0,  0,  0,  0,  0,  0,  0, 
+	};
+	/* rows = current multiplier, columns = requested multiplier, entry = next multiplier to step to (0 = no step) */
+        msr_t msr;
+        uint32_t vid;
+        uint32_t fid;
+	uint32_t vid_max;
+	uint32_t fid_max;
+        uint32_t vid_cur;
+        uint32_t fid_cur;
+	unsigned apicidx;
+
+	int steps;
+	int loop;
+
+	apicidx = lapicid();
+
+	if(apicid!=apicidx) {
+#if CONFIG_USE_INIT == 1
+		printk_err("wrong apicid, we want change %x, but it is %x\r\n", apicid, apicidx);
+#else
+		print_err("wrong apicid, we want change "); print_err_hex8(apicid); print_err(" but it is "); print_err_hex8(apicidx); print_err("\r\n");
+#endif
+		return fidvid;	
+	}
+
+	fid = (fidvid >> 8) & 0x3f;
+	vid = (fidvid >> 16) & 0x3f;
+
+	msr = rdmsr(0xc0010042);
+
+        vid_cur = msr.hi & 0x3f;
+        fid_cur = msr.lo & 0x3f;
+
+	if((vid_cur==vid) && (fid_cur==fid)) return fidvid; 
+
+        vid_max = (msr.hi>>(48-32)) & 0x3f;
+        fid_max = (msr.lo>>16) & 0x3f;
+
+        //set vid to max
+        msr.hi = 1;
+        msr.lo = (vid_max<<8) | (fid_cur);
+        msr.lo |= (1<<16); // init changes
+        wrmsr(0xc0010041, msr);
+
+        for(loop=0;loop<100000;loop++){
+		msr = rdmsr(0xc0010042);
+                if(!(msr.lo & (1<<31))) break;
+        }
+	vid_cur = msr.hi & 0x3f;
+
+	steps = 8; //??
+        while((fid_cur!=fid) && (steps-->0)) {
+		uint32_t fid_temp;	
+		if((fid_cur > (8-4)*2) && (fid> (8-4)*2)) {
+	        	if(fid_cur<fid) {
+        	        	fid_temp = fid_cur + 2;
+       	                } else {
+               	                fid_temp = fid_cur - 2;
+                       	}
+		}
+		else { //there is one < 8, So we need to lookup the table to find the fid_cur
+			unsigned idx = (fid_cur/2)*12+(fid/2);
+			int temp = (idx < 12*12) ? next_fid_a[idx] : 0; // fid codes are 6 bits wide; never read past the 12x12 table
+			if(temp <= 0) break;
+			fid_temp = (temp-4) * 2;
+		}
+		if(fid_temp>fid_max) break;
+
+		fid_cur = fid_temp;
+
+	        //set target fid
+        	msr.hi = (100000/5);
+	        msr.lo = (vid_cur<<8) | fid_cur;
+	        msr.lo |= (1<<16); // init changes
+	        wrmsr(0xc0010041, msr);
+
+
+#if K8_SET_FIDVID_DEBUG == 1
+		if(showmessage) {
+			print_debug_fv_8("\tapicid in set_fidvid = ", apicid);
+			print_debug_fv_64("ctrl msr fid, vid ", msr.hi, msr.lo); 
+		}
+#endif
+
+	        for(loop=0;loop<100000;loop++){
+        		msr = rdmsr(0xc0010042);
+	                if(!(msr.lo & (1<<31))) break;
+        	}
+	        fid_cur = msr.lo & 0x3f;
+
+#if K8_SET_FIDVID_DEBUG == 1
+		if(showmessage)	{	
+                        print_debug_fv_64("status msr fid, vid ", msr.hi, msr.lo);
+		}
+#endif
+	}
+
+        //set vid to final 
+        msr.hi = 1;
+        msr.lo = (vid<<8) | (fid_cur);
+        msr.lo |= (1<<16); // init changes
+        wrmsr(0xc0010041, msr);
+
+        for(loop=0;loop<100000;loop++){
+                msr = rdmsr(0xc0010042);
+                if(!(msr.lo & (1<<31))) break;
+        }
+        vid_cur = msr.hi & 0x3f;
+
+	fidvid = (vid_cur<< 16) | (fid_cur<<8);
+
+	if(showmessage) {
+		if((fid!=fid_cur) || (vid!=vid_cur)) {
+			print_err("set fidvid failed\r\n");
+		}
+	}
+
+	return fidvid;
+
+}
+/* AP side of the fid/vid handshake: report this AP's max fid/vid to the BSP through LAPIC_MSG_REG, wait for the BSP to address this apicid, apply the requested fid/vid, then report the result. */
+static void init_fidvid_ap(unsigned bsp_apicid, unsigned apicid)
+{
+
+	uint32_t send;
+        uint32_t readback;
+	msr_t msr;
+        uint32_t vid_cur;
+        uint32_t fid_cur;
+	int loop;
+
+        msr =  rdmsr(0xc0010042);
+        send = ((msr.lo>>16) & 0x3f) << 8; //max fid
+        send |= ((msr.hi>>(48-32)) & 0x3f) << 16; //max vid
+	send |= (apicid<<24); // ap apicid
+
+        vid_cur = msr.hi & 0x3f;
+        fid_cur = msr.lo & 0x3f;
+
+	// set to current
+        msr.hi = 1;
+        msr.lo = (vid_cur<<8) | (fid_cur);
+        wrmsr(0xc0010041, msr);
+
+	wait_cpu_state(bsp_apicid, 1); // NOTE(review): presumably blocks until the BSP's message register shows state 1 -- confirm in wait_cpu_state
+        //send signal to BSP about this AP max fid and vid
+        lapic_write(LAPIC_MSG_REG, send | 1); //AP at state 1 that sent our fid and vid
+
+//	wait_cpu_state(bsp_apicid, 2);// don't need we can use apicid directly	
+	loop = 100000;
+        while(--loop>0) {
+		//remote read BSP signal that include vid and fid that need to set
+                if(lapic_remote_read(bsp_apicid, LAPIC_MSG_REG, &readback)!=0) continue;
+                if(((readback>>24) & 0xff) == apicid) break; // it is this cpu turn
+        }
+
+	if(loop>0) { // loop reaches 0 only when the poll above timed out; then the max fid/vid in send is reported unchanged
+       		readback = set_fidvid(apicid, readback & 0xffff00, 1); // this AP
+       		//send signal to BSP that this AP fid/vid is set // allow to change state2 is together with apicid
+	        send = (apicid<<24) | (readback & 0x00ffff00); // AP at state that We set the requested fid/vid
+	}
+
+       	lapic_write(LAPIC_MSG_REG, send | 2);
+
+	wait_cpu_state(bsp_apicid, 3);
+}
+
+/* Pick the packed fid/vid whose fid field (bits 15:8) is lower; on a tie the first argument wins. */
+static unsigned calc_common_fidvid(unsigned fidvid, unsigned fidvidx)
+{
+	/* FIXME: need to check the change path to verify that it is reachable when the common fid is smaller than 1.6G */
+	unsigned fid_a = fidvid & 0xff00;
+	unsigned fid_b = fidvidx & 0xff00;
+
+	/* only the fid decides; the vid in bits 23:16 simply travels with the chosen value */
+	return (fid_b < fid_a) ? fidvidx : fidvid;
+}
+
+struct fidvid_st {
+	unsigned common_fidvid;	/* running common value: fid in bits 15:8, vid in bits 23:16 */
+};
+
+/* BSP side of state 1: fetch one AP's max fid/vid and fold it into the common value held in gp. */
+static void init_fidvid_bsp_stage1(unsigned ap_apicid, void *gp ) 
+{
+	struct fidvid_st *fvp = gp;	/* for_each_ap hands the accumulator over as void * */
+	unsigned readback = 0;		/* stays 0 if no remote read ever succeeds */
+	int loop = 100000;		/* bounded poll so an unresponsive AP cannot hang the BSP */
+
+	print_debug_fv("state 1: ap_apicid=", ap_apicid);
+
+	while(--loop > 0) {
+		if(lapic_remote_read(ap_apicid, LAPIC_MSG_REG, &readback)!=0) continue;
+		if((readback & 0xff) == 1) break; //target ap is in stage 1
+	}
+	print_debug_fv("\treadback=", readback);
+
+	if(loop <= 0) { /* timed out: readback is not a state-1 message, so do not merge it */
+		print_err("fidvid: AP did not reach state 1\r\n");
+		return;
+	}
+	fvp->common_fidvid = calc_common_fidvid(fvp->common_fidvid, readback & 0xffff00);
+	print_debug_fv("\tcommon_fidvid=", fvp->common_fidvid);
+}
+/* BSP side of state 2: hand the common fid/vid to one AP and poll until that AP reports state 2. */
+static void init_fidvid_bsp_stage2(unsigned ap_apicid, void *gp)
+{
+	struct fidvid_st *fvp = gp;
+	unsigned readback;
+	int tries;
+
+	print_debug_fv("state 2: ap_apicid=", ap_apicid);
+
+	/* bits 31:24 name the AP whose turn it is, the low byte is the state */
+	lapic_write(LAPIC_MSG_REG, fvp->common_fidvid | (ap_apicid<<24) | 2); // all set to state2
+
+	/* bounded poll so an unresponsive AP cannot hang the BSP */
+	for(tries = 100000 - 1; tries > 0; tries--) {
+		if(lapic_remote_read(ap_apicid, LAPIC_MSG_REG, &readback)!=0) continue;
+		if((readback & 0xff) == 2) break; // target AP is at state 2, i.e. its fid/vid has been set
+	}
+	print_debug_fv("\treadback=", readback);
+}
+
+#if K8_SET_FIDVID_STORE_AP_APICID_AT_FIRST == 1
+/* List of AP APIC IDs gathered up front so both BSP stages walk the same set. */
+struct ap_apicid_st {
+	unsigned num;		// number of valid entries in apicid[]
+	unsigned apicid[16];	// 8 way dual core need 16
+	/* FIXME: 32 node quad core, may need 128 */
+};
+
+/* for_each_ap callback: append ap_apicid to the list in gp; an AP that no longer fits is dropped instead of written past apicid[]. */
+static void store_ap_apicid(unsigned ap_apicid, void *gp)
+{
+	struct ap_apicid_st *p = gp;
+	if(p->num < sizeof(p->apicid)/sizeof(p->apicid[0])) p->apicid[p->num++] = ap_apicid;
+}
+#endif
+
+/* BSP half of the fid/vid handshake: gather each AP's max fid/vid, set the common value on the BSP, then pass it to the APs one at a time. */
+static void init_fidvid_bsp(unsigned bsp_apicid) 
+{
+        uint32_t vid_max;
+        uint32_t fid_max;
+
+	struct fidvid_st fv;
+
+#if K8_SET_FIDVID_STORE_AP_APICID_AT_FIRST == 1
+	struct ap_apicid_st ap_apicidx;
+	unsigned i;
+#endif
+
+        msr_t msr;
+        msr =  rdmsr(0xc0010042);
+        fid_max = ((msr.lo>>16) & 0x3f); //max fid
+        vid_max = ((msr.hi>>(48-32)) & 0x3f); //max vid
+	fv.common_fidvid = (fid_max<<8)|(vid_max<<16);
+	// the BSP's own max is the starting point: fid in bits 13:8, vid in bits 21:16
+
+        // for all APs (we know the APIC ID of every AP even when the APIC IDs are lifted)
+        // remote read each AP's max fid/vid
+
+	//let all APs advance to state 1
+	lapic_write(LAPIC_MSG_REG,  (bsp_apicid<<24) | 1);  
+
+        // calculate the common max fid/vid that could be used for all APs and BSP
+#if K8_SET_FIDVID_STORE_AP_APICID_AT_FIRST == 1
+	ap_apicidx.num = 0;
+	// collect the AP APIC IDs once, then run stage 1 over the stored list
+	for_each_ap(bsp_apicid, K8_SET_FIDVID_CORE0_ONLY, store_ap_apicid, &ap_apicidx);
+
+	for(i=0;i<ap_apicidx.num;i++) {
+		init_fidvid_bsp_stage1(ap_apicidx.apicid[i], &fv);
+	}
+#else
+	for_each_ap(bsp_apicid, K8_SET_FIDVID_CORE0_ONLY, init_fidvid_bsp_stage1, &fv);
+#endif
+
+
+        // set BSP fid and vid; the value set_fidvid returns becomes the one handed to the APs
+	print_debug_fv("bsp apicid=", bsp_apicid);
+	fv.common_fidvid = set_fidvid(bsp_apicid, fv.common_fidvid, 1);
+        print_debug_fv("common_fidvid=", fv.common_fidvid);
+
+
+        //for all APs (we know the APIC ID of every AP even when the APIC IDs are lifted)
+        // signal the AP that it may change its fid/vid
+        // remote read the signal from the AP that it is done
+
+        fv.common_fidvid &= 0xffff00;
+
+	//keep only the fid/vid fields (bits 23:8); init_fidvid_bsp_stage2 ORs in the apicid and state 2
+#if K8_SET_FIDVID_STORE_AP_APICID_AT_FIRST == 1
+        for(i=0;i<ap_apicidx.num;i++) {
+                init_fidvid_bsp_stage2(ap_apicidx.apicid[i], &fv);
+        }
+#else
+	for_each_ap(bsp_apicid, K8_SET_FIDVID_CORE0_ONLY, init_fidvid_bsp_stage2, &fv);
+#endif
+
+	lapic_write(LAPIC_MSG_REG, fv.common_fidvid | (bsp_apicid<<24) | 3); // clear the state; init_fidvid_ap waits on state 3
+
+	//wait a while here so the last AP can read this back and stop; don't call init_timer too early, or just don't use init_timer
+
+}
+
+#endif
diff --git a/src/cpu/amd/model_fxx/init_cpus.c b/src/cpu/amd/model_fxx/init_cpus.c
index 9e1772e..718a0f6 100644
--- a/src/cpu/amd/model_fxx/init_cpus.c
+++ b/src/cpu/amd/model_fxx/init_cpus.c
@@ -1,4 +1,13 @@
 //it takes the ENABLE_APIC_EXT_ID and APIC_ID_OFFSET and LIFT_BSP_APIC_ID
+#ifndef K8_SET_FIDVID
+	#define K8_SET_FIDVID 0
+	
+#endif
+
+#ifndef K8_SET_FIDVID_CORE0_ONLY
+	/* MSR FIDVID_CTL and FIDVID_STATUS are shared by cores, so we may not need to set them twice */
+       	#define K8_SET_FIDVID_CORE0_ONLY 1
+#endif
 
 typedef void (*process_ap_t)(unsigned apicid, void *gp);
 
@@ -100,6 +109,10 @@
 #define LAPIC_MSG_REG 0x380
 
 
+#if K8_SET_FIDVID == 1
+static void init_fidvid_ap(unsigned bsp_apicid, unsigned apicid);
+#endif
+
 static inline __attribute__((always_inline)) void print_apicid_nodeid_coreid(unsigned apicid, struct node_core_id id, const char *str)
 {
 	#if CONFIG_USE_INIT == 0
@@ -139,7 +152,11 @@
 }
 
 
+#if RAMINIT_SYSINFO == 1
+static unsigned init_cpus(unsigned cpu_init_detectedx ,struct sys_info *sysinfo)
+#else
 static unsigned init_cpus(unsigned cpu_init_detectedx)
+#endif
 {
 		unsigned bsp_apicid = 0;
 		unsigned apicid;
@@ -193,7 +210,7 @@
 			if (id.nodeid!=0) //all core0 except bsp
 				print_apicid_nodeid_coreid(apicid, id, " core0: ");
 		}
-	#if 1 
+	#if 0 
                 else { //all core1
 			print_apicid_nodeid_coreid(apicid, id, " core1: ");
                 }
@@ -202,11 +219,20 @@
 #endif
 
                 if (cpu_init_detectedx) {
+		#if RAMINIT_SYSINFO == 1
+			//We need to init sblnk and sbbusn, because it is called before ht_setup_chains_x
+		        sysinfo->sblnk = get_sblnk();
+			sysinfo->sbbusn = node_link_to_bus(0, sysinfo->sblnk);
+		#endif
 			print_apicid_nodeid_coreid(apicid, id, "\r\n\r\n\r\nINIT detect from ");
 
 			print_debug("\r\nIssuing SOFT_RESET...\r\n");
 
+			#if RAMINIT_SYSINFO == 1
+                        soft_reset(sysinfo);
+			#else
 			soft_reset();
+			#endif
 
                 }
 
@@ -219,6 +245,13 @@
 		lapic_write(LAPIC_MSG_REG, (apicid<<24) | 0x33); // mark the cpu is started
 
 		if(apicid != bsp_apicid) {
+	#if K8_SET_FIDVID == 1
+		#if (CONFIG_LOGICAL_CPUS == 1) && (K8_SET_FIDVID_CORE0_ONLY == 1)
+			if(id.coreid == 0 ) // only need set fid for core0
+		#endif 
+       		                init_fidvid_ap(bsp_apicid, apicid);
+	#endif
+
                         // We need to stop the CACHE as RAM for this CPU, really?
 			wait_cpu_state(bsp_apicid, 0x44);
 			lapic_write(LAPIC_MSG_REG, (apicid<<24) | 0x44); // bsp can not check it before stop_this_cpu
diff --git a/src/cpu/x86/16bit/reset16.lds b/src/cpu/x86/16bit/reset16.lds
index f32597c..0ba54c9 100644
--- a/src/cpu/x86/16bit/reset16.lds
+++ b/src/cpu/x86/16bit/reset16.lds
@@ -5,7 +5,8 @@
 
 SECTIONS {
 	/* Trigger an error if I have an unuseable start address */
-	_ROMTOP = (_start >= 0xffff0000) ? 0xfffffff0 : 0xfffffff8;
+	_bogus = ASSERT(_start >= 0xffff0000, "_start to low please decrease ROM_IMAGE_SIZE");
+	_ROMTOP = 0xfffffff0;
 	. = _ROMTOP;
 	.reset . : {
 		*(.reset)
diff --git a/src/cpu/x86/32bit/entry32.lds b/src/cpu/x86/32bit/entry32.lds
index 37a75ba..e69de29 100644
--- a/src/cpu/x86/32bit/entry32.lds
+++ b/src/cpu/x86/32bit/entry32.lds
@@ -1,14 +0,0 @@
-/*
-	_cache_ram_seg_base = DEFINED(CACHE_RAM_BASE)? CACHE_RAM_BASE - _rodata : 0;
-	_cache_ram_seg_base_low    = (_cache_ram_seg_base) & 0xffff;
-	_cache_ram_seg_base_middle = (_cache_ram_seg_base >> 16) & 0xff;
-	_cache_ram_seg_base_high   = (_cache_ram_seg_base >> 24) & 0xff;
-
-	_rom_code_seg_base =  _ltext - _text;
-	_rom_code_seg_base_low    = (_rom_code_seg_base) & 0xffff;
-	_rom_code_seg_base_middle = (_rom_code_seg_base >> 16) & 0xff;
-	_rom_code_seg_base_high   = (_rom_code_seg_base >> 24) & 0xff;
-*/
-
-
-
diff --git a/src/cpu/x86/lapic/lapic_cpu_init.c b/src/cpu/x86/lapic/lapic_cpu_init.c
index 7f34879..4e7e696 100644
--- a/src/cpu/x86/lapic/lapic_cpu_init.c
+++ b/src/cpu/x86/lapic/lapic_cpu_init.c
@@ -227,24 +227,19 @@
 }
 
 /* C entry point of secondary cpus */
-
-// secondary_cpu_lock is used to serialize initialization of secondary CPUs
-// This can be used to avoid interleaved debugging messages.
-
-static spinlock_t secondary_cpu_lock = SPIN_LOCK_UNLOCKED;
-
 void secondary_cpu_init(void)
 {
 	atomic_inc(&active_cpus);
-
 #if SERIAL_CPU_INIT == 1
-	spin_lock(&secondary_cpu_lock);
+  #if CONFIG_MAX_CPUS>2
+	spin_lock(&start_cpu_lock);
+  #endif
 #endif
-
 	cpu_initialize();
-
 #if SERIAL_CPU_INIT == 1
-	spin_unlock(&secondary_cpu_lock);
+  #if CONFIG_MAX_CPUS>2
+	spin_unlock(&start_cpu_lock);
+  #endif
 #endif
 
 	atomic_dec(&active_cpus);
@@ -260,12 +255,15 @@
 		if (cpu->path.type != DEVICE_PATH_APIC) {
 			continue;
 		}
+
 		if (!cpu->enabled) {
 			continue;
 		}
+
 		if (cpu->initialized) {
 			continue;
 		}
+
 		if (!start_cpu(cpu)) {
 			/* Record the error in cpu? */
 			printk_err("CPU  %u would not start!\n",
diff --git a/src/devices/device.c b/src/devices/device.c
index 303a669..4ca469b 100644
--- a/src/devices/device.c
+++ b/src/devices/device.c
@@ -369,7 +369,6 @@
 }
 
 #if CONFIG_CONSOLE_VGA == 1
-
 device_t vga_pri = 0;
 static void allocate_vga_resource(void)
 {
@@ -377,31 +376,52 @@
 #warning "This function knows to much about PCI stuff, it should be just a ietrator/visitor."
 
 	/* FIXME handle the VGA pallette snooping */
-	struct device *dev, *vga, *vga_onboard;
+	struct device *dev, *vga, *vga_onboard, *vga_first, *vga_last;
 	struct bus *bus;
 	bus = 0;
 	vga = 0;
 	vga_onboard = 0;
+	vga_first = 0;
+	vga_last = 0;
 	for(dev = all_devices; dev; dev = dev->next) {
 		if (!dev->enabled) continue;
 		if (((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) &&
 			((dev->class >> 8) != PCI_CLASS_DISPLAY_OTHER)) 
 		{
-			if (!vga) {
-				if (dev->on_mainboard) {
-					vga_onboard = dev;
-				} else {
-					vga = dev;
-				}
-			}
+                        if (!vga_first) {
+                                if (dev->on_mainboard) {
+                                        vga_onboard = dev;
+                                } else {
+                                        vga_first = dev;
+                                }
+                        } else {
+                                if (dev->on_mainboard) {
+                                        vga_onboard = dev;
+                                } else {
+                                        vga_last = dev;
+                                }
+                        }
+
 			/* It isn't safe to enable other VGA cards */
 			dev->command &= ~(PCI_COMMAND_MEMORY | PCI_COMMAND_IO);
 		}
 	}
 	
-	if (!vga) {
-		vga = vga_onboard;
-	}
+        vga = vga_last;
+
+        if(!vga) {
+                vga = vga_first;
+        }
+
+#if 1
+        if (vga_onboard) // will use on board vga as pri
+#else
+        if (!vga) // will use last add on adapter as pri
+#endif
+        {
+                vga = vga_onboard;
+        }
+
 	
 	if (vga) {
 		/* vga is first add on card or the only onboard vga */
@@ -419,6 +439,7 @@
 		bus = (bus == bus->dev->bus)? 0 : bus->dev->bus;
 	} 
 }
+
 #endif
 
 
@@ -499,7 +520,6 @@
  */
 int reset_bus(struct bus *bus)
 {
-	device_t dev;
 	if (bus && bus->dev && bus->dev->ops && bus->dev->ops->reset_bus)
 	{
 		bus->dev->ops->reset_bus(bus);
diff --git a/src/devices/hypertransport.c b/src/devices/hypertransport.c
index a30c8f6..fd34708 100644
--- a/src/devices/hypertransport.c
+++ b/src/devices/hypertransport.c
@@ -1,3 +1,9 @@
+/*
+	2005.11 yhlu: add support for letting the real sb use a small unitid
+
+*/
+
+
 #include <bitops.h>
 #include <console/console.h>
 #include <device/device.h>
@@ -11,7 +17,7 @@
 #define OPT_HT_LINK 0
         
 #if OPT_HT_LINK == 1
-#include "../northbridge/amd/amdk8/cpu_rev.c"
+#include <cpu/amd/model_fxx_rev.h>
 #endif
 
 static device_t ht_scan_get_devs(device_t *old_devices)
@@ -71,12 +77,13 @@
 	}
 	/* AMD K8 Unsupported 1Ghz? */
 	if ((dev->vendor == PCI_VENDOR_ID_AMD) && (dev->device == 0x1100)) {
-#if K8_HT_FREQ_1G_SUPPORT == 1  
-		if (is_cpu_pre_e0()) 
-#endif
-		{
+#if K8_HT_FREQ_1G_SUPPORT == 1 
+		if (is_cpu_pre_e0()) { // only e0 later suupport 1GHz HT
 			freq_cap &= ~(1 << HT_FREQ_1000Mhz);
-		}
+		} 
+#else
+		freq_cap &= ~(1 << HT_FREQ_1000Mhz);
+#endif
 
 	}
 	return freq_cap;
@@ -248,7 +255,7 @@
 	return pos;
 }
 
-static void ht_collapse_early_enumeration(struct bus *bus)
+static void ht_collapse_early_enumeration(struct bus *bus, unsigned offset_unitid)
 {
 	unsigned int devfn;
 	struct ht_link prev;
@@ -275,6 +282,26 @@
 		}
 	} while((ctrl & (1 << 5)) == 0);
 
+	        //actually, only for one HT device HT chain, and unitid is 0
+#if HT_CHAIN_UNITID_BASE == 0
+        if(offset_unitid) {
+                return;
+        }
+#endif
+
+        /* Check if is already collapsed */
+        if((!offset_unitid)|| (offset_unitid && (!((HT_CHAIN_END_UNITID_BASE == 0) && (HT_CHAIN_END_UNITID_BASE <HT_CHAIN_UNITID_BASE))))) {
+                struct device dummy;
+                uint32_t id;
+                dummy.bus              = bus;
+                dummy.path.type        = DEVICE_PATH_PCI;
+                dummy.path.u.pci.devfn = PCI_DEVFN(0, 0);
+                id = pci_read_config32(&dummy, PCI_VENDOR_ID);
+                if ( ! ( (id == 0xffffffff) || (id == 0x00000000) ||
+                    (id == 0x0000ffff) || (id == 0xffff0000) ) ) {
+                             return;
+                }
+        }
 
 	/* Spin through the devices and collapse any early
 	 * hypertransport enumeration.
@@ -309,15 +336,25 @@
 }
 
 unsigned int hypertransport_scan_chain(struct bus *bus, 
-	unsigned min_devfn, unsigned max_devfn, unsigned int max)
+	unsigned min_devfn, unsigned max_devfn, unsigned int max, unsigned offset_unitid)
 {
+	//even HT_CHAIN_UNITID_BASE == 0, we still can go through this function, because of end_of_chain check, also We need it to optimize link
 	unsigned next_unitid, last_unitid;
 	device_t old_devices, dev, func;
-	unsigned min_unitid = 1;
+	unsigned min_unitid = (offset_unitid) ? HT_CHAIN_UNITID_BASE:1;
 	struct ht_link prev;
+	device_t last_func = 0;
+
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+        //let't record the device of last ht device, So we can set the Unitid to HT_CHAIN_END_UNITID_BASE
+        unsigned real_last_unitid; 
+        uint8_t real_last_pos;
+	device_t real_last_dev;
+	int ht_dev_num = 0;
+#endif
 
 	/* Restore the hypertransport chain to it's unitialized state */
-	ht_collapse_early_enumeration(bus);
+	ht_collapse_early_enumeration(bus, offset_unitid);
 
 	/* See which static device nodes I have */
 	old_devices = bus->children;
@@ -405,6 +442,7 @@
 			func->path.u.pci.devfn += (next_unitid << 3);
 			static_count = (func->path.u.pci.devfn >> 3) 
 				- (dev->path.u.pci.devfn >> 3) + 1;
+			last_func = func;
 		}
 
 		/* Compute the number of unitids consumed */
@@ -416,6 +454,14 @@
 		}
 
 		/* Update the Unitid of the next device */
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+		if(offset_unitid) {
+	                real_last_unitid = next_unitid;
+        	        real_last_pos = pos;
+			real_last_dev = dev;
+			ht_dev_num++;
+		}
+#endif
 		next_unitid += count;
 
 		/* Setup the hypetransport link */
@@ -442,6 +488,26 @@
 		printk_debug("HyperT reset not needed\n");
 	}
 #endif
+
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+        if(offset_unitid && (ht_dev_num>0)) {
+                uint16_t flags;
+                int i;
+		device_t last_func = 0;
+                flags = pci_read_config16(real_last_dev, real_last_pos + PCI_CAP_FLAGS);
+                flags &= ~0x1f;
+                flags |= HT_CHAIN_END_UNITID_BASE & 0x1f;
+                pci_write_config16(real_last_dev, real_last_pos + PCI_CAP_FLAGS, flags);
+
+                for(func = real_last_dev; func; func = func->sibling) {
+                        func->path.u.pci.devfn -= ((real_last_unitid - HT_CHAIN_END_UNITID_BASE) << 3);
+			last_func = func;
+                }
+
+                next_unitid = real_last_unitid;
+        }
+#endif
+
 	if (next_unitid > 0x1f) {
 		next_unitid = 0x1f;
 	}
@@ -454,13 +520,15 @@
 		for(left = old_devices; left; left = left->sibling) {
 			printk_debug("%s\n", dev_path(left));
 		}
-		die("Left over static devices.  Check your Config.lb\n");
+		printk_err("HT: Left over static devices.  Check your Config.lb\n");
+		if(last_func  && !last_func->sibling) // put back the left over static device, and let pci_scan_bus disable it
+			last_func->sibling = old_devices; 
 	}
-	
+
 	/* Now that nothing is overlapping it is safe to scan the
 	 * children. 
 	 */
-	max = pci_scan_bus(bus, 0x00, (next_unitid << 3)|7, max);
+	max = pci_scan_bus(bus, 0x00, (next_unitid << 3)|7, max); 
 	return max; 
 }
 
diff --git a/src/devices/pci_device.c b/src/devices/pci_device.c
index f3f53f0..a002d1c 100644
--- a/src/devices/pci_device.c
+++ b/src/devices/pci_device.c
@@ -1043,7 +1043,7 @@
 		for(left = old_devices; left; left = left->sibling) {
 			printk_debug("%s\n", dev_path(left));
 		}
-		die("Left over static devices.  Check your Config.lb\n");
+		die("PCI: Left over static devices.  Check your Config.lb\n");
 	}
 
 	/* For all children that implement scan_bus (i.e. bridges)
diff --git a/src/devices/pci_rom.c b/src/devices/pci_rom.c
index 64a85bb..fbc6130 100644
--- a/src/devices/pci_rom.c
+++ b/src/devices/pci_rom.c
@@ -22,14 +22,15 @@
 	}
 
 	printk_debug("rom address for %s = %x\n", dev_path(dev), rom_address);
+	
+	if(!dev->on_mainboard) {
+		/* enable expansion ROM address decoding */
+		pci_write_config32(dev, PCI_ROM_ADDRESS,
+				   rom_address|PCI_ROM_ADDRESS_ENABLE);
+	}
 
-	/* enable expansion ROM address decoding */
-	pci_write_config32(dev, PCI_ROM_ADDRESS,
-			   rom_address|PCI_ROM_ADDRESS_ENABLE);
-
-	rom_header = (struct rom_header *) rom_address;
-	printk_spew("PCI Expansion ROM, signature 0x%04x, \n\t"
-		    "INIT size 0x%04x, data ptr 0x%04x\n",
+	rom_header = (struct rom_header *)rom_address;
+	printk_spew("PCI Expansion ROM, signature 0x%04x, INIT size 0x%04x, data ptr 0x%04x\n",
 		    le32_to_cpu(rom_header->signature),
 		    rom_header->size * 512, le32_to_cpu(rom_header->data));
 	if (le32_to_cpu(rom_header->signature) != PCI_ROM_HDR) {
@@ -38,7 +39,7 @@
 		return NULL;
 	}
 
-	rom_data = (struct pci_data *) ((unsigned char *) rom_header + le32_to_cpu(rom_header->data));
+	rom_data = (unsigned char *) rom_header + le32_to_cpu(rom_header->data);
 	printk_spew("PCI ROM Image, Vendor %04x, Device %04x,\n",
 		    rom_data->vendor, rom_data->device);
 	if (dev->vendor != rom_data->vendor || dev->device != rom_data->device) {
@@ -51,7 +52,7 @@
 		    rom_data->class_hi, rom_data->class_lo,
 		    rom_data->type);
 	if (dev->class != ((rom_data->class_hi << 8) | rom_data->class_lo)) {
-		printk_err("Class Code mismatch ROM %08x, dev %08x\n", 
+		printk_debug("Class Code mismatch ROM %08x, dev %08x\n", 
 			    (rom_data->class_hi << 8) | rom_data->class_lo, dev->class);
 		//return NULL;
 	}
@@ -59,12 +60,14 @@
 	return rom_header;
 }
 
-static void *pci_ram_image_start = PCI_RAM_IMAGE_START;
+static void *pci_ram_image_start = (void *)PCI_RAM_IMAGE_START;
 
 #if CONFIG_CONSOLE_VGA == 1
-int vga_inited = 0;		// used by vga_console.c 
+extern int vga_inited;		// defined in vga_console.c 
+#if CONFIG_CONSOLE_VGA_MULTI == 0
 extern device_t vga_pri;	// the primary vga device, defined in device.c
 #endif
+#endif
 
 struct rom_header *pci_rom_load(struct device *dev, struct rom_header *rom_header)
 {
@@ -76,8 +79,8 @@
 	rom_address = pci_read_config32(dev, PCI_ROM_ADDRESS);
 
 	do {
-		rom_header = (struct rom_header *) ((unsigned char *) rom_header + image_size); // get next image
-	        rom_data = (struct pci_data *) ((unsigned char *) rom_header + le32_to_cpu(rom_header->data));
+		rom_header = (unsigned char *) rom_header + image_size; // get next image
+	        rom_data = (unsigned char *) rom_header + le32_to_cpu(rom_header->data);
         	image_size = le32_to_cpu(rom_data->ilen) * 512;
 	} while ((rom_data->type!=0) && (rom_data->indicator!=0));  // make sure we got x86 version
 
@@ -87,7 +90,9 @@
 
 	if (PCI_CLASS_DISPLAY_VGA == rom_data->class_hi) {
 #if CONFIG_CONSOLE_VGA == 1
+	#if CONFIG_CONSOLE_VGA_MULTI == 0
 		if (dev != vga_pri) return NULL; // only one VGA supported
+	#endif
 		printk_debug("copying VGA ROM Image from %x to %x, %x bytes\n",
 			    rom_header, PCI_VGA_RAM_IMAGE_START, rom_size);
 		memcpy(PCI_VGA_RAM_IMAGE_START, rom_header, rom_size);
@@ -95,11 +100,11 @@
 		return (struct rom_header *) (PCI_VGA_RAM_IMAGE_START);
 #endif
 	} else {
-		printk_spew("%s, copying non-VGA ROM Image from %x to %x, %x bytes\n",
-			    __func__, rom_header, pci_ram_image_start, rom_size);
+		printk_debug("copying non-VGA ROM Image from %x to %x, %x bytes\n",
+			    rom_header, pci_ram_image_start, rom_size);
 		memcpy(pci_ram_image_start, rom_header, rom_size);
 		pci_ram_image_start += rom_size;
-		return (struct rom_header *) pci_ram_image_start;
+		return (struct rom_header *) (pci_ram_image_start-rom_size);
 	}
 	/* disable expansion ROM address decoding */
 	pci_write_config32(dev, PCI_ROM_ADDRESS, rom_address & ~PCI_ROM_ADDRESS_ENABLE);
diff --git a/src/include/cpu/amd/dualcore.h b/src/include/cpu/amd/dualcore.h
index a38565b..fb53c92 100644
--- a/src/include/cpu/amd/dualcore.h
+++ b/src/include/cpu/amd/dualcore.h
@@ -1,14 +1,24 @@
 #ifndef CPU_AMD_DUALCORE_H
 #define CPU_AMD_DUALCORE_H
 
-struct device;
+#if defined(__GNUC__) 
+unsigned int read_nb_cfg_54(void);
+#endif
 
 struct node_core_id {
         unsigned nodeid;
         unsigned coreid;
 };
 
-void amd_sibling_init(struct device *cpu, struct node_core_id id);
-struct node_core_id get_node_core_id(void);
+#if defined(__GNUC__)
+// can only be used to get the nodeid and coreid of the core it is running on
+struct node_core_id get_node_core_id(unsigned int nb_cfg_54);
+#endif
+
+#ifndef __ROMCC__
+struct device;
+unsigned get_apicid_base(unsigned ioapic_num);
+void amd_sibling_init(struct device *cpu);
+#endif
 
 #endif /* CPU_AMD_DUALCORE_H */
diff --git a/src/include/cpu/x86/lapic.h b/src/include/cpu/x86/lapic.h
index 12ca518..5d696dc 100644
--- a/src/include/cpu/x86/lapic.h
+++ b/src/include/cpu/x86/lapic.h
@@ -10,17 +10,17 @@
 #  define NEED_LAPIC 1
 #endif
 
-static inline unsigned long lapic_read(unsigned long reg)
+static inline __attribute__((always_inline)) unsigned long lapic_read(unsigned long reg)
 {
 	return *((volatile unsigned long *)(LAPIC_DEFAULT_BASE+reg));
 }
 
-static inline void lapic_write(unsigned long reg, unsigned long v)
+static inline __attribute__((always_inline)) void lapic_write(unsigned long reg, unsigned long v)
 {
 	*((volatile unsigned long *)(LAPIC_DEFAULT_BASE+reg)) = v;
 }
 
-static inline void lapic_wait_icr_idle(void)
+static inline __attribute__((always_inline)) void lapic_wait_icr_idle(void)
 {
 	do { } while ( lapic_read( LAPIC_ICR ) & LAPIC_ICR_BUSY );
 }
@@ -46,13 +46,14 @@
 	wrmsr(LAPIC_BASE_MSR, msr);
 }
 
-static inline unsigned long lapicid(void)
+static inline __attribute__((always_inline)) unsigned long lapicid(void)
 {
 	return lapic_read(LAPIC_ID) >> 24;
 }
 
-static inline void stop_this_cpu(void)
+static inline __attribute__((always_inline)) void stop_this_cpu(void)
 {
+
 	unsigned apicid;
 	apicid = lapicid();
 
diff --git a/src/include/device/hypertransport.h b/src/include/device/hypertransport.h
index f04d0ec..8eba981 100644
--- a/src/include/device/hypertransport.h
+++ b/src/include/device/hypertransport.h
@@ -4,7 +4,7 @@
 #include <device/hypertransport_def.h>
 
 unsigned int hypertransport_scan_chain(struct bus *bus, 
-	unsigned min_devfn, unsigned max_devfn, unsigned int max);
+	unsigned min_devfn, unsigned max_devfn, unsigned int max, unsigned offset_unitid);
 unsigned int ht_scan_bridge(struct device *dev, unsigned int max);
 extern struct device_operations default_ht_ops_bus;
 
diff --git a/src/include/device/pci_ids.h b/src/include/device/pci_ids.h
index 373b73c..d219b96 100644
--- a/src/include/device/pci_ids.h
+++ b/src/include/device/pci_ids.h
@@ -438,6 +438,9 @@
 #define PCI_DEVICE_ID_AMD_8131_PCIX	0x7450
 #define PCI_DEVICE_ID_AMD_8131_IOAPIC   0x7451
 
+#define PCI_DEVICE_ID_AMD_8132_PCIX     0x7458
+#define PCI_DEVICE_ID_AMD_8132_IOAPIC   0x7459
+
 #define PCI_VENDOR_ID_TRIDENT		0x1023
 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX	0x2000
 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX	0x2001
@@ -910,6 +913,20 @@
 #define PCI_DEVICE_ID_CERN_HIPPI_DST	0x0021
 #define PCI_DEVICE_ID_CERN_HIPPI_SRC	0x0022
 
+#define PCI_DEVICE_ID_NVIDIA_CK8S_HT           0x00e1
+#define PCI_DEVICE_ID_NVIDIA_CK8S_LPC          0x00e0
+#define PCI_DEVICE_ID_NVIDIA_CK8S_SM           0x00e4
+#define PCI_DEVICE_ID_NVIDIA_CK8S_USB          0x00e7
+#define PCI_DEVICE_ID_NVIDIA_CK8S_USB2         0x00e8
+#define PCI_DEVICE_ID_NVIDIA_CK8S_NIC          0x00e6
+#define PCI_DEVICE_ID_NVIDIA_CK8S_ACI          0x00ea
+#define PCI_DEVICE_ID_NVIDIA_CK8S_MCI          0x00e9
+#define PCI_DEVICE_ID_NVIDIA_CK8S_IDE          0x00e5
+#define PCI_DEVICE_ID_NVIDIA_CK8S_SATA0        0x00ee
+#define PCI_DEVICE_ID_NVIDIA_CK8S_SATA1        0x00e3
+#define PCI_DEVICE_ID_NVIDIA_CK8S_PCI          0x00ed
+#define PCI_DEVICE_ID_NVIDIA_CK8S_PCI_AGP      0x00e2
+
 #define PCI_VENDOR_ID_NVIDIA			0x10de
 #define PCI_DEVICE_ID_NVIDIA_TNT		0x0020
 #define PCI_DEVICE_ID_NVIDIA_TNT2		0x0028
@@ -1060,8 +1077,8 @@
 #define PCI_DEVICE_ID_VIA_8233C_0	0x3109
 #define PCI_DEVICE_ID_VIA_8361		0x3112
 #define PCI_DEVICE_ID_VIA_8233A		0x3147
-#define PCI_DEVICE_ID_VIA_CLE266_VGA	0x3122
-#define PCI_DEVICE_ID_VIA_8623		0x3123
+#define PCI_DEVICE_ID_VIA_CLE266_VGA  	0x3122
+#define PCI_DEVICE_ID_VIA_8623          0x3123
 #define PCI_DEVICE_ID_VIA_86C100A	0x6100
 #define PCI_DEVICE_ID_VIA_8231		0x8231
 #define PCI_DEVICE_ID_VIA_8231_4	0x8235
@@ -1207,6 +1224,21 @@
 #define PCI_DEVICE_ID_SERVERWORKS_GCLE2   0x0227
 #define PCI_DEVICE_ID_SERVERWORKS_CSB5ISA 0x0230
 
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5780_PXB  0x0130
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5780_PCIE 0x0132
+#define PCI_DEVICE_ID_BROADCOM_BCM5780_NIC 0x1668
+#define PCI_DEVICE_ID_BROADCOM_BCM5780_NIC1 0x1669
+
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_HT_PXB 0x0036
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_PXBX   0x0104
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_SATA         0x024a
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_SB_PCI_MAIN 0x0205
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_IDE 0x0214
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_LPC 0x0234
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_WDT 0x0238
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_XIOAPIC 0x0235
+#define PCI_DEVICE_ID_SERVERWORKS_BCM5785_USB 0x0223
+
 #define PCI_VENDOR_ID_SBE		0x1176
 #define PCI_DEVICE_ID_SBE_WANXL100	0x0301
 #define PCI_DEVICE_ID_SBE_WANXL200	0x0302
@@ -1763,7 +1795,6 @@
 #define PCI_DEVICE_ID_INTEL_82801CA_1F3 0x2483
 #define PCI_DEVICE_ID_INTEL_82801CA_1D1 0x2484
 #define PCI_DEVICE_ID_INTEL_82801CA_1F5 0x2485
-#define PCI_DEVICE_ID_INTEL_82801CA_1F6 0x2486
 #define PCI_DEVICE_ID_INTEL_82801CA_1D2 0x2487
 #define PCI_DEVICE_ID_INTEL_82870_1E0 0x1461
 #define PCI_DEVICE_ID_INTEL_82870_1F0 0x1460
@@ -1793,17 +1824,15 @@
 #define PCI_DEVICE_ID_INTEL_82801E_11	0x245b
 #define PCI_DEVICE_ID_INTEL_82801E_13	0x245d
 #define PCI_DEVICE_ID_INTEL_82801E_14	0x245e
-#define PCI_DEVICE_ID_INTEL_82801CA_LAN  0x2449
-#define PCI_DEVICE_ID_INTEL_82801CA_PCI  0x244e		// Same as 82801ER
-#define PCI_DEVICE_ID_INTEL_82801CA_LPC	0x2480
-#define PCI_DEVICE_ID_INTEL_82801CA_USB	0x2482
-#define PCI_DEVICE_ID_INTEL_82801CA_SMB 0x2483
-#define PCI_DEVICE_ID_INTEL_82801CA_USB2 0x2484
-#define PCI_DEVICE_ID_INTEL_82801CA_AC97_AUDIO	0x2485
-#define PCI_DEVICE_ID_INTEL_82801CA_AC97_MODEM	0x2486
-#define PCI_DEVICE_ID_INTEL_82801CA_USB3 0x2487
+#define PCI_DEVICE_ID_INTEL_82801CA_0	0x2480
+#define PCI_DEVICE_ID_INTEL_82801CA_2	0x2482
+#define PCI_DEVICE_ID_INTEL_82801CA_3	0x2483
+#define PCI_DEVICE_ID_INTEL_82801CA_4	0x2484
+#define PCI_DEVICE_ID_INTEL_82801CA_5	0x2485
+#define PCI_DEVICE_ID_INTEL_82801CA_6	0x2486
+#define PCI_DEVICE_ID_INTEL_82801CA_7	0x2487
 #define PCI_DEVICE_ID_INTEL_82801CA_10	0x248a
-#define PCI_DEVICE_ID_INTEL_82801CA_IDE	0x248b
+#define PCI_DEVICE_ID_INTEL_82801CA_11	0x248b
 #define PCI_DEVICE_ID_INTEL_82801CA_12	0x248c
 #define PCI_DEVICE_ID_INTEL_82801DB_0	0x24c0
 #define PCI_DEVICE_ID_INTEL_82801DB_2	0x24c2
diff --git a/src/northbridge/amd/amdk8/amdk8.h b/src/northbridge/amd/amdk8/amdk8.h
index 89c03fc..7e064af 100644
--- a/src/northbridge/amd/amdk8/amdk8.h
+++ b/src/northbridge/amd/amdk8/amdk8.h
@@ -1,3 +1,7 @@
+#ifndef AMDK8_H
+
+#define AMDK8_H
+
 /* Definitions of various K8 registers */
 /* Function 0 */
 #define HT_TRANSACTION_CONTROL 0x68
@@ -226,3 +230,5 @@
 #define InitComplete      (1 << 1)
 #define NonCoherent       (1 << 2)
 #define ConnectionPending (1 << 4)
+
+#endif
diff --git a/src/northbridge/amd/amdk8/coherent_ht.c b/src/northbridge/amd/amdk8/coherent_ht.c
index 36d2dcd..7d43f3c 100644
--- a/src/northbridge/amd/amdk8/coherent_ht.c
+++ b/src/northbridge/amd/amdk8/coherent_ht.c
@@ -1812,9 +1812,9 @@
 
 #if CONFIG_MAX_PHYSICAL_CPUS > 1
 	result = setup_smp();
-#endif
 	result.nodes = verify_mp_capabilities(result.nodes);
 	clear_dead_routes(result.nodes);
+#endif
 	if (result.nodes == 1) {
 		setup_uniprocessor();
 	}
diff --git a/src/northbridge/amd/amdk8/debug.c b/src/northbridge/amd/amdk8/debug.c
index d0841e8..ca45cbe 100644
--- a/src/northbridge/amd/amdk8/debug.c
+++ b/src/northbridge/amd/amdk8/debug.c
@@ -2,6 +2,18 @@
  * generic K8 debug code, used by mainboard specific auto.c
  *
  */
+/* Debug helper: print str followed by the address val in hex; the body is compiled out unless CACHE_AS_RAM_ADDRESS_DEBUG == 1. */
+static inline void print_debug_addr(const char *str, void *val)
+{
+#if CACHE_AS_RAM_ADDRESS_DEBUG == 1
+        #if CONFIG_USE_INIT==1
+                printk_debug("------Address debug: %s%x------\r\n", str, (unsigned)val); /* %x takes an unsigned, not a pointer */
+        #else
+		print_debug ("------Address debug: "); print_debug(str); print_debug_hex32((unsigned)val); print_debug("------\r\n");
+        #endif
+#endif
+}
+
 #if 1
 static void print_debug_pci_dev(unsigned dev)
 {
@@ -34,6 +46,7 @@
 #if CONFIG_USE_INIT
 		printk_debug(" %04x:%04x\r\n", (id & 0xffff), (id>>16));
 #else
+		print_debug(" ");
 		print_debug_hex32(id);
 		print_debug("\r\n");
 #endif
diff --git a/src/northbridge/amd/amdk8/early_ht.c b/src/northbridge/amd/amdk8/early_ht.c
index 2711657..5134494 100644
--- a/src/northbridge/amd/amdk8/early_ht.c
+++ b/src/northbridge/amd/amdk8/early_ht.c
@@ -1,14 +1,26 @@
-static int enumerate_ht_chain(void)
+/*  
+	2005.11 yhlu add let the real sb to use small unitid
+*/
+// only for sb ht chain
+static void enumerate_ht_chain(void)
 {
+#if HT_CHAIN_UNITID_BASE != 0 
+/* HT_CHAIN_UNITID_BASE could be 0 (only one ht device in the ht chain), if so, don't need to go through the chain  */ 
+
 	/* Assumption the HT chain that is bus 0 has the HT I/O Hub on it.
 	 * On most boards this just happens.  If a cpu has multiple
 	 * non Coherent links the appropriate bus registers for the
 	 * links needs to be programed to point at bus 0.
 	 */
 	unsigned next_unitid, last_unitid;
-	int reset_needed = 0;
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+	//let't record the device of last ht device, So we can set the Unitid to HT_CHAIN_END_UNITID_BASE
+	unsigned real_last_unitid;
+	uint8_t real_last_pos;
+	int ht_dev_num = 0; // except host_bridge
+#endif
 
-	next_unitid = 1;
+	next_unitid = HT_CHAIN_UNITID_BASE;
 	do {
 		uint32_t id;
 		uint8_t hdr_type, pos;
@@ -58,6 +70,11 @@
 					flags &= ~0x1f;
 					flags |= next_unitid & 0x1f;
 					count = (flags >> 5) & 0x1f;
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+					real_last_unitid = next_unitid;
+					real_last_pos = pos;
+					ht_dev_num++ ;
+#endif
 					next_unitid += count;
 
 					/* Test for end of chain */
@@ -78,7 +95,17 @@
 			pos = pci_read_config8(PCI_DEV(0, 0, 0), pos + PCI_CAP_LIST_NEXT);
 		}
 	} while((last_unitid != next_unitid) && (next_unitid <= 0x1f));
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+	if(ht_dev_num>0) {
+		uint16_t flags;
+		flags = pci_read_config16(PCI_DEV(0,real_last_unitid,0), real_last_pos + PCI_CAP_FLAGS); 
+                flags &= ~0x1f;
+                flags |= HT_CHAIN_END_UNITID_BASE & 0x1f;
+		pci_write_config16(PCI_DEV(0, real_last_unitid, 0), real_last_pos + PCI_CAP_FLAGS, flags);
+	}
+#endif
 
-	return reset_needed;
+#endif
+
 }
 
diff --git a/src/northbridge/amd/amdk8/incoherent_ht.c b/src/northbridge/amd/amdk8/incoherent_ht.c
index bdb2676..02a0c2d 100644
--- a/src/northbridge/amd/amdk8/incoherent_ht.c
+++ b/src/northbridge/amd/amdk8/incoherent_ht.c
@@ -1,17 +1,12 @@
 /*
  	This should be done by Eric
 	2004.12 yhlu add multi ht chain dynamically support
-
+	2005.11 yhlu add let real sb to use small unitid
 */
 #include <device/pci_def.h>
 #include <device/pci_ids.h>
 #include <device/hypertransport_def.h>
 
-
-/* We can reduce the size of code generated by romcc by
- * changing all of the fixed size types that live in registers
- * into simple unsigned variables. (ie s/uint8_t/unsigned/g)
- */
 #ifndef K8_HT_FREQ_1G_SUPPORT
         #define K8_HT_FREQ_1G_SUPPORT 0
 #endif
@@ -20,15 +15,22 @@
 	#define K8_SCAN_PCI_BUS 0
 #endif
 
+#ifndef K8_ALLOCATE_IO_RANGE
+        #define K8_ALLOCATE_IO_RANGE 0
+#endif
+
+// Do we need to allocate MMIO? Currently we direct the last 64M to sblink only; we cannot lose access to the last 4M range to the ROM
+#ifndef K8_ALLOCATE_MMIO_RANGE
+        #define K8_ALLOCATE_MMIO_RANGE 0
+#endif
+
 static inline void print_linkn_in (const char *strval, uint8_t byteval)
 {
-#if 1
 #if CONFIG_USE_INIT
         printk_debug("%s%02x\r\n", strval, byteval); 
 #else
         print_debug(strval); print_debug_hex8(byteval); print_debug("\r\n");
 #endif
-#endif
 }
 
 static uint8_t ht_lookup_capability(device_t dev, uint16_t val)
@@ -74,18 +76,27 @@
         return ht_lookup_capability(dev, 1); // Host/Secondary Interface Block Format
 }
 
-static void ht_collapse_previous_enumeration(uint8_t bus)
+static void ht_collapse_previous_enumeration(uint8_t bus, unsigned offset_unitid)
 {
 	device_t dev;
 	uint32_t id;
 
+	//actually, only for one HT device HT chain, and unitid is 0
+#if HT_CHAIN_UNITID_BASE == 0
+	if(offset_unitid) {
+		return;
+	}
+#endif
+
 	/* Check if is already collapsed */
-	dev = PCI_DEV(bus, 0, 0);
-        id = pci_read_config32(dev, PCI_VENDOR_ID);
-        if ( ! ( (id == 0xffffffff) || (id == 0x00000000) ||
-            (id == 0x0000ffff) || (id == 0xffff0000) ) ) {
-                     return;
-        }
+	if((!offset_unitid) || (offset_unitid && (!((HT_CHAIN_END_UNITID_BASE == 0) && (HT_CHAIN_END_UNITID_BASE <HT_CHAIN_UNITID_BASE))))) {
+		dev = PCI_DEV(bus, 0, 0);
+        	id = pci_read_config32(dev, PCI_VENDOR_ID);
+	        if ( ! ( (id == 0xffffffff) || (id == 0x00000000) ||
+        	    (id == 0x0000ffff) || (id == 0xffff0000) ) ) {
+	                     return;
+        	}
+	} 
 
 	/* Spin through the devices and collapse any previous
 	 * hypertransport enumeration.
@@ -136,21 +147,24 @@
 	/* AMD 8131 Errata 48 */
 	if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8131_PCIX << 16))) {
 		freq_cap &= ~(1 << HT_FREQ_800Mhz);
-	} 
+		return freq_cap;
+	}
 
 	/* AMD 8151 Errata 23 */
 	if (id == (PCI_VENDOR_ID_AMD | (PCI_DEVICE_ID_AMD_8151_SYSCTRL << 16))) {
 		freq_cap &= ~(1 << HT_FREQ_800Mhz);
+		return freq_cap;
 	} 
 	
 	/* AMD K8 Unsupported 1Ghz? */
 	if (id == (PCI_VENDOR_ID_AMD | (0x1100 << 16))) {
-#if K8_HT_FREQ_1G_SUPPORT == 1
-	    	/* Supported starting with E0 */
-		device_t dev_2 = PCI_DEV(0,0x18,2);
-              	if(pci_read_config32(dev_2,0x9c) < 0x20f00) 
-#endif
+	#if K8_HT_FREQ_1G_SUPPORT == 1 
+	        if (is_cpu_pre_e0()) {  // only E0 later support 1GHz
 			freq_cap &= ~(1 << HT_FREQ_1000Mhz);
+		}
+	#else	
+                freq_cap &= ~(1 << HT_FREQ_1000Mhz);
+	#endif
 	}
 
 	return freq_cap;
@@ -236,6 +250,7 @@
 
 	/* See if I am changing dev1's width */
 	old_width = pci_read_config8(dev1, pos1 + LINK_WIDTH(offs1) + 1);
+	old_width &= 0x77;
 	needs_reset |= old_width != width;
 
 	/* Set dev1's widths */
@@ -246,6 +261,7 @@
 
 	/* See if I am changing dev2's width */
 	old_width = pci_read_config8(dev2, pos2 + LINK_WIDTH(offs2) + 1);
+	old_width &= 0x77;
 	needs_reset |= old_width != width;
 
 	/* Set dev2's widths */
@@ -254,8 +270,14 @@
 	return needs_reset;
 }
 #if (USE_DCACHE_RAM == 1) && (K8_SCAN_PCI_BUS == 1)
-static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus);
-static int scan_pci_bus( unsigned bus) 
+
+#if RAMINIT_SYSINFO == 1
+static void ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned offset_unitid, struct sys_info *sysinfo);
+static int scan_pci_bus( unsigned bus , struct sys_info *sysinfo) 
+#else
+static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned offset_unitid);
+static int scan_pci_bus( unsigned bus)
+#endif
 {
         /*      
                 here we already can access PCI_DEV(bus, 0, 0) to PCI_DEV(bus, 0x1f, 0x7)
@@ -324,24 +346,30 @@
 			                ((unsigned int) max_bus << 16));
 			        pci_write_config32(dev, PCI_PRIMARY_BUS, buses);
 				
-				{
 				/* here we need to figure out if dev is a ht bridge
 					if it is ht bridge, we need to call ht_setup_chainx at first
 				   Not verified --- yhlu
 				*/
-					uint8_t upos;
-			                upos = ht_lookup_host_capability(dev); // one func one ht sub
-			                if (upos) { // sub ht chain
-						uint8_t busn;
-						busn = (new_bus & 0xff);
-				                /* Make certain the HT bus is not enumerated */
-				                ht_collapse_previous_enumeration(busn);
-						/* scan the ht chain */
-				                new_bus |= (ht_setup_chainx(dev,upos,busn)<<16); // store reset_needed to upword
-			                }
-				}
+				uint8_t upos;
+		                upos = ht_lookup_host_capability(dev); // one func one ht sub
+		                if (upos) { // sub ht chain
+					uint8_t busn;
+					busn = (new_bus & 0xff);
+			                /* Make certain the HT bus is not enumerated */
+			                ht_collapse_previous_enumeration(busn, 0);
+					/* scan the ht chain */
+					#if RAMINIT_SYSINFO == 1
+			                ht_setup_chainx(dev,upos,busn, 0, sysinfo); // don't need offset unitid
+					#else
+					new_bus |= (ht_setup_chainx(dev, upos, busn, 0)<<16); // store reset_needed to upword
+					#endif
+		                }
 				
+				#if RAMINIT_SYSINFO == 1				
+				new_bus = scan_pci_bus(new_bus, sysinfo);
+				#else
 				new_bus = scan_pci_bus(new_bus);
+				#endif
 				/* set real max bus num in that */
 
 			        buses = (buses & 0xff00ffff) |
@@ -370,14 +398,31 @@
 	return new_bus; 
 }
 #endif
-static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus)
+
+#if RAMINIT_SYSINFO == 1
+static void ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned offset_unitid, struct sys_info *sysinfo)
+#else
+static int ht_setup_chainx(device_t udev, uint8_t upos, uint8_t bus, unsigned offset_unitid)
+#endif
 {
+	// Even when HT_CHAIN_UNITID_BASE == 0 we still go through this function, because of the end_of_chain check; we also need it to optimize the link
+
 	uint8_t next_unitid, last_unitid;
 	unsigned uoffs;
-	int reset_needed=0;
+
+#if RAMINIT_SYSINFO == 0
+	int reset_needed = 0;
+#endif
+
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+        // Record the last HT device in the chain so we can set its UnitID to HT_CHAIN_END_UNITID_BASE
+        unsigned real_last_unitid;
+        uint8_t real_last_pos;
+	int ht_dev_num = 0;
+#endif
 
 	uoffs = PCI_HT_HOST_OFFS;
-	next_unitid = 1;
+	next_unitid = (offset_unitid) ? HT_CHAIN_UNITID_BASE:1;
 
 	do {
 		uint32_t id;
@@ -391,11 +436,24 @@
 			ctrl = pci_read_config16(udev, upos + LINK_CTRL(uoffs));
 			/* Is this the end of the hypertransport chain? */
 			if (ctrl & (1 << 6)) {
-				break;
+				goto end_of_chain;	
 			}
-			/* Has the link failed */
-			if (ctrl & (1 << 4)) {
-				break;
+
+			if (ctrl & ((1 << 4) | (1 << 8))) {
+				/*
+				 * Either the link has failed (bit 4) or we have a CRC
+				 * error (bit 8). Sometimes this can happen due to link
+				 * retrain, so knock both bits down and see if the error
+				 * is transient. Bit 6 must not be written here: it is
+				 * the end-of-chain flag tested above.
+				 */
+				ctrl |= ((1 << 4) | (1 << 8)); // Link fail + Crc
+				pci_write_config16(udev, upos + LINK_CTRL(uoffs), ctrl);
+				ctrl = pci_read_config16(udev, upos + LINK_CTRL(uoffs));
+				if (ctrl & ((1 << 4) | (1 << 8))) {
+					print_err("Detected error on Hypertransport Link\n");
+					break;
+				}
 			}
 		} while((ctrl & (1 << 5)) == 0);
 	
@@ -413,7 +471,10 @@
 
 		pos = ht_lookup_slave_capability(dev);
 		if (!pos) {
-			print_err("HT link capability not found\r\n");
+                        print_err("udev="); print_err_hex32(udev);
+                        print_err("\tupos="); print_err_hex32(upos);
+                        print_err("\tuoffs="); print_err_hex32(uoffs);
+			print_err("\tHT link capability not found\r\n");
 			break;
 		}
 
@@ -441,6 +502,14 @@
 
                 /* Compute the number of unitids consumed */
                 count = (flags >> 5) & 0x1f;
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+		if(offset_unitid) {
+	                real_last_unitid = next_unitid;
+        	        real_last_pos = pos;
+			ht_dev_num++;
+		}
+#endif
+
                 next_unitid += count;
 
 		/* Find which side of the ht link we are on,
@@ -449,9 +518,22 @@
 		 */
 		flags = pci_read_config16(dev, pos + PCI_CAP_FLAGS);
                 offs = ((flags>>10) & 1) ? PCI_HT_SLAVE1_OFFS : PCI_HT_SLAVE0_OFFS;
-                
-                /* Setup the Hypertransport link */
-                reset_needed |= ht_optimize_link(udev, upos, uoffs, dev, pos, offs);
+               
+		#if RAMINIT_SYSINFO == 1
+                /* store the link pair here and we will Setup the Hypertransport link later, after we get final FID/VID */
+		{
+			struct link_pair_st *link_pair = &sysinfo->link_pair[sysinfo->link_pair_num];
+			link_pair->udev = udev;
+			link_pair->upos = upos;
+			link_pair->uoffs = uoffs;
+			link_pair->dev = dev;
+			link_pair->pos = pos;
+			link_pair->offs = offs;
+			sysinfo->link_pair_num++;
+		}
+		#else
+		reset_needed |= ht_optimize_link(udev, upos, uoffs, dev, pos, offs);
+		#endif
 
 #if CK804_DEVN_BASE==0
 		if(id == 0x005e10de) {
@@ -466,11 +548,52 @@
 
 	} while((last_unitid != next_unitid) && (next_unitid <= 0x1f));
 
+end_of_chain: ;
+	
+#if HT_CHAIN_END_UNITID_BASE < HT_CHAIN_UNITID_BASE
+        if(offset_unitid && (ht_dev_num>0)  ) {
+                uint16_t flags;
+		int i;
+                flags = pci_read_config16(PCI_DEV(bus,real_last_unitid,0), real_last_pos + PCI_CAP_FLAGS);
+                flags &= ~0x1f;
+                flags |= HT_CHAIN_END_UNITID_BASE & 0x1f;
+                pci_write_config16(PCI_DEV(bus, real_last_unitid, 0), real_last_pos + PCI_CAP_FLAGS, flags);
+
+                #if RAMINIT_SYSINFO == 1
+		// Here need to change the dev in the array
+		for(i=0;i<sysinfo->link_pair_num;i++)
+                {
+                        struct link_pair_st *link_pair = &sysinfo->link_pair[i];
+                        if(link_pair->udev == PCI_DEV(bus, real_last_unitid, 0)) {
+				link_pair->udev = PCI_DEV(bus, HT_CHAIN_END_UNITID_BASE, 0);
+				continue;
+			}
+                        if(link_pair->dev == PCI_DEV(bus, real_last_unitid, 0)) {
+                                link_pair->dev = PCI_DEV(bus, HT_CHAIN_END_UNITID_BASE, 0);
+                        }
+                }
+		#endif
+
+        }
+#endif
+
+#if RAMINIT_SYSINFO == 0
 	return reset_needed;
+#endif
+
 }
 
+#if RAMINIT_SYSINFO == 1
+static void ht_setup_chain(device_t udev, unsigned upos, struct sys_info *sysinfo)
+#else
 static int ht_setup_chain(device_t udev, unsigned upos)
+#endif
 {
+	unsigned offset_unitid = 0;
+#if HT_CHAIN_UNITID_BASE != 1
+        offset_unitid = 1;
+#endif
+
         /* Assumption the HT chain that is bus 0 has the HT I/O Hub on it.
          * On most boards this just happens.  If a cpu has multiple
          * non Coherent links the appropriate bus registers for the
@@ -478,9 +601,17 @@
          */
 
         /* Make certain the HT bus is not enumerated */
-        ht_collapse_previous_enumeration(0);
+        ht_collapse_previous_enumeration(0, 0);
 
-        return ht_setup_chainx(udev, upos, 0);
+#if HT_CHAIN_UNITID_BASE != 1
+        offset_unitid = 1;
+#endif
+
+#if RAMINIT_SYSINFO == 1
+	ht_setup_chainx(udev, upos, 0, offset_unitid, sysinfo);
+#else
+        return ht_setup_chainx(udev, upos, 0, offset_unitid);
+#endif
 }
 static int optimize_link_read_pointer(uint8_t node, uint8_t linkn, uint8_t linkt, uint8_t val)
 {
@@ -506,7 +637,7 @@
 	return 0;
 }
 
-static int optimize_link_in_coherent(uint8_t ht_c_num)
+static int optimize_link_read_pointers_chain(uint8_t ht_c_num)
 {
 	int reset_needed; 
 	uint8_t i;
@@ -541,20 +672,28 @@
 	return reset_needed;
 }
 
+#if RAMINIT_SYSINFO == 1
+static void ht_setup_chains(uint8_t ht_c_num, struct sys_info *sysinfo)
+#else
 static int ht_setup_chains(uint8_t ht_c_num)
+#endif
 {
 	/* Assumption the HT chain that is bus 0 has the HT I/O Hub on it. 
 	 * On most boards this just happens.  If a cpu has multiple
 	 * non Coherent links the appropriate bus registers for the
 	 * links needs to be programed to point at bus 0.
 	 */
-	int reset_needed; 
         uint8_t upos;
         device_t udev;
 	uint8_t i;
 
-	reset_needed = 0;
+#if RAMINIT_SYSINFO == 0
+	int reset_needed = 0;
+#else
+	sysinfo->link_pair_num = 0;
+#endif
 
+	// first one is SB Chain
 	for (i = 0; i < ht_c_num; i++) {
 		uint32_t reg;
 		uint8_t devpos;
@@ -564,6 +703,7 @@
 		#if (USE_DCACHE_RAM == 1) && (K8_SCAN_PCI_BUS == 1)
 		unsigned bus;
 		#endif
+		unsigned offset_unitid = 0;
 		
 		reg = pci_read_config32(PCI_DEV(0,0x18,1), 0xe0 + i * 4);
 
@@ -576,32 +716,57 @@
 		dword &= ~(0xffff<<8);
 		dword |= (reg & 0xffff0000)>>8;
 		pci_write_config32( PCI_DEV(0, devpos,0), regpos , dword);
-		
+	
+
+        #if HT_CHAIN_UNITID_BASE != 1
+                #if SB_HT_CHAIN_UNITID_OFFSET_ONLY == 1
+                if(i==0) // to check if it is sb ht chain
+                #endif
+                        offset_unitid = 1;
+        #endif
+	
 	        /* Make certain the HT bus is not enumerated */
-        	ht_collapse_previous_enumeration(busn);
+        	ht_collapse_previous_enumeration(busn, offset_unitid);
 
 		upos = ((reg & 0xf00)>>8) * 0x20 + 0x80;
 		udev =  PCI_DEV(0, devpos, 0);
-		
-		reset_needed |= ht_setup_chainx(udev,upos,busn);
+
+#if RAMINIT_SYSINFO == 1
+		ht_setup_chainx(udev,upos,busn, offset_unitid, sysinfo); // all not
+#else
+		reset_needed |= ht_setup_chainx(udev,upos,busn, offset_unitid); //all not
+#endif
 
 		#if (USE_DCACHE_RAM == 1) && (K8_SCAN_PCI_BUS == 1)
 	        /* You can use use this in romcc, because there is function call in romcc, recursive will kill you */
 		bus = busn; // we need 32 bit 
-        	reset_needed |= (scan_pci_bus(bus)>>16); // take out reset_needed that stored in upword
+#if RAMINIT_SYSINFO == 1
+        	scan_pci_bus(bus, sysinfo);
+#else
+		reset_needed |= (scan_pci_bus(bus)>>16); // take out reset_needed that stored in upword
+#endif
 		#endif
 	}
 
-	reset_needed |= optimize_link_in_coherent(ht_c_num);		
-	
-	return reset_needed;
-}
+#if RAMINIT_SYSINFO == 0
+	reset_needed |= optimize_link_read_pointers_chain(ht_c_num);
 
-#ifndef K8_ALLOCATE_IO_RANGE 
-	#define K8_ALLOCATE_IO_RANGE 0
+	return reset_needed;
 #endif
 
+}
+
+/* Returns bits [6:4] of register 0x60 on PCI_DEV(0, 0x18, 0) plus one; callers use it as the node count. */
+static inline unsigned get_nodes(void)
+{
+	return 1 + ((pci_read_config32(PCI_DEV(0, 0x18, 0), 0x60) >> 4) & 7);
+}
+
+#if RAMINIT_SYSINFO == 1
+static void ht_setup_chains_x(struct sys_info *sysinfo)
+#else
 static int ht_setup_chains_x(void)
+#endif
 {               
         uint8_t nodeid;
         uint32_t reg; 
@@ -612,11 +777,18 @@
 #if K8_ALLOCATE_IO_RANGE == 1	
 	unsigned next_io_base;
 #endif
-      
+
+	nodes = get_nodes();     
+ 
         /* read PCI_DEV(0,0x18,0) 0x64 bit [8:9] to find out SbLink m */
         reg = pci_read_config32(PCI_DEV(0, 0x18, 0), 0x64);
         /* update PCI_DEV(0, 0x18, 1) 0xe0 to 0x05000m03, and next_busn=0x3f+1 */
 	print_linkn_in("SBLink=", ((reg>>8) & 3) );
+#if RAMINIT_SYSINFO == 1
+	sysinfo->sblnk = (reg>>8) & 3;
+	sysinfo->sbbusn = 0;
+	sysinfo->nodes = nodes;
+#endif
         tempreg = 3 | ( 0<<4) | (((reg>>8) & 3)<<8) | (0<<16)| (0x3f<<24);
         pci_write_config32(PCI_DEV(0, 0x18, 1), 0xe0, tempreg);
 
@@ -639,8 +811,6 @@
 		pci_write_config32(PCI_DEV(0, 0x18, 1), 0xc0 + ht_c_num * 8, 0);
         }
  
-	nodes = ((pci_read_config32(PCI_DEV(0, 0x18, 0), 0x60)>>4) & 7) + 1;
-
         for(nodeid=0; nodeid<nodes; nodeid++) {
                 device_t dev; 
                 uint8_t linkn;
@@ -671,7 +841,7 @@
 			/* io range allocation */
 		        tempreg = nodeid | (linkn<<4) |  ((next_io_base+0x3)<<12); //limit
 		        pci_write_config32(PCI_DEV(0, 0x18, 1), 0xC4 + ht_c_num * 8, tempreg);
-		        tempreg = 3 | ( 3<<4) | (next_io_base<<12);        //base
+		        tempreg = 3 /*| ( 3<<4)*/ | (next_io_base<<12);        //base :ISA and VGA ?
 		        pci_write_config32(PCI_DEV(0, 0x18, 1), 0xC0 + ht_c_num * 8, tempreg);
 		        next_io_base += 0x3+0x1;
 #endif
@@ -717,6 +887,64 @@
 		}
         }
 
-        return ht_setup_chains(i);
+#if RAMINIT_SYSINFO == 1
+	sysinfo->ht_c_num = i;
+        ht_setup_chains(i, sysinfo);
+#else
+	return ht_setup_chains(i);
+#endif
 
 }
+
+#if RAMINIT_SYSINFO == 1
+/* Optimize every incoherent HT link pair recorded in sysinfo, then the read
+ * pointers of the ht_c_num chains; returns the OR of all reset-needed results. */
+static int optimize_link_incoherent_ht(struct sys_info *sysinfo)
+{
+	unsigned i;
+	unsigned link_pair_num = sysinfo->link_pair_num;
+	int reset_needed = 0;
+
+	for(i = 0; i < link_pair_num; i++) {
+		struct link_pair_st *link_pair = &sysinfo->link_pair[i];
+		reset_needed |= ht_optimize_link(link_pair->udev, link_pair->upos, link_pair->uoffs, link_pair->dev, link_pair->pos, link_pair->offs);
+	}
+
+	/* Use the per-chain helper of this file (optimize_link_read_pointers_chain),
+	 * the same one ht_setup_chains() calls when RAMINIT_SYSINFO == 0. */
+	reset_needed |= optimize_link_read_pointers_chain(sysinfo->ht_c_num);
+	return reset_needed;
+}
+#endif
+
+
+/* Returns the SbLink field: bits [9:8] of register 0x64 on PCI_DEV(0, 0x18, 0). */
+static unsigned get_sblnk(void)
+{
+	uint32_t link_reg = pci_read_config32(PCI_DEV(0, 0x18, 0), 0x64);
+
+	return (link_reg >> 8) & 3;
+}
+
+/* Look up which bus a given node/link combination is on, by scanning the
+ * config map registers 0xE0..0xEC of PCI_DEV(0, 0x18, 1).
+ * Returns 0 when no matching entry is found.
+ */
+static unsigned node_link_to_bus(unsigned node, unsigned link)
+{
+	unsigned reg;
+
+	for (reg = 0xE0; reg < 0xF0; reg += 0x04) {
+		unsigned config_map = pci_read_config32(PCI_DEV(0, 0x18, 1), reg);
+		unsigned map_node = (config_map >> 4) & 7;
+		unsigned map_link = (config_map >> 8) & 3;
+
+		/* entries whose two low bits are not both set are ignored */
+		if (((config_map & 3) == 3) && (map_node == node) && (map_link == link)) {
+			return (config_map >> 16) & 0xff;
+		}
+	}
+
+	return 0;
+}
+
diff --git a/src/northbridge/amd/amdk8/misc_control.c b/src/northbridge/amd/amdk8/misc_control.c
index 4cd3d0d..c7176c3 100644
--- a/src/northbridge/amd/amdk8/misc_control.c
+++ b/src/northbridge/amd/amdk8/misc_control.c
@@ -17,7 +17,8 @@
 #include <part/hard_reset.h>
 #include <pc80/mc146818rtc.h>
 #include <bitops.h>
-#include "./cpu_rev.c"
+#include <cpu/amd/model_fxx_rev.h>
+
 #include "amdk8.h"
 
 /**
diff --git a/src/northbridge/amd/amdk8/northbridge.c b/src/northbridge/amd/amdk8/northbridge.c
index 9febac2..d2db902 100644
--- a/src/northbridge/amd/amdk8/northbridge.c
+++ b/src/northbridge/amd/amdk8/northbridge.c
@@ -2,6 +2,7 @@
 	2004.12 yhlu add dual core support
 	2005.01 yhlu add support move apic before pci_domain in MB Config.lb
 	2005.02 yhlu add e0 memory hole support
+	2005.11 yhlu add put sb ht chain on bus 0
 */
 
 #include <console/console.h>
@@ -96,16 +97,15 @@
 	return (dev->path.u.pci.devfn >> 3) - 0x18;
 }
 
-static unsigned int amdk8_scan_chains(device_t dev, unsigned int max)
+static unsigned int amdk8_scan_chain(device_t dev, unsigned nodeid, unsigned link, unsigned sblink, unsigned int max, unsigned offset_unitid)
 {
-	unsigned nodeid;
-	unsigned link;
-	nodeid = amdk8_nodeid(dev);
 #if 0
 	printk_debug("%s amdk8_scan_chains max: %d starting...\n", 
 		dev_path(dev), max);
 #endif
-	for(link = 0; link < dev->links; link++) {
+//	I want to put sb chain in bus 0 can I?
+
+	 
 		uint32_t link_type;
 		uint32_t busses, config_busses;
 		unsigned free_reg, config_reg;
@@ -114,13 +114,13 @@
 			link_type = pci_read_config32(dev, dev->link[link].cap + 0x18);
 		} while(link_type & ConnectionPending);
 		if (!(link_type & LinkConnected)) {
-			continue;
+			return max;
 		}
 		do {
 			link_type = pci_read_config32(dev, dev->link[link].cap + 0x18);
 		} while(!(link_type & InitComplete));
 		if (!(link_type & NonCoherent)) {
-			continue;
+			return max;
 		}
 		/* See if there is an available configuration space mapping
 		 * register in function 1. 
@@ -146,14 +146,21 @@
 		 * register skip this bus 
 		 */
 		if (config_reg > 0xec) {
-			continue;
+			return max;
 		}
 
 		/* Set up the primary, secondary and subordinate bus numbers.
 		 * We have no idea how many busses are behind this bridge yet,
 		 * so we set the subordinate bus number to 0xff for the moment.
 		 */
-		dev->link[link].secondary = ++max;
+#if K8_SB_HT_CHAIN_ON_BUS0 == 1
+		if((nodeid == 0) && (sblink==link)) { // actually max is 0 here
+			dev->link[link].secondary = max;
+		}
+		else 
+#endif
+			dev->link[link].secondary = ++max;
+		
 		dev->link[link].subordinate = 0xff;
 
 		/* Read the existing primary/secondary/subordinate bus
@@ -188,7 +195,7 @@
 		/* Now we can scan all of the subordinate busses i.e. the
 		 * chain on the hypertranport link 
 		 */
-		max = hypertransport_scan_chain(&dev->link[link], 0, 0xbf, max);
+		max = hypertransport_scan_chain(&dev->link[link], 0, 0xbf, max, offset_unitid);
 
 #if 0
 		printk_debug("%s Hyper transport scan link: %d new max: %d\n",
@@ -211,14 +218,57 @@
 		printk_debug("%s Hypertransport scan link: %d done\n",
 			dev_path(dev), link);
 #endif
-	}
-#if 0
-	printk_debug("%s amdk8_scan_chains max: %d done\n", 
-		dev_path(dev), max);
-#endif
+
 	return max;
 }
 
+/* Scan the HT chain of every link of this node via amdk8_scan_chain() and
+ * return max as updated by those calls. On node 0 the sb link is read from
+ * register 0x64 bits [9:8]; it can be scanned first so it lands on bus 0. */
+static unsigned int amdk8_scan_chains(device_t dev, unsigned int max)
+{
+	unsigned offset_unitid = 0;
+	unsigned sblink = 0;
+	unsigned link;
+	unsigned nodeid = amdk8_nodeid(dev);
+
+#if 0
+	printk_debug("%s amdk8_scan_chains max: %d starting...\n",
+		dev_path(dev), max);
+#endif
+
+	if (nodeid == 0) {
+		sblink = (pci_read_config32(dev, 0x64) >> 8) & 3;
+#if K8_SB_HT_CHAIN_ON_BUS0 == 1
+	#if HT_CHAIN_UNITID_BASE != 1
+		offset_unitid = 1;
+	#endif
+		/* do the sb ht chain first, in case a board (s2885) puts the sb chain (8131/8111) on link2 but the 8151 on link0 */
+		max = amdk8_scan_chain(dev, nodeid, sblink, sblink, max, offset_unitid);
+#endif
+	}
+
+	for (link = 0; link < dev->links; link++) {
+#if K8_SB_HT_CHAIN_ON_BUS0 == 1
+		if ((nodeid == 0) && (link == sblink)) continue; /* sb chain was already scanned above */
+#endif
+		offset_unitid = 0;
+	#if HT_CHAIN_UNITID_BASE != 1
+		#if SB_HT_CHAIN_UNITID_OFFSET_ONLY == 1
+		if ((nodeid == 0) && (link == sblink))
+		#endif
+			offset_unitid = 1;
+	#endif
+		max = amdk8_scan_chain(dev, nodeid, link, sblink, max, offset_unitid);
+	}
+#if 0
+	printk_debug("%s amdk8_scan_chains max: %d done\n",
+		dev_path(dev), max);
+#endif
+	return max;
+}
+
+
 static int reg_useable(unsigned reg, 
 	device_t goal_dev, unsigned goal_nodeid, unsigned goal_link)
 {
@@ -508,7 +558,6 @@
 
 	/* release the temp resource */
 	resource->flags = 0;
-
 }
 
 static void amdk8_set_resources(device_t dev)
@@ -1041,7 +1090,7 @@
 	for(reg = 0xe0; reg <= 0xec; reg += 4) {
 		f1_write_config32(reg, 0);
 	}
-	max = pci_scan_bus(&dev->link[0], PCI_DEVFN(0x18, 0), 0xff, max);
+	max = pci_scan_bus(&dev->link[0], PCI_DEVFN(0x18, 0), 0xff, max);  
 	
 	/* Tune the hypertransport transaction for best performance.
 	 * Including enabling relaxed ordering if it is safe.
diff --git a/src/sdram/generic_sdram.c b/src/sdram/generic_sdram.c
index da14166..7591fae 100644
--- a/src/sdram/generic_sdram.c
+++ b/src/sdram/generic_sdram.c
@@ -1,3 +1,17 @@
+
+#ifndef RAMINIT_SYSINFO
+        #define RAMINIT_SYSINFO 0
+#endif
+
+/* Print strval, then val in hex, then CRLF on the debug console. */
+static inline void print_debug_sdram_8(const char *strval, uint32_t val)
+{
+#if !CONFIG_USE_INIT
+	print_debug(strval); print_debug_hex8(val); print_debug("\r\n");
+#else
+	printk_debug("%s%02x\r\n", strval, val);
+#endif
+}
 void sdram_no_memory(void)
 {
 	print_err("No memory!!\r\n");
@@ -7,31 +21,34 @@
 }
 
 /* Setup SDRAM */
+#if RAMINIT_SYSINFO == 1
+void sdram_initialize(int controllers, const struct mem_controller *ctrl, void *sysinfo)
+#else
 void sdram_initialize(int controllers, const struct mem_controller *ctrl)
+#endif
 {
 	int i;
 	/* Set the registers we can set once to reasonable values */
 	for(i = 0; i < controllers; i++) {
-#if CONFIG_USE_INIT
-		printk_debug("Ram1.%02x\r\n",i);
-#else
-		print_debug("Ram1.");
-		print_debug_hex8(i);
-		print_debug("\r\n");
-#endif
+		print_debug_sdram_8("Ram1.",i);
+
+	#if RAMINIT_SYSINFO == 1
+		sdram_set_registers(ctrl + i , sysinfo);
+	#else
 		sdram_set_registers(ctrl + i);
+	#endif
 	}
 
 	/* Now setup those things we can auto detect */
 	for(i = 0; i < controllers; i++) {
-#if CONFIG_USE_INIT
-                printk_debug("Ram2.%02x\r\n",i);
-#else
-		print_debug("Ram2.");
-		print_debug_hex8(i);
-		print_debug("\r\n");
-#endif
-		sdram_set_spd_registers(ctrl + i);
+                print_debug_sdram_8("Ram2.",i);
+
+	#if RAMINIT_SYSINFO == 1
+		sdram_set_spd_registers(ctrl + i , sysinfo);
+	#else
+                sdram_set_spd_registers(ctrl + i);
+	#endif
+
 	}
 
 	/* Now that everything is setup enable the SDRAM.
@@ -39,7 +56,12 @@
 	 * we need to it by hand.
 	 */
 	print_debug("Ram3\r\n");
+
+	#if RAMINIT_SYSINFO == 1
+	sdram_enable(controllers, ctrl, sysinfo);
+	#else
 	sdram_enable(controllers, ctrl);
+	#endif
 
 	print_debug("Ram4\r\n");
 }
diff --git a/src/southbridge/amd/amd8111/amd8111_early_ctrl.c b/src/southbridge/amd/amd8111/amd8111_early_ctrl.c
new file mode 100644
index 0000000..8a648e8
--- /dev/null
+++ b/src/southbridge/amd/amd8111/amd8111_early_ctrl.c
@@ -0,0 +1,41 @@
+/* by yhlu 2005.10 */
+/* Hard reset through function 3 of the device at sbdn+1 on bus sbbusn. */
+static void hard_reset(struct sys_info *sysinfo)
+{
+	/* function 3 of the device right after sbdn on the sb bus */
+	device_t sb_fn3 = PCI_DEV(sysinfo->sbbusn, sysinfo->sbdn + 1, 3);
+
+	set_bios_reset();
+
+	/* enable cf9: write 0xf1 to config register 0x41 */
+	pci_write_config8(sb_fn3, 0x41, 0xf1);
+
+	/* reset: write 0x0e to I/O port 0xcf9 */
+	outb(0x0e, 0x0cf9);
+}
+
+/* Program function 3 of the device at sbdn+1 on bus sbbusn for FID changes. */
+static void enable_fid_change_on_sb(struct sys_info *sysinfo)
+{
+	device_t sb_fn3 = PCI_DEV(sysinfo->sbbusn, sysinfo->sbdn + 1, 3);
+
+	/* byte register 0x74 <- 4 */
+	pci_write_config8(sb_fn3, 0x74, 4);
+
+	/* set VFSMAF (VID/FID System Management Action Field) to 2:
+	 * the value 2 is shifted to bit 12 of dword register 0x70 */
+	pci_write_config32(sb_fn3, 0x70, 2 << 12);
+}
+
+/* Soft reset: call set_bios_reset(), then write 1 to register 0x47 of function 0 at sbdn+1. */
+static void soft_reset(struct sys_info *sysinfo)
+{
+	/* function 0 of the device right after sbdn on bus sbbusn */
+	device_t sb_fn0 = PCI_DEV(sysinfo->sbbusn, sysinfo->sbdn + 1, 0);
+
+	set_bios_reset();
+
+	pci_write_config8(sb_fn0, 0x47, 1);
+}
+
+