Forward port bochs smp changes; rename smpdetect.c to smp.c.

Rename smpdetect.c to smp.c - the code does more than just cpu detection.
Don't probe cpu count on demand - schedule it during post.
Add logic to run wrmsr on all cpus - each write is recorded and then
    replayed by every other cpu when it is started.
Don't make mtrr setup specific to kvm - do it on all non-coreboot
    machines whose cpu reports both mtrr and msr support.
Detect cpu signature/features automatically in mptable.
Also, make sure acpi structures are packed.
diff --git a/Makefile b/Makefile
index 356e72c..71e3cb4 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@
 
 # Source files
 SRCBOTH=output.c util.c floppy.c ata.c misc.c mouse.c kbd.c pci.c \
-        serial.c clock.c pic.c cdrom.c ps2port.c smpdetect.c resume.c \
+        serial.c clock.c pic.c cdrom.c ps2port.c smp.c resume.c \
         pnpbios.c pirtable.c
 SRC16=$(SRCBOTH) system.c disk.c apm.c pcibios.c vgahooks.c font.c
 SRC32=$(SRCBOTH) post.c shadow.c memmap.c coreboot.c boot.c \
diff --git a/src/acpi.c b/src/acpi.c
index 6a50971..f140f1a 100644
--- a/src/acpi.c
+++ b/src/acpi.c
@@ -35,7 +35,7 @@
 struct acpi_table_header         /* ACPI common table header */
 {
     ACPI_TABLE_HEADER_DEF
-};
+} PACKED;
 
 /*
  * ACPI 1.0 Root System Description Table (RSDT)
@@ -46,7 +46,7 @@
     ACPI_TABLE_HEADER_DEF       /* ACPI common table header */
     u32 table_offset_entry [3]; /* Array of pointers to other */
     /* ACPI tables */
-};
+} PACKED;
 
 /*
  * ACPI 1.0 Firmware ACPI Control Structure (FACS)
@@ -62,7 +62,7 @@
     u32 S4bios_f        : 1;    /* Indicates if S4BIOS support is present */
     u32 reserved1       : 31;   /* Must be 0 */
     u8  resverved3 [40];        /* Reserved - must be zero */
-};
+} PACKED;
 
 
 /*
@@ -124,7 +124,7 @@
 #else
     u32 flags;
 #endif
-};
+} PACKED;
 
 /*
  * MADT values and structures
@@ -149,7 +149,7 @@
 #else
     u32 flags;
 #endif
-};
+} PACKED;
 
 
 /* Values for Type in APIC_HEADER_DEF */
@@ -185,7 +185,7 @@
 #else
     u32 flags;
 #endif
-};
+} PACKED;
 
 struct madt_io_apic
 {
@@ -195,7 +195,7 @@
     u32 address;                /* APIC physical address */
     u32 interrupt;              /* Global system interrupt where INTI
                                  * lines start */
-};
+} PACKED;
 
 #if CONFIG_KVM
 /* IRQs 5,9,10,11 */
@@ -245,7 +245,7 @@
 {
     u8 *ssdt_ptr = ssdt;
     int i, length;
-    int smp_cpus = smp_probe();
+    int smp_cpus = CountCPUs;
     int acpi_cpus = smp_cpus > 0xff ? 0xff : smp_cpus;
 
     ssdt_ptr[9] = 0; // checksum;
@@ -348,7 +348,7 @@
     ssdt = (void *)(addr);
     addr += acpi_build_processor_ssdt(ssdt);
 
-    int smp_cpus = smp_probe();
+    int smp_cpus = CountCPUs;
     addr = ALIGN(addr, 8);
     madt_addr = addr;
     madt_size = sizeof(*madt) +
diff --git a/src/mptable.c b/src/mptable.c
index 9e030fe..47aee24 100644
--- a/src/mptable.c
+++ b/src/mptable.c
@@ -18,7 +18,7 @@
 
     dprintf(3, "init MPTable\n");
 
-    int smp_cpus = smp_probe();
+    int smp_cpus = CountCPUs;
     if (smp_cpus <= 1)
         // Building an mptable on uniprocessor machines confuses some OSes.
         return;
@@ -57,6 +57,8 @@
     config->lapic = BUILD_APIC_ADDR;
 
     // CPU definitions.
+    u32 cpuid_signature, ebx, ecx, cpuid_features;
+    cpuid(1, &cpuid_signature, &ebx, &ecx, &cpuid_features);
     struct mpt_cpu *cpus = (void*)&config[1];
     int i;
     for (i = 0; i < smp_cpus; i++) {
@@ -67,8 +69,13 @@
         cpu->apicver = 0x11;
         /* cpu flags: enabled, bootstrap cpu */
         cpu->cpuflag = (i == 0 ? 3 : 1);
-        cpu->cpufeature = 0x600;
-        cpu->featureflag = 0x201;
+        if (cpuid_signature) {
+            cpu->cpusignature = cpuid_signature;
+            cpu->featureflag = cpuid_features;
+        } else {
+            cpu->cpusignature = 0x600;
+            cpu->featureflag = 0x201;
+        }
     }
 
     /* isa bus */
diff --git a/src/mptable.h b/src/mptable.h
index d8fecb3..4c4d52f 100644
--- a/src/mptable.h
+++ b/src/mptable.h
@@ -44,7 +44,7 @@
     u8 apicid;
     u8 apicver;
     u8 cpuflag;
-    u32 cpufeature;
+    u32 cpusignature;
     u32 featureflag;
     u32 reserved[2];
 } PACKED;
diff --git a/src/mtrr.c b/src/mtrr.c
index 12837e0..a9cd5f7 100644
--- a/src/mtrr.c
+++ b/src/mtrr.c
@@ -24,28 +24,18 @@
 #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
-static u64 rdmsr(u32 index)
-{
-    u64 ret;
-    asm ("rdmsr" : "=A"(ret) : "c"(index));
-    return ret;
-}
-
-static void wrmsr(u32 index, u64 val)
-{
-    asm volatile ("wrmsr" : : "c"(index), "A"(val));
-}
-
-static void wrmsr_smp(u32 index, u64 val)
-{
-    // XXX - should run this on other CPUs also.
-    wrmsr(index, val);
-}
-
 void mtrr_setup(void)
 {
-    if (! CONFIG_KVM)
+    if (CONFIG_COREBOOT)
         return;
+
+    u32 eax, ebx, ecx, cpuid_features;
+    cpuid(1, &eax, &ebx, &ecx, &cpuid_features);
+    if (!(cpuid_features & CPUID_MTRR))
+        return;
+    if (!(cpuid_features & CPUID_MSR))
+        return;
+
     dprintf(3, "init mtrr\n");
 
     int i, vcnt, fix, wc;
diff --git a/src/post.c b/src/post.c
index 3c68990..638f95a 100644
--- a/src/post.c
+++ b/src/post.c
@@ -163,10 +163,10 @@
     mathcp_setup();
 
     smp_probe_setup();
-
     memmap_setup();
     ram_probe();
     mtrr_setup();
+    smp_probe();
 
     pnp_setup();
     vga_setup();
diff --git a/src/smbios.c b/src/smbios.c
index e2f2190..40812e3 100644
--- a/src/smbios.c
+++ b/src/smbios.c
@@ -559,7 +559,7 @@
     add_struct(smbios_type_0_init(p));
     add_struct(smbios_type_1_init(p));
     add_struct(smbios_type_3_init(p));
-    int smp_cpus = smp_probe();
+    int smp_cpus = CountCPUs;
     for (cpu_num = 1; cpu_num <= smp_cpus; cpu_num++)
         add_struct(smbios_type_4_init(p, cpu_num));
     add_struct(smbios_type_16_init(p));
diff --git a/src/smpdetect.c b/src/smp.c
similarity index 67%
rename from src/smpdetect.c
rename to src/smp.c
index a25e042..49e5631 100644
--- a/src/smpdetect.c
+++ b/src/smp.c
@@ -10,8 +10,6 @@
 #include "cmos.h" // CMOS_BIOS_SMP_COUNT
 #include "farptr.h" // ASSERT32
 
-#define CPUID_APIC (1 << 9)
-
 #define APIC_ICR_LOW ((u8*)BUILD_APIC_ADDR + 0x300)
 #define APIC_SVR     ((u8*)BUILD_APIC_ADDR + 0x0F0)
 
@@ -47,38 +45,68 @@
     return *(volatile const u8 *)addr;
 }
 
-u32 smp_cpus VAR16_32;
+struct { u32 ecx, eax, edx; } smp_mtrr[16] VAR16_32;
+u32 smp_mtrr_count VAR16_32;
+
+void
+wrmsr_smp(u32 index, u64 val)
+{
+    wrmsr(index, val);
+    if (smp_mtrr_count >= ARRAY_SIZE(smp_mtrr))
+        return;
+    smp_mtrr[smp_mtrr_count].ecx = index;
+    smp_mtrr[smp_mtrr_count].eax = val;
+    smp_mtrr[smp_mtrr_count].edx = val >> 32;
+    smp_mtrr_count++;
+}
+
+u32 CountCPUs VAR16_32;
 extern void smp_ap_boot_code();
 ASM16(
     "  .global smp_ap_boot_code\n"
     "smp_ap_boot_code:\n"
-    // Increment the cpu counter
+
+    // Setup data segment
     "  movw $" __stringify(SEG_BIOS) ", %ax\n"
     "  movw %ax, %ds\n"
-    "  lock incl smp_cpus\n"
+
+    // MTRR setup
+    "  movl $smp_mtrr, %esi\n"
+    "  movl smp_mtrr_count, %ebx\n"
+    "1:testl %ebx, %ebx\n"
+    "  jz 2f\n"
+    "  movl 0(%esi), %ecx\n"
+    "  movl 4(%esi), %eax\n"
+    "  movl 8(%esi), %edx\n"
+    "  wrmsr\n"
+    "  addl $12, %esi\n"
+    "  decl %ebx\n"
+    "  jmp 1b\n"
+    "2:\n"
+
+    // Increment the cpu counter
+    "  lock incl CountCPUs\n"
+
     // Halt the processor.
     "1:hlt\n"
     "  jmp 1b\n"
     );
 
-/* find the number of CPUs by launching a SIPI to them */
-int
+// find and initialize the CPUs by launching a SIPI to them
+void
 smp_probe(void)
 {
     ASSERT32();
-    if (smp_cpus)
-        return smp_cpus;
-
     u32 eax, ebx, ecx, cpuid_features;
     cpuid(1, &eax, &ebx, &ecx, &cpuid_features);
     if (! (cpuid_features & CPUID_APIC)) {
         // No apic - only the main cpu is present.
-        smp_cpus = 1;
-        return 1;
+        CountCPUs= 1;
+        return;
     }
 
     // Init the counter.
-    writel(&smp_cpus, 1);
+    writel(&CountCPUs, 1);
 
     // Setup jump trampoline to counter code.
     u64 old = *(u64*)BUILD_AP_BOOT_ADDR;
@@ -100,20 +128,19 @@
     if (CONFIG_COREBOOT)
         mdelay(10);
     else
-        while (inb_cmos(CMOS_BIOS_SMP_COUNT) + 1 != readl(&smp_cpus))
+        while (inb_cmos(CMOS_BIOS_SMP_COUNT) + 1 != readl(&CountCPUs))
             ;
 
     // Restore memory.
     *(u64*)BUILD_AP_BOOT_ADDR = old;
 
-    u32 count = readl(&smp_cpus);
-    dprintf(1, "Found %d cpu(s)\n", count);
-    return count;
+    dprintf(1, "Found %d cpu(s)\n", readl(&CountCPUs));
 }
 
-// Reset smp_cpus to zero (forces a recheck on reboots).
+// Reset variables to zero
 void
 smp_probe_setup(void)
 {
-    smp_cpus = 0;
+    CountCPUs = 0;
+    smp_mtrr_count = 0;
 }
diff --git a/src/util.h b/src/util.h
index 2ba80dd..6ae7f19 100644
--- a/src/util.h
+++ b/src/util.h
@@ -51,6 +51,9 @@
     asm volatile("wbinvd");
 }
 
+#define CPUID_MSR (1 << 5)
+#define CPUID_APIC (1 << 9)
+#define CPUID_MTRR (1 << 12)
 static inline void cpuid(u32 index, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
 {
     asm("cpuid"
@@ -58,6 +61,18 @@
         : "0" (index));
 }
 
+static inline u64 rdmsr(u32 index)
+{
+    u64 ret;
+    asm ("rdmsr" : "=A"(ret) : "c"(index));
+    return ret;
+}
+
+static inline void wrmsr(u32 index, u64 val)
+{
+    asm volatile ("wrmsr" : : "c"(index), "A"(val));
+}
+
 static inline u64 rdtscll(void)
 {
     u64 val;
@@ -177,8 +192,10 @@
 // smm.c
 void smm_init();
 
-// smpdetect.c
-int smp_probe(void);
+// smp.c
+extern u32 CountCPUs VAR16_32;
+void wrmsr_smp(u32 index, u64 val);
+void smp_probe(void);
 void smp_probe_setup(void);
 
 // smbios.c