Implement call32 mechanism using SMIs.

Add support for jumping into 32bit mode using a System Management Mode
(SMM) handler.  When available, this allows SeaBIOS to transition to
32bit mode even when called in vm86 mode.  It will also prevent the
clobbering of the segment registers.

Currently, SMM is only supported in QEMU when running in TCG mode.
Also, QEMU v2.1 (or later) is needed for this mechanism to work when
the BIOS is called in vm86 mode.

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
diff --git a/src/Kconfig b/src/Kconfig
index a863866..a1c0c1e 100644
--- a/src/Kconfig
+++ b/src/Kconfig
@@ -292,6 +292,10 @@
         default y
         help
             Support System Management Mode (on emulators).
+    config CALL32_SMM
+        bool
+        depends on USE_SMM
+        default y
     config MTRR_INIT
         depends on QEMU
         bool "Initialize MTRRs"
diff --git a/src/fw/smm.c b/src/fw/smm.c
index 5b26977..dabc677 100644
--- a/src/fw/smm.c
+++ b/src/fw/smm.c
@@ -13,6 +13,7 @@
 #include "hw/pci_regs.h" // PCI_DEVICE_ID
 #include "output.h" // dprintf
 #include "paravirt.h" // PORT_SMI_STATUS
+#include "stacks.h" // HaveSmmCall32
 #include "string.h" // memcpy
 #include "util.h" // smm_setup
 #include "x86.h" // wbinvd
@@ -42,7 +43,9 @@
 };
 
 struct smm_layout {
-    u8 stack[0x8000];
+    struct smm_state backup1;
+    struct smm_state backup2;
+    u8 stack[0x7c00];
     u64 codeentry;
     u8 pad_8008[0x7df8];
     struct smm_state cpu;
@@ -69,8 +72,49 @@
         }
         // indicate to smm_relocate_and_restore() that the SMM code was executed
         outb(0x00, PORT_SMI_STATUS);
+
+        if (CONFIG_CALL32_SMM) {
+            // Backup current cpu state for SMM trampolining
+            struct smm_layout *newsmm = (void*)BUILD_SMM_ADDR;
+            memcpy(&newsmm->backup1, &smm->cpu, sizeof(newsmm->backup1));
+            memcpy(&newsmm->backup2, &smm->cpu, sizeof(newsmm->backup2));
+            HaveSmmCall32 = 1;
+        }
+
         return;
     }
+
+    if (CONFIG_CALL32_SMM && cmd == CALL32SMM_CMDID) {
+        if (smm->cpu.i32.smm_rev == SMM_REV_I32) {
+            u32 regs[8];
+            memcpy(regs, &smm->cpu.i32.eax, sizeof(regs));
+            if (smm->cpu.i32.ecx == CALL32SMM_ENTERID) {
+                dprintf(9, "smm cpu call pc=%x esp=%x\n", regs[3], regs[4]);
+                memcpy(&smm->backup2, &smm->cpu, sizeof(smm->backup2));
+                memcpy(&smm->cpu, &smm->backup1, sizeof(smm->cpu));
+                memcpy(&smm->cpu.i32.eax, regs, sizeof(regs));
+                smm->cpu.i32.eip = regs[3];
+            } else if (smm->cpu.i32.ecx == CALL32SMM_RETURNID) {
+                dprintf(9, "smm cpu ret %x esp=%x\n", regs[3], regs[4]);
+                memcpy(&smm->cpu, &smm->backup2, sizeof(smm->cpu));
+                memcpy(&smm->cpu.i32.eax, regs, sizeof(regs));
+                smm->cpu.i32.eip = regs[3];
+            }
+        } else if (smm->cpu.i64.smm_rev == SMM_REV_I64) {
+            u64 regs[8];
+            memcpy(regs, &smm->cpu.i64.rdi, sizeof(regs));
+            if ((u32)smm->cpu.i64.rcx == CALL32SMM_ENTERID) {
+                memcpy(&smm->backup2, &smm->cpu, sizeof(smm->backup2));
+                memcpy(&smm->cpu, &smm->backup1, sizeof(smm->cpu));
+                memcpy(&smm->cpu.i64.rdi, regs, sizeof(regs));
+                smm->cpu.i64.rip = (u32)regs[4];
+            } else if ((u32)smm->cpu.i64.rcx == CALL32SMM_RETURNID) {
+                memcpy(&smm->cpu, &smm->backup2, sizeof(smm->cpu));
+                memcpy(&smm->cpu.i64.rdi, regs, sizeof(regs));
+                smm->cpu.i64.rip = (u32)regs[4];
+            }
+        }
+    }
 }
 
 extern void entry_smi(void);
diff --git a/src/stacks.c b/src/stacks.c
index 13d587b..4b26d9f 100644
--- a/src/stacks.c
+++ b/src/stacks.c
@@ -6,6 +6,7 @@
 
 #include "biosvar.h" // GET_GLOBAL
 #include "bregs.h" // CR0_PE
+#include "fw/paravirt.h" // PORT_SMI_CMD
 #include "hw/rtc.h" // rtc_use
 #include "list.h" // hlist_node
 #include "malloc.h" // free
@@ -29,6 +30,139 @@
 } Call32Data VARLOW;
 
 #define C32_SLOPPY 1
+#define C32_SMM    2
+
+int HaveSmmCall32 VARFSEG;
+
+// Backup state in preparation for call32_smm()
+static void
+call32_smm_prep(void)
+{
+    // Backup cmos index register and disable nmi
+    u8 cmosindex = inb(PORT_CMOS_INDEX);
+    outb(cmosindex | NMI_DISABLE_BIT, PORT_CMOS_INDEX);
+    inb(PORT_CMOS_DATA);
+    SET_LOW(Call32Data.cmosindex, cmosindex);
+
+    // Backup ss
+    SET_LOW(Call32Data.ss, GET_SEG(SS));
+
+    SET_LOW(Call32Data.method, C32_SMM);
+}
+
+// Restore state backed up during call32_smm()
+static void
+call32_smm_post(void)
+{
+    SET_LOW(Call32Data.method, 0);
+    SET_LOW(Call32Data.ss, 0);
+
+    // Restore cmos index register
+    outb(GET_LOW(Call32Data.cmosindex), PORT_CMOS_INDEX);
+    inb(PORT_CMOS_DATA);
+}
+
+// Call a SeaBIOS C function in 32bit mode using smm trampoline
+static u32
+call32_smm(void *func, u32 eax)
+{
+    ASSERT16();
+    dprintf(9, "call32_smm %p %x\n", func, eax);
+    call32_smm_prep();
+    u32 bkup_esp;
+    asm volatile(
+        // Backup esp / set esp to flat stack location
+        "  movl %%esp, %0\n"
+        "  movl %%ss, %%eax\n"
+        "  shll $4, %%eax\n"
+        "  addl %%eax, %%esp\n"
+
+        // Transition to 32bit mode, call func, return to 16bit
+        "  movl $" __stringify(CALL32SMM_CMDID) ", %%eax\n"
+        "  movl $" __stringify(CALL32SMM_ENTERID) ", %%ecx\n"
+        "  movl $(" __stringify(BUILD_BIOS_ADDR) " + 1f), %%ebx\n"
+        "  outb %%al, $" __stringify(PORT_SMI_CMD) "\n"
+        "  rep; nop\n"
+        "  hlt\n"
+        "  .code32\n"
+
+        "1:movl %1, %%eax\n"
+        "  calll *%2\n"
+        "  movl %%eax, %1\n"
+
+        "  movl $" __stringify(CALL32SMM_CMDID) ", %%eax\n"
+        "  movl $" __stringify(CALL32SMM_RETURNID) ", %%ecx\n"
+        "  movl $2f, %%ebx\n"
+        "  outb %%al, $" __stringify(PORT_SMI_CMD) "\n"
+        "  rep; nop\n"
+        "  hlt\n"
+
+        // Restore esp
+        "  .code16gcc\n"
+        "2:movl %0, %%esp\n"
+        : "=&r" (bkup_esp), "+r" (eax)
+        : "r" (func)
+        : "eax", "ecx", "edx", "ebx", "cc", "memory");
+    call32_smm_post();
+
+    dprintf(9, "call32_smm done %p %x\n", func, eax);
+    return eax;
+}
+
+// 16bit handler code called from call16_smm()
+u32 VISIBLE16
+call16_smm_helper(u32 eax, u32 edx, u32 (*func)(u32 eax, u32 edx))
+{
+    if (!CONFIG_CALL32_SMM)
+        return eax;
+    call32_smm_post();
+    u32 ret = func(eax, edx);
+    call32_smm_prep();
+    return ret;
+}
+
+u32 FUNCFSEG
+call16_smm(u32 eax, u32 edx, void *func)
+{
+    ASSERT32FLAT();
+    if (!CONFIG_CALL32_SMM)
+        return eax;
+    func -= BUILD_BIOS_ADDR;
+    dprintf(9, "call16_smm %p %x %x\n", func, eax, edx);
+    u32 stackoffset = Call32Data.ss << 4;
+    asm volatile(
+        // Restore esp
+        "  subl %0, %%esp\n"
+
+        // Transition to 16bit mode, call func, return to 32bit
+        "  movl $" __stringify(CALL32SMM_CMDID) ", %%eax\n"
+        "  movl $" __stringify(CALL32SMM_RETURNID) ", %%ecx\n"
+        "  movl $(1f - " __stringify(BUILD_BIOS_ADDR) "), %%ebx\n"
+        "  outb %%al, $" __stringify(PORT_SMI_CMD) "\n"
+        "  rep; nop\n"
+        "  hlt\n"
+
+        "  .code16\n"
+        "1:movl %1, %%eax\n"
+        "  movl %3, %%ecx\n"
+        "  calll _cfunc16_call16_smm_helper\n"
+        "  movl %%eax, %1\n"
+
+        "  movl $" __stringify(CALL32SMM_CMDID) ", %%eax\n"
+        "  movl $" __stringify(CALL32SMM_ENTERID) ", %%ecx\n"
+        "  movl $2f, %%ebx\n"
+        "  outb %%al, $" __stringify(PORT_SMI_CMD) "\n"
+        "  rep; nop\n"
+        "  hlt\n"
+
+        // Set esp to flat stack location
+        "  .code32\n"
+        "2:addl %0, %%esp\n"
+        : "+r" (stackoffset), "+r" (eax), "+d" (edx)
+        : "r" (func)
+        : "eax", "ecx", "ebx", "cc", "memory");
+    return eax;
+}
 
 // Backup state in preparation for call32_sloppy()
 static void
@@ -157,6 +291,8 @@
 call32(void *func, u32 eax, u32 errret)
 {
     ASSERT16();
+    if (CONFIG_CALL32_SMM && GET_GLOBAL(HaveSmmCall32))
+        return call32_smm(func, eax);
     u32 cr0 = getcr0();
     if (cr0 & CR0_PE)
         // Called in 16bit protected mode?!
@@ -223,6 +359,8 @@
 call16_back(u32 eax, u32 edx, void *func)
 {
     ASSERT32FLAT();
+    if (CONFIG_CALL32_SMM && Call32Data.method == C32_SMM)
+        return call16_smm(eax, edx, func);
     if (Call32Data.method == C32_SLOPPY)
         return call16_sloppy(eax, edx, func);
     if (in_post())
diff --git a/src/stacks.h b/src/stacks.h
index c3ddc17..82c4c3c 100644
--- a/src/stacks.h
+++ b/src/stacks.h
@@ -4,7 +4,12 @@
 
 #include "types.h" // u32
 
+#define CALL32SMM_CMDID    0xb5
+#define CALL32SMM_ENTERID  0x1234
+#define CALL32SMM_RETURNID 0x5678
+
 // stacks.c
+extern int HaveSmmCall32;
 u32 call32(void *func, u32 eax, u32 errret);
 extern u8 ExtraStack[], *StackPos;
 u32 stack_hop(u32 eax, u32 edx, void *func);