Support call16() calls after entering 32bit mode from call32().

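When transitioning to 32bit mode via call32(), record the caller's stack
segment register in StackSeg and restore the previous value on return.
On call16() requests, reload %ss from StackSeg and rebase %esp relative
to that segment, so that call16() works properly even when it is invoked
from 32bit code that was itself entered through call32().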

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
diff --git a/src/romlayout.S b/src/romlayout.S
index 8415c9f..9de91a2 100644
--- a/src/romlayout.S
+++ b/src/romlayout.S
@@ -146,9 +146,23 @@
 
         // Make call.
         .code16gcc
-1:      popl %ecx
+1:      movl $_zonelow_seg, %edx        // Adjust %ds, %ss, and %esp
+        movl %edx, %ds
+        movzwl StackSeg, %edx
+        movl %edx, %ecx
+        shll $4, %ecx
+        movl %edx, %ss
+        subl %ecx, %esp
+        movl %edx, %ds
+
+        popl %ecx                       // Call function
         popl %edx
         calll *%ecx
+
+        movl %ss, %edx                  // Readjust %esp
+        shll $4, %edx
+        addl %edx, %esp
+
         // Return via transition32
         movl $(2f + BUILD_BIOS_ADDR), %edx
         jmp transition32
diff --git a/src/stacks.c b/src/stacks.c
index 563ce1c..78ad65c 100644
--- a/src/stacks.c
+++ b/src/stacks.c
@@ -13,6 +13,8 @@
 #include "stacks.h" // struct mutex_s
 #include "util.h" // useRTC
 
+#define MAIN_STACK_MAX (1024*1024)
+
 
 /****************************************************************
  * Extra 16bit stack
@@ -107,6 +109,8 @@
     asm("lgdtl %0" : : "m"(*desc) : "memory");
 }
 
+u16 StackSeg VARLOW;
+
 // Call a 32bit SeaBIOS function from a 16bit SeaBIOS function.
 u32 VISIBLE16
 call32(void *func, u32 eax, u32 errret)
@@ -127,6 +131,8 @@
     struct descloc_s gdt;
     sgdt(&gdt);
 
+    u16 oldstackseg = GET_LOW(StackSeg);
+    SET_LOW(StackSeg, GET_SEG(SS));
     u32 bkup_ss, bkup_esp;
     asm volatile(
         // Backup ss/esp / set esp to flat stack location
@@ -153,6 +159,8 @@
         : "r" (func)
         : "ecx", "edx", "cc", "memory");
 
+    SET_LOW(StackSeg, oldstackseg);
+
     // Restore gdt and fs/gs
     lgdt(&gdt);
     SET_SEG(FS, fs);
@@ -169,7 +177,7 @@
 call16(u32 eax, u32 edx, void *func)
 {
     ASSERT32FLAT();
-    if (getesp() > BUILD_STACK_ADDR)
+    if (getesp() > MAIN_STACK_MAX)
         panic("call16 with invalid stack\n");
     extern u32 __call16(u32 eax, u32 edx, void *func);
     return __call16(eax, edx, func - BUILD_BIOS_ADDR);
@@ -179,7 +187,7 @@
 call16big(u32 eax, u32 edx, void *func)
 {
     ASSERT32FLAT();
-    if (getesp() > BUILD_STACK_ADDR)
+    if (getesp() > MAIN_STACK_MAX)
         panic("call16big with invalid stack\n");
     extern u32 __call16big(u32 eax, u32 edx, void *func);
     return __call16big(eax, edx, func - BUILD_BIOS_ADDR);
@@ -214,14 +222,14 @@
         return;
     }
     extern void _cfunc16__farcall16(void);
-    call16((u32)callregs, 0, _cfunc16__farcall16);
+    call16((u32)callregs - StackSeg * 16, StackSeg, _cfunc16__farcall16);
 }
 
 inline void
 farcall16big(struct bregs *callregs)
 {
     extern void _cfunc16__farcall16(void);
-    call16big((u32)callregs, 0, _cfunc16__farcall16);
+    call16big((u32)callregs - StackSeg * 16, StackSeg, _cfunc16__farcall16);
 }
 
 // Invoke a 16bit software interrupt.
@@ -265,7 +273,7 @@
 getCurThread(void)
 {
     u32 esp = getesp();
-    if (esp <= BUILD_STACK_ADDR)
+    if (esp <= MAIN_STACK_MAX)
         return &MainThread;
     return (void*)ALIGN_DOWN(esp, THREADSTACKSIZE);
 }
@@ -477,7 +485,7 @@
 wait_preempt(void)
 {
     if (MODESEGMENT || !CONFIG_THREAD_OPTIONROMS || !CanPreempt
-        || getesp() < 1024*1024)
+        || getesp() < MAIN_STACK_MAX)
         return 0;
     while (CanPreempt)
         yield();