Switch to a new stack when calling ATA functions in 16-bit mode.

This reduces usage of the caller's stack (old DOS programs don't provide much space).
diff --git a/src/asm-offsets.c b/src/asm-offsets.c
index 1e3a293..c3c3bc6 100644
--- a/src/asm-offsets.c
+++ b/src/asm-offsets.c
@@ -25,7 +25,5 @@
     OFFSET(BDA_ebda_seg, bios_data_area_s, ebda_seg);
 
     COMMENT("EBDA");
-    OFFSET(EBDA_resume_stack_top, extended_bios_data_area_s
-           , resume_stack[FIELD_SIZEOF(struct extended_bios_data_area_s
-                                       , resume_stack)]);
+    DEFINE(EBDA_OFFSET_TOP_STACK, EBDA_OFFSET_TOP_STACK);
 }
diff --git a/src/biosvar.h b/src/biosvar.h
index cd8e0ac..2fb4b8d 100644
--- a/src/biosvar.h
+++ b/src/biosvar.h
@@ -214,8 +214,8 @@
 
     u16 boot_sequence;
 
-    // Resume stack
-    u8 resume_stack[128] __aligned(8);
+    // Stack space available for code that needs it.
+    u8 extra_stack[512] __aligned(8);
 } PACKED;
 
 // Accessor functions
@@ -234,11 +234,16 @@
     GET_FARVAR(eseg, ((struct extended_bios_data_area_s *)0)->var)
 #define SET_EBDA2(eseg, var, val)                                       \
     SET_FARVAR(eseg, ((struct extended_bios_data_area_s *)0)->var, (val))
-#define GET_EBDA(var)                                            \
+#define GET_EBDA(var)                           \
     GET_EBDA2(get_ebda_seg(), var)
-#define SET_EBDA(var, val)                                       \
+#define SET_EBDA(var, val)                      \
     SET_EBDA2(get_ebda_seg(), var, (val))
 
+#define EBDA_OFFSET_TOP_STACK                                   \
+    offsetof(struct extended_bios_data_area_s, extra_stack[     \
+                 FIELD_SIZEOF(struct extended_bios_data_area_s  \
+                              , extra_stack)])
+
 
 /****************************************************************
  * Global variables
diff --git a/src/disk.c b/src/disk.c
index e72be9d..02997fc 100644
--- a/src/disk.c
+++ b/src/disk.c
@@ -39,28 +39,39 @@
 #define DISK_STUB(regs) \
     __disk_stub(__func__, __LINE__, (regs))
 
-static __always_inline int
-send_disk_op(struct disk_op_s *op)
+static int
+__send_disk_op(struct disk_op_s *op_p, u16 op_s) // caller's op is at far address op_s:op_p
 {
+    struct disk_op_s dop; // local copy of the caller's op, filled by the memcpy_far below
+    memcpy_far(MAKE_FARPTR(GET_SEG(SS), &dop)
+               , MAKE_FARPTR(op_s, op_p)
+               , sizeof(dop));
+
     dprintf(DEBUG_HDL_13, "disk_op d=%d lba=%d buf=%p count=%d cmd=%d\n"
-            , op->driveid, (u32)op->lba, op->far_buffer
-            , op->count, op->command);
+            , dop.driveid, (u32)dop.lba, dop.far_buffer
+            , dop.count, dop.command);
 
     irq_enable();
 
     int status;
-    if (op->command == CMD_CDEMU_READ)
-        status = cdrom_read_512(op);
-    else if (op->command == CMD_CDROM_READ)
-        status = cdrom_read(op);
+    if (dop.command == CMD_CDEMU_READ)
+        status = cdrom_read_512(&dop);
+    else if (dop.command == CMD_CDROM_READ)
+        status = cdrom_read(&dop);
     else
-        status = ata_cmd_data(op);
+        status = ata_cmd_data(&dop);
 
     irq_disable();
 
     return status;
 }
 
+static int
+send_disk_op(struct disk_op_s *op)
+{   // Run __send_disk_op through stack_hop; op is passed as its offset plus the current %ss.
+    return stack_hop((u32)op, GET_SEG(SS), 0, __send_disk_op);
+}
+
 static void
 basic_access(struct bregs *regs, u8 device, u16 command)
 {
diff --git a/src/farptr.h b/src/farptr.h
index ad5fee5..cb301b1 100644
--- a/src/farptr.h
+++ b/src/farptr.h
@@ -11,6 +11,7 @@
 // Dummy definitions used to make sure gcc understands dependencies
 // between SET_SEG and GET/READ/WRITE_SEG macros.
 extern u16 __segment_ES, __segment_CS, __segment_DS, __segment_SS;
+extern u16 __segment_FS, __segment_GS;
 
 // Low level macros for reading/writing memory via a segment selector.
 #define READ8_SEG(SEG, value, var)                      \
@@ -98,7 +99,7 @@
         SET_VAR(ES, (var), __sfv_val);          \
     } while (0)
 
-// Macros for accesssing a 32bit pointer from 16bit mode.  (They
+// Macros for accessing a 32bit pointer from 16bit real mode.  (They
 // automatically update the %es segment, break the pointer into
 // segment/offset, and then make the access.)
 #define __GET_FARPTR(ptr) ({                                            \
@@ -116,7 +117,7 @@
 // equivalent 16bit segment/offset values.
 #define FARPTR_TO_SEG(p) (((u32)(p)) >> 4)
 #define FARPTR_TO_OFFSET(p) (((u32)(p)) & 0xf)
-#define MAKE_FARPTR(seg,off) ((void*)(((seg)<<4)+(off)))
+#define MAKE_FARPTR(seg,off) ((void*)(((u32)(seg)<<4)+(u32)(off)))
 
 
 #if MODE16 == 1
diff --git a/src/romlayout.S b/src/romlayout.S
index 0d8e58a..6c5429d 100644
--- a/src/romlayout.S
+++ b/src/romlayout.S
@@ -321,7 +321,7 @@
         movw BDA_ebda_seg, %ax
         movw %ax, %ss
         movw %ax, %ds
-        movl $EBDA_resume_stack_top, %esp
+        movl $EBDA_OFFSET_TOP_STACK, %esp
 
         // Call handler.
         movl %ebx, %eax
diff --git a/src/util.c b/src/util.c
index 1237cd1..b94f0d1 100644
--- a/src/util.c
+++ b/src/util.c
@@ -8,6 +8,7 @@
 #include "bregs.h" // struct bregs
 #include "config.h" // SEG_BIOS
 #include "farptr.h" // GET_FARPTR
+#include "biosvar.h" // get_ebda_seg
 
 // Call a function with a specified register state.  Note that on
 // return, the interrupt enable/disable flag may be altered.
@@ -22,21 +23,21 @@
 #endif
         : "+a" (callregs), "+m" (*callregs)
         :
-        : "ebx", "ecx", "edx", "esi", "edi", "ebp", "cc");
+        : "ebx", "ecx", "edx", "esi", "edi", "ebp", "cc", "memory");
 }
 
 inline void
 call16big(struct bregs *callregs)
 {
-#if MODE16 == 1
-    extern void __force_link_error__only_in_32bit_mode();
-    __force_link_error__only_in_32bit_mode();
-#endif
+    extern void __force_link_error__call16big_only_in_32bit_mode();
+    if (MODE16) // NOTE(review): symbol is only declared here - presumably left undefined so 16bit builds fail to link
+        __force_link_error__call16big_only_in_32bit_mode();
+
     asm volatile(
         "calll __call16big_from32\n"
         : "+a" (callregs), "+m" (*callregs)
         :
-        : "ebx", "ecx", "edx", "esi", "edi", "ebp", "cc");
+        : "ebx", "ecx", "edx", "esi", "edi", "ebp", "cc", "memory");
 }
 
 inline void
@@ -47,6 +48,37 @@
     call16(callregs);
 }
 
+// Call func on the extra stack in ebda with eax/edx/ecx in %eax/%edx/%ecx; returns %eax.
+inline u32
+stack_hop(u32 eax, u32 edx, u32 ecx, void *func)
+{
+    extern void __force_link_error__stack_hop_only_in_16bit_mode();
+    if (!MODE16)
+        __force_link_error__stack_hop_only_in_16bit_mode();
+
+    u32 ebda_seg = get_ebda_seg();
+    u32 tmp;
+    asm volatile(
+        // Backup current %ss value (tmp is earlyclobber: written before func's operand is used).
+        "movl %%ss, %4\n"
+        // Copy ebda seg to %ss and %ds
+        "movl %3, %%ss\n"
+        "movl %3, %%ds\n"
+        // Backup %esp (reusing the ebda_seg register) and set it to new value
+        "movl %%esp, %3\n"
+        "movl %5, %%esp\n"
+        // Call func
+        "calll %6\n"
+        // Restore segments and stack
+        "movl %3, %%esp\n"
+        "movl %4, %%ss\n"
+        "movl %4, %%ds\n"
+        : "+a" (eax), "+d" (edx), "+c" (ecx), "+r" (ebda_seg), "=&r" (tmp)
+        : "i" (EBDA_OFFSET_TOP_STACK), "m" (*(u8*)func)
+        : "cc", "memory");
+    return eax;
+}
+
 // Sum the bytes in the specified area.
 u8
 checksum(u8 *far_data, u32 len)
diff --git a/src/util.h b/src/util.h
index b3d6a49..5db1ba0 100644
--- a/src/util.h
+++ b/src/util.h
@@ -138,6 +138,7 @@
 void handle_1ab1(struct bregs *regs);
 
 // util.c
+inline u32 stack_hop(u32 eax, u32 edx, u32 ecx, void *func);
 u8 checksum(u8 *far_data, u32 len);
 
 // shadow.c