Minor - always set %esp immediately after setting %ss.

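Loading %ss holds off interrupts until after the next instruction, so
setting %ss and then %esp back-to-back is guaranteed to be atomic.
Always set %esp in the instruction immediately following the load of
%ss (with any %ds load placed before it) to ensure IRQs and NMIs see a
consistent stack.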
diff --git a/src/romlayout.S b/src/romlayout.S
index 4676d2f..c31d9cc 100644
--- a/src/romlayout.S
+++ b/src/romlayout.S
@@ -158,8 +158,8 @@
         movw %ax, %ds
         movw BDA_ebda_seg, %ax
         // XXX - should verify ebda_seg looks sane.
-        movw %ax, %ss
         movw %ax, %ds
+        movw %ax, %ss
         movl $EBDA_OFFSET_TOP_STACK, %esp
 
         // Call handler.
diff --git a/src/util.c b/src/util.c
index af5dbd3..9d85b29 100644
--- a/src/util.c
+++ b/src/util.c
@@ -75,25 +75,24 @@
     if (!MODE16)
         __force_link_error__stack_hop_only_in_16bit_mode();
 
-    u32 ebda_seg = get_ebda_seg();
-    u32 tmp;
+    u16 ebda_seg = get_ebda_seg(), bkup_ss;
+    u32 bkup_esp;
     asm volatile(
-        // Backup current %ss value.
-        "movl %%ss, %4\n"
-        // Copy ebda seg to %ss and %ds
-        "movl %3, %%ss\n"
-        "movl %3, %%ds\n"
-        // Backup %esp and set it to new value
-        "movl %%esp, %3\n"
+        // Backup current %ss/%esp values.
+        "movw %%ss, %w3\n"
+        "movl %%esp, %4\n"
+        // Copy ebda seg to %ds/%ss and set %esp
+        "movw %w6, %%ds\n"
+        "movw %w6, %%ss\n"
         "movl %5, %%esp\n"
         // Call func
-        "calll %6\n"
+        "calll %7\n"
         // Restore segments and stack
-        "movl %3, %%esp\n"
-        "movl %4, %%ss\n"
-        "movl %4, %%ds\n"
-        : "+a" (eax), "+d" (edx), "+c" (ecx), "+r" (ebda_seg), "=r" (tmp)
-        : "i" (EBDA_OFFSET_TOP_STACK), "m" (*(u8*)func)
+        "movw %w3, %%ds\n"
+        "movw %w3, %%ss\n"
+        "movl %4, %%esp\n"
+        : "+a" (eax), "+d" (edx), "+c" (ecx), "=&r" (bkup_ss), "=&r" (bkup_esp)
+        : "i" (EBDA_OFFSET_TOP_STACK), "r" (ebda_seg), "m" (*(u8*)func)
         : "cc", "memory");
     return eax;
 }