Don't pass return address to transition(32,16,16big) on stack.

It's difficult to have a uniform view of the stack when transitioning
between modes, so pass the return address in a register.  As a result,
the transition functions now only access memory via the %cs selector.
diff --git a/src/entryfuncs.S b/src/entryfuncs.S
index 3c29b3f..afc5e61 100644
--- a/src/entryfuncs.S
+++ b/src/entryfuncs.S
@@ -156,7 +156,7 @@
         xorw %ax, %ax
         movw %ax, %ss
         movl $ BUILD_STACK_ADDR , %esp
-        pushl $ \cfunc
+        movl $ \cfunc , %edx
         jmp transition32
         .endm
 
diff --git a/src/resume.c b/src/resume.c
index 81ad1ac..2a743c9 100644
--- a/src/resume.c
+++ b/src/resume.c
@@ -40,7 +40,7 @@
             asm volatile(
                 "movw %w1, %%ss\n"
                 "movl %0, %%esp\n"
-                "pushl $s3_resume\n"
+                "movl $s3_resume, %%edx\n"
                 "jmp transition32\n"
                 : : "i"(BUILD_S3RESUME_STACK_ADDR), "r"(0)
                 );
diff --git a/src/romlayout.S b/src/romlayout.S
index a469596..d83f337 100644
--- a/src/romlayout.S
+++ b/src/romlayout.S
@@ -26,6 +26,7 @@
  ****************************************************************/
 
 // Place CPU into 32bit mode from 16bit mode.
+// %edx = return location (in 32bit mode)
 // Clobbers: ecx, flags, segment registers, cr0, idt/gdt
         DECLFUNC transition32
 transition32:
@@ -68,9 +69,10 @@
         movw %ax, %gs
 
         movl %ecx, %eax
-        retl
+        jmpl *%edx
 
 // Place CPU into 16bit mode from 32bit mode.
+// %edx = return location (in 16bit mode)
 // Clobbers: ecx, flags, segment registers, cr0, idt/gdt
         DECLFUNC transition16
         .global transition16big
@@ -130,7 +132,7 @@
         movw %ax, %ss  // Assume stack is in segment 0
 
         movl %ecx, %eax
-        retl
+        jmpl *%edx
 
 // Call a 16bit function from 16bit mode with a specified cpu register state
 // %eax = address of struct bregs
@@ -195,18 +197,22 @@
         .global __call16big_from32
         .code32
 __call16_from32:
-        pushl $1f
+        movl $1f, %edx
         jmp transition16
 __call16big_from32:
-        pushl $1f
+        movl $1f, %edx
         jmp transition16big
 
         // Make call.
         .code16gcc
 1:      calll __call16
         // Return via transition32
+        movl $(2f + BUILD_BIOS_ADDR), %edx
         jmp transition32
+        .code32
+2:      retl
 
+        .code16gcc
 // IRQ trampolines
         .macro IRQ_TRAMPOLINE num
         DECLFUNC irq_trampoline_0x\num
diff --git a/src/stacks.c b/src/stacks.c
index 14f6f8a..a68d37c 100644
--- a/src/stacks.c
+++ b/src/stacks.c
@@ -66,11 +66,11 @@
         "  movl %%ss, %0\n"
 
         // Transition to 32bit mode, call func, return to 16bit
-        "  pushl $(" __stringify(BUILD_BIOS_ADDR) " + 1f)\n"
+        "  movl $(" __stringify(BUILD_BIOS_ADDR) " + 1f), %%edx\n"
         "  jmp transition32\n"
         "  .code32\n"
         "1:calll *%2\n"
-        "  pushl $2f\n"
+        "  movl $2f, %%edx\n"
         "  jmp transition16big\n"
 
         // Restore ds/ss/esp