Reduce stack usage of hw irq handlers.

Avoid using call16_int() -- it consumes too much stack space.
    Instead, use a new function (call16_simpint).  This assumes that
    the handlers won't corrupt regs - which should be a safe assumption,
    because if they did corrupt regs they wouldn't work on any bios.
Avoid enabling irqs in the hw irq handlers - there are no loops in the
    handlers that could cause any notable latency.
diff --git a/src/clock.c b/src/clock.c
index c76ea75..caa94ba 100644
--- a/src/clock.c
+++ b/src/clock.c
@@ -411,7 +411,6 @@
 handle_08()
 {
     debug_isr(DEBUG_ISR_08);
-    irq_enable();
 
     floppy_tick();
 
@@ -427,11 +426,8 @@
     SET_BDA(timer_counter, counter);
 
     // chain to user timer tick INT #0x1c
-    struct bregs br;
-    memset(&br, 0, sizeof(br));
-    call16_int(0x1c, &br);
-
-    irq_disable();
+    u32 eax=0, flags;
+    call16_simpint(0x1c, &eax, &flags);
 
     eoi_pic1();
 }
@@ -541,10 +537,8 @@
         goto done;
     if (registerC & 0x20) {
         // Handle Alarm Interrupt.
-        struct bregs br;
-        memset(&br, 0, sizeof(br));
-        call16_int(0x4a, &br);
-        irq_disable();
+        u32 eax=0, flags;
+        call16_simpint(0x4a, &eax, &flags);
     }
     if (!(registerC & 0x40))
         goto done;
diff --git a/src/kbd.c b/src/kbd.c
index 9c59005..3af7033 100644
--- a/src/kbd.c
+++ b/src/kbd.c
@@ -418,7 +418,10 @@
     { 0x8600, 0x8800, 0x8a00, 0x8c00, none }, /* F12 */
 };
 
-static void
+// Handle a scancode read from the ps2 port.  Note that "noinline" is
+// used to make sure the call to call16_simpint in handle_09 doesn't
+// have the overhead of this function's stack.
+static void noinline
 process_key(u8 scancode)
 {
     u8 shift_flags = GET_BDA(kbd_flag0);
@@ -620,23 +623,17 @@
     }
     u8 key = inb(PORT_PS2_DATA);
 
-    irq_enable();
     if (CONFIG_KBD_CALL_INT15_4F) {
         // allow for keyboard intercept
-        struct bregs tr;
-        memset(&tr, 0, sizeof(tr));
-        tr.al = key;
-        tr.ah = 0x4f;
-        tr.flags = F_CF;
-        call16_int(0x15, &tr);
-        if (!(tr.flags & F_CF))
+        u32 eax = (0x4f << 8) | key;
+        u32 flags;
+        call16_simpint(0x15, &eax, &flags);
+        if (!(flags & F_CF))
             goto done;
-        key = tr.al;
+        key = eax;
     }
     process_key(key);
 
-    irq_disable();
-
 done:
     eoi_pic1();
 }
diff --git a/src/mouse.c b/src/mouse.c
index 6c74fc1..65baa20 100644
--- a/src/mouse.c
+++ b/src/mouse.c
@@ -319,6 +319,8 @@
     SET_EBDA2(ebda_seg, mouse_flag1, 0);
 
     u32 func = GET_EBDA2(ebda_seg, far_call_pointer);
+
+    irq_enable();
     asm volatile(
         "pushl %0\n"
         "pushw %w1\n"  // status
@@ -328,10 +330,11 @@
         "lcallw *8(%%esp)\n"
         "addl $12, %%esp\n"
         "cld\n"
-        : "+a" (func), "+b" (status), "+c" (X), "+d" (Y)
         :
-        : "esi", "edi", "ebp", "cc"
+        : "r"(func), "r"(status), "r"(X), "r"(Y)
+        : "cc"
         );
+    irq_disable();
 }
 
 // INT74h : PS/2 mouse hardware interrupt
@@ -342,9 +345,7 @@
     if (! CONFIG_PS2_MOUSE)
         goto done;
 
-    irq_enable();
     int74_function();
-    irq_disable();
 
 done:
     eoi_pic2();
diff --git a/src/system.c b/src/system.c
index 235e88c..47d5817 100644
--- a/src/system.c
+++ b/src/system.c
@@ -399,7 +399,6 @@
     // clear interrupt
     eoi_pic2();
     // legacy nmi call
-    struct bregs br;
-    memset(&br, 0, sizeof(br));
-    call16_int(0x02, &br);
+    u32 eax=0, flags;
+    call16_simpint(0x02, &eax, &flags);
 }
diff --git a/src/util.c b/src/util.c
index b94f0d1..abc1fb2 100644
--- a/src/util.c
+++ b/src/util.c
@@ -48,6 +48,25 @@
     call16(callregs);
 }
 
+inline void
+call16_simpint(int nr, u32 *eax, u32 *flags)
+{
+    extern void __force_link_error__call16_simpint_only_in_16bit_mode();  // declared only; no definition visible here
+    if (!MODE16)  // per the symbol name, use outside 16bit mode is meant to fail at link time
+        __force_link_error__call16_simpint_only_in_16bit_mode();
+
+    asm volatile(
+        "stc\n"      // set CF before invoking the handler
+        "int %2\n"   // software interrupt 'nr' (immediate operand)
+        "pushfl\n"   // copy the resulting eflags ...
+        "popl %1\n"  // ... into *flags
+        "cld\n"      // clear the direction flag after the handler returns
+        "cli\n"      // disable interrupts after the handler returns
+        : "+a"(*eax), "=r"(*flags)  // *eax is loaded into eax and read back from it
+        : "i"(nr)
+        : "cc", "memory");
+}
+
 // Switch to the extra stack in ebda and call a function.
 inline u32
 stack_hop(u32 eax, u32 edx, u32 ecx, void *func)
diff --git a/src/util.h b/src/util.h
index 5db1ba0..acd5d0c 100644
--- a/src/util.h
+++ b/src/util.h
@@ -65,11 +65,13 @@
     return val;
 }
 
+// util.c
+inline u32 stack_hop(u32 eax, u32 edx, u32 ecx, void *func);
+u8 checksum(u8 *far_data, u32 len);
 void *memset(void *s, int c, size_t n);
 void *memcpy(void *d1, const void *s1, size_t len);
 void *memcpy_far(void *far_d1, const void *far_s1, size_t len);
 void *memmove(void *d, const void *s, size_t len);
-
 struct bregs;
 inline void call16(struct bregs *callregs);
 inline void call16big(struct bregs *callregs);
@@ -78,6 +80,7 @@
         extern void irq_trampoline_ ##nr ();                    \
         __call16_int((callregs), (u32)&irq_trampoline_ ##nr );  \
     } while (0)
+inline void call16_simpint(int nr, u32 *eax, u32 *flags);
 
 // output.c
 void debug_serial_setup();
@@ -137,10 +140,6 @@
 // pcibios.c
 void handle_1ab1(struct bregs *regs);
 
-// util.c
-inline u32 stack_hop(u32 eax, u32 edx, u32 ecx, void *func);
-u8 checksum(u8 *far_data, u32 len);
-
 // shadow.c
 void make_bios_writable();
 void make_bios_readonly();