Allow wait_irq to be called in 32bit code.

If wait_irq() is called from 32bit code, then jump to 16bit mode for
the wait.

When called from 32bit code, have wait_irq() check for active threads
and yield() instead of halting if any are pending.  This ensures
threads aren't delayed if anything calls wait_irq().

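Use wait_irq() instead of biosusleep() in the 32bit "No bootable
device" loop that runs after a failed boot.

Move check_irqs() from util.c to stacks.c (now static), and add
"memory" clobbers to the irq_save(), hlt(), and wbinvd() asm.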
diff --git a/Makefile b/Makefile
index d0b8881..72d711d 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@
 SRC32FLAT=$(SRCBOTH) post.c shadow.c memmap.c coreboot.c boot.c \
       acpi.c smm.c mptable.c smbios.c pciinit.c optionroms.c mtrr.c \
       lzmadecode.c usb-hub.c paravirt.c
-SRC32SEG=util.c output.c pci.c pcibios.c apm.c
+SRC32SEG=util.c output.c pci.c pcibios.c apm.c stacks.c
 
 cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \
               /dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;)
diff --git a/src/boot.c b/src/boot.c
index 36450f0..335522f 100644
--- a/src/boot.c
+++ b/src/boot.c
@@ -449,7 +449,7 @@
         printf("No bootable device.\n");
         // Loop with irqs enabled - this allows ctrl+alt+delete to work.
         for (;;)
-            biosusleep(1000000);
+            wait_irq();
     }
 
     /* Do the loading, and set up vector as a far pointer to the boot
diff --git a/src/stacks.c b/src/stacks.c
index 859de3f..f5feeeb 100644
--- a/src/stacks.c
+++ b/src/stacks.c
@@ -8,6 +8,20 @@
 #include "util.h" // dprintf
 #include "bregs.h" // CR0_PE
 
+// Thread info - stored at bottom of each thread stack - don't change
+// without also updating the inline assembler below.
+struct thread_info {
+    struct thread_info *next;
+    void *stackpos;
+    struct thread_info **pprev;
+};
+struct thread_info VAR16VISIBLE MainThread;
+
+
+/****************************************************************
+ * Low level helpers
+ ****************************************************************/
+
 static inline u32 getcr0(void) {
     u32 cr0;
     asm("movl %%cr0, %0" : "=r"(cr0));
@@ -77,6 +91,65 @@
     return 0;
 }
 
+// 16bit trampoline for enabling irqs from 32bit mode.
+ASM16(
+    "  .global trampoline_checkirqs\n"
+    "trampoline_checkirqs:\n"
+    "  rep ; nop\n"
+    "  lretw"
+    );
+
+static void
+check_irqs(void)
+{
+    if (MODESEGMENT) {
+        asm volatile(
+            "sti\n"
+            "nop\n"
+            "rep ; nop\n"
+            "cli\n"
+            "cld\n"
+            : : :"memory");
+        return;
+    }
+    extern void trampoline_checkirqs();
+    struct bregs br;
+    br.flags = F_IF;
+    br.code.seg = SEG_BIOS;
+    br.code.offset = (u32)&trampoline_checkirqs;
+    call16big(&br);
+}
+
+// 16bit trampoline for waiting for an irq from 32bit mode.
+ASM16(
+    "  .global trampoline_waitirq\n"
+    "trampoline_waitirq:\n"
+    "  sti\n"
+    "  hlt\n"
+    "  lretw"
+    );
+
+// Wait for next irq to occur.
+void
+wait_irq(void)
+{
+    if (MODESEGMENT) {
+        asm volatile("sti ; hlt ; cli ; cld": : :"memory");
+        return;
+    }
+    if (CONFIG_THREADS && MainThread.next != &MainThread) {
+        // Threads still active - do a yield instead.
+        yield();
+        return;
+    }
+    extern void trampoline_waitirq();
+    struct bregs br;
+    br.flags = 0;
+    br.code.seg = SEG_BIOS;
+    br.code.offset = (u32)&trampoline_waitirq;
+    call16big(&br);
+}
+
 
 /****************************************************************
  * Stack in EBDA
@@ -115,16 +188,6 @@
  ****************************************************************/
 
 #define THREADSTACKSIZE 4096
-
-// Thread info - stored at bottom of each thread stack - don't change
-// without also updating the inline assembler below.
-struct thread_info {
-    struct thread_info *next;
-    void *stackpos;
-    struct thread_info **pprev;
-};
-
-struct thread_info VAR16VISIBLE MainThread;
 int VAR16VISIBLE CanPreempt;
 
 void
diff --git a/src/util.c b/src/util.c
index e146c97..b078d5f 100644
--- a/src/util.c
+++ b/src/util.c
@@ -57,35 +57,6 @@
     call16(callregs);
 }
 
-// 16bit trampoline for enabling irqs from 32bit mode.
-ASM16(
-    "  .global trampoline_checkirqs\n"
-    "trampoline_checkirqs:\n"
-    "  rep ; nop\n"
-    "  lretw"
-    );
-
-void
-check_irqs(void)
-{
-    if (MODE16) {
-        asm volatile(
-            "sti\n"
-            "nop\n"
-            "rep ; nop\n"
-            "cli\n"
-            "cld\n"
-            : : :"memory");
-    } else {
-        extern void trampoline_checkirqs();
-        struct bregs br;
-        br.flags = F_IF;
-        br.code.seg = SEG_BIOS;
-        br.code.offset = (u32)&trampoline_checkirqs;
-        call16big(&br);
-    }
-}
-
 
 /****************************************************************
  * String ops
diff --git a/src/util.h b/src/util.h
index e47860f..a5c5b75 100644
--- a/src/util.h
+++ b/src/util.h
@@ -21,7 +21,7 @@
 static inline unsigned long irq_save(void)
 {
     unsigned long flags;
-    asm volatile("pushfl ; popl %0" : "=g" (flags));
+    asm volatile("pushfl ; popl %0" : "=g" (flags): :"memory");
     irq_disable();
     return flags;
 }
@@ -36,12 +36,6 @@
     asm volatile("rep ; nop": : :"memory");
 }
 
-// Atomically enable irqs and sleep until an irq; then re-disable irqs.
-static inline void wait_irq(void)
-{
-    asm volatile("sti ; hlt ; cli ; cld": : :"memory");
-}
-
 static inline void nop(void)
 {
     asm volatile("nop");
@@ -49,12 +43,12 @@
 
 static inline void hlt(void)
 {
-    asm volatile("hlt");
+    asm volatile("hlt": : :"memory");
 }
 
 static inline void wbinvd(void)
 {
-    asm volatile("wbinvd");
+    asm volatile("wbinvd": : :"memory");
 }
 
 #define CPUID_MSR (1 << 5)
@@ -182,7 +176,6 @@
         extern void irq_trampoline_ ##nr ();                    \
         __call16_int((callregs), (u32)&irq_trampoline_ ##nr );  \
     } while (0)
-void check_irqs(void);
 u8 checksum_far(u16 buf_seg, void *buf_far, u32 len);
 u8 checksum(void *buf, u32 len);
 size_t strlen(const char *s);
@@ -209,6 +202,7 @@
 void thread_setup(void);
 struct thread_info *getCurThread(void);
 void yield(void);
+void wait_irq(void);
 void run_thread(void (*func)(void*), void *data);
 void wait_threads(void);
 struct mutex_s { u32 isLocked; };