Replace irq_enable() regions with explicit calls to check for irqs.

Add new function yield() which will permit irqs to trigger.
The yield() call enables irqs to occur in 32bit mode.
Add nsleep(), usleep() and msleep() calls that yield instead of just spinning.
Rename existing int 1586 usleep call to biosusleep.
Convert many mdelay() calls to msleep().
diff --git a/src/ata.c b/src/ata.c
index 050269e..2401557 100644
--- a/src/ata.c
+++ b/src/ata.c
@@ -40,6 +40,7 @@
             dprintf(1, "IDE time out\n");
             return -1;
         }
+        yield();
     }
 }
 
@@ -90,7 +91,7 @@
     outb(ATA_CB_DC_HD15 | ATA_CB_DC_NIEN | ATA_CB_DC_SRST, iobase2+ATA_CB_DC);
     udelay(5);
     outb(ATA_CB_DC_HD15 | ATA_CB_DC_NIEN, iobase2+ATA_CB_DC);
-    mdelay(2);
+    msleep(2);
 
     // wait for device to become not busy.
     int status = await_not_bsy(iobase1);
@@ -653,6 +654,7 @@
             dprintf(1, "powerup IDE time out\n");
             return -1;
         }
+        yield();
     }
     dprintf(6, "powerup iobase=%x st=%x\n", base, status);
     return status;
diff --git a/src/block.c b/src/block.c
index ce6c807..a62536c 100644
--- a/src/block.c
+++ b/src/block.c
@@ -321,12 +321,8 @@
             , dop.drive_g, (u32)dop.lba, dop.buf_fl
             , dop.count, dop.command);
 
-    irq_enable();
-
     int status = process_op(&dop);
 
-    irq_disable();
-
     // Update count with total sectors transferred.
     SET_FARVAR(op_seg, op_far->count, dop.count);
 
diff --git a/src/boot.c b/src/boot.c
index e4cdbbc..668ddb0 100644
--- a/src/boot.c
+++ b/src/boot.c
@@ -5,7 +5,7 @@
 //
 // This file may be distributed under the terms of the GNU LGPLv3 license.
 
-#include "util.h" // irq_enable
+#include "util.h" // dprintf
 #include "biosvar.h" // GET_EBDA
 #include "config.h" // CONFIG_*
 #include "disk.h" // cdrom_boot
@@ -431,7 +431,7 @@
         printf("No bootable device.\n");
         // Loop with irqs enabled - this allows ctrl+alt+delete to work.
         for (;;)
-            usleep(1000000);
+            biosusleep(1000000);
     }
 
     /* Do the loading, and set up vector as a far pointer to the boot
diff --git a/src/clock.c b/src/clock.c
index 7735c70..7077631 100644
--- a/src/clock.c
+++ b/src/clock.c
@@ -101,23 +101,33 @@
         cpu_relax();
 }
 
-void
-ndelay(u32 count)
+static void
+tscsleep(u64 diff)
 {
-    u32 khz = GET_GLOBAL(cpu_khz);
-    tscdelay(count * khz / 1000000);
+    u64 start = rdtscll();
+    u64 end = start + diff;
+    while (!check_time(end))
+        yield();
 }
-void
-udelay(u32 count)
-{
-    u32 khz = GET_GLOBAL(cpu_khz);
-    tscdelay(count * khz / 1000);
+
+void ndelay(u32 count) {
+    tscdelay(count * GET_GLOBAL(cpu_khz) / 1000000);
 }
-void
-mdelay(u32 count)
-{
-    u32 khz = GET_GLOBAL(cpu_khz);
-    tscdelay(count * khz);
+void udelay(u32 count) {
+    tscdelay(count * GET_GLOBAL(cpu_khz) / 1000);
+}
+void mdelay(u32 count) {
+    tscdelay(count * GET_GLOBAL(cpu_khz));
+}
+
+void nsleep(u32 count) {
+    tscsleep(count * GET_GLOBAL(cpu_khz) / 1000000);
+}
+void usleep(u32 count) {
+    tscsleep(count * GET_GLOBAL(cpu_khz) / 1000);
+}
+void msleep(u32 count) {
+    tscsleep(count * GET_GLOBAL(cpu_khz));
 }
 
 // Return the TSC value that is 'msecs' time in the future.
diff --git a/src/floppy.c b/src/floppy.c
index 311fc4b..a8e2ac9 100644
--- a/src/floppy.c
+++ b/src/floppy.c
@@ -9,7 +9,7 @@
 #include "disk.h" // DISK_RET_SUCCESS
 #include "config.h" // CONFIG_FLOPPY
 #include "biosvar.h" // SET_BDA
-#include "util.h" // irq_disable
+#include "util.h" // wait_irq
 #include "cmos.h" // inb_cmos
 #include "pic.h" // eoi_pic1
 #include "bregs.h" // struct bregs
@@ -174,12 +174,11 @@
 static int
 wait_floppy_irq()
 {
+    ASSERT16();
     u8 v;
     for (;;) {
-        if (!GET_BDA(floppy_motor_counter)) {
-            irq_disable();
+        if (!GET_BDA(floppy_motor_counter))
             return -1;
-        }
         v = GET_BDA(floppy_recalibration_status);
         if (v & FRS_TIMEOUT)
             break;
diff --git a/src/kbd.c b/src/kbd.c
index 6b14940..29eb29a 100644
--- a/src/kbd.c
+++ b/src/kbd.c
@@ -234,9 +234,7 @@
 handle_160a(struct bregs *regs)
 {
     u8 param[2];
-    irq_enable();
     int ret = kbd_command(ATKBD_CMD_GETID, param);
-    irq_disable();
     if (ret) {
         regs->bx = 0;
         return;
@@ -306,9 +304,7 @@
     if (shift_flags == led_flags)
         return;
 
-    irq_enable();
     int ret = kbd_command(ATKBD_CMD_SETLEDS, &shift_flags);
-    irq_disable();
     if (ret)
         // Error
         return;
diff --git a/src/mouse.c b/src/mouse.c
index e7ec0c1..5a1b81f 100644
--- a/src/mouse.c
+++ b/src/mouse.c
@@ -269,8 +269,6 @@
         return;
     }
 
-    irq_enable();
-
     switch (regs->al) {
     case 0x00: mouse_15c200(regs); break;
     case 0x01: mouse_15c201(regs); break;
diff --git a/src/ps2port.c b/src/ps2port.c
index 25d4544..87aa02e 100644
--- a/src/ps2port.c
+++ b/src/ps2port.c
@@ -54,21 +54,16 @@
 i8042_flush(void)
 {
     dprintf(7, "i8042_flush\n");
-    unsigned long flags = irq_save();
-
     int i;
     for (i=0; i<I8042_BUFFER_SIZE; i++) {
         u8 status = inb(PORT_PS2_STATUS);
-        if (! (status & I8042_STR_OBF)) {
-            irq_restore(flags);
+        if (! (status & I8042_STR_OBF))
             return 0;
-        }
         udelay(50);
         u8 data = inb(PORT_PS2_DATA);
         dprintf(7, "i8042 flushed %x (status=%x)\n", data, status);
     }
 
-    irq_restore(flags);
     dprintf(1, "i8042 timeout on flush\n");
     return -1;
 }
@@ -110,9 +105,7 @@
 i8042_command(int command, u8 *param)
 {
     dprintf(7, "i8042_command cmd=%x\n", command);
-    unsigned long flags = irq_save();
     int ret = __i8042_command(command, param);
-    irq_restore(flags);
     if (ret)
         dprintf(2, "i8042 command %x failed\n", command);
     return ret;
@@ -122,14 +115,9 @@
 i8042_kbd_write(u8 c)
 {
     dprintf(7, "i8042_kbd_write c=%d\n", c);
-    unsigned long flags = irq_save();
-
     int ret = i8042_wait_write();
     if (! ret)
         outb(c, PORT_PS2_DATA);
-
-    irq_restore(flags);
-
     return ret;
 }
 
@@ -152,30 +140,31 @@
 {
     u64 end = calc_future_tsc(timeout);
     for (;;) {
+        u8 status = inb(PORT_PS2_STATUS);
+        if (status & I8042_STR_OBF) {
+            u8 data = inb(PORT_PS2_DATA);
+            dprintf(7, "ps2 read %x\n", data);
+
+            if (!!(status & I8042_STR_AUXDATA) == aux) {
+                if (!needack)
+                    return data;
+                if (data == PS2_RET_ACK)
+                    return data;
+                if (data == PS2_RET_NAK) {
+                    dprintf(1, "Got ps2 nak (status=%x); continuing\n", status);
+                    return data;
+                }
+            }
+
+            // This data not for us - XXX - just discard it for now.
+            dprintf(1, "Discarding ps2 data %x (status=%x)\n", data, status);
+        }
+
         if (check_time(end)) {
             dprintf(1, "ps2_recvbyte timeout\n");
             return -1;
         }
-
-        u8 status = inb(PORT_PS2_STATUS);
-        if (! (status & I8042_STR_OBF))
-            continue;
-        u8 data = inb(PORT_PS2_DATA);
-        dprintf(7, "ps2 read %x\n", data);
-
-        if (!!(status & I8042_STR_AUXDATA) == aux) {
-            if (!needack)
-                return data;
-            if (data == PS2_RET_ACK)
-                return data;
-            if (data == PS2_RET_NAK) {
-                dprintf(1, "Got ps2 nak (status=%x); continuing\n", status);
-                return data;
-            }
-        }
-
-        // This data not for us - XXX - just discard it for now.
-        dprintf(1, "Discarding ps2 data %x (status=%x)\n", data, status);
+        yield();
     }
 }
 
diff --git a/src/serial.c b/src/serial.c
index a24f83f..19e39ca 100644
--- a/src/serial.c
+++ b/src/serial.c
@@ -118,7 +118,6 @@
     if (!addr)
         return;
     struct tick_timer_s tt = initTickTimer(GET_BDA(com_timeout[regs->dx]));
-    irq_enable();
     for (;;) {
         u8 lsr = inb(addr+SEROFF_LSR);
         if ((lsr & 0x60) == 0x60) {
@@ -133,8 +132,8 @@
             regs->ah = lsr | 0x80;
             break;
         }
+        yield();
     }
-    irq_disable();
     set_success(regs);
 }
 
@@ -146,7 +145,6 @@
     if (!addr)
         return;
     struct tick_timer_s tt = initTickTimer(GET_BDA(com_timeout[regs->dx]));
-    irq_enable();
     for (;;) {
         u8 lsr = inb(addr+SEROFF_LSR);
         if (lsr & 0x01) {
@@ -160,8 +158,8 @@
             regs->ah = lsr | 0x80;
             break;
         }
+        yield();
     }
-    irq_disable();
     set_success(regs);
 }
 
@@ -265,7 +263,6 @@
         return;
 
     struct tick_timer_s tt = initTickTimer(GET_BDA(lpt_timeout[regs->dx]));
-    irq_enable();
 
     outb(regs->al, addr);
     u8 val8 = inb(addr+2);
@@ -285,9 +282,9 @@
             regs->ah = (v ^ 0x48) | 0x01;
             break;
         }
+        yield();
     }
 
-    irq_disable();
     set_success(regs);
 }
 
diff --git a/src/smp.c b/src/smp.c
index ffeb5ae..a912857 100644
--- a/src/smp.c
+++ b/src/smp.c
@@ -98,7 +98,7 @@
 
     // Wait for other CPUs to process the SIPI.
     if (CONFIG_COREBOOT)
-        mdelay(10);
+        msleep(10);
     else
         while (inb_cmos(CMOS_BIOS_SMP_COUNT) + 1 != readl(&CountCPUs))
             ;
diff --git a/src/system.c b/src/system.c
index a9f271e..e8ed5a7 100644
--- a/src/system.c
+++ b/src/system.c
@@ -5,7 +5,7 @@
 //
 // This file may be distributed under the terms of the GNU LGPLv3 license.
 
-#include "util.h" // irq_restore
+#include "util.h" // memcpy_far
 #include "biosvar.h" // BIOS_CONFIG_TABLE
 #include "ioport.h" // inb
 #include "memmap.h" // E820_RAM
diff --git a/src/usb-ohci.c b/src/usb-ohci.c
index 71202f8..9e07c89 100644
--- a/src/usb-ohci.c
+++ b/src/usb-ohci.c
@@ -25,7 +25,7 @@
     // Do reset
     writel(&cntl->ohci.regs->control, OHCI_USB_RESET | oldrwc);
     readl(&cntl->ohci.regs->control); // flush writes
-    mdelay(50);
+    msleep(50);
 
     // Do software init (min 10us, max 2ms)
     u64 end = calc_future_tsc_usec(10);
@@ -81,7 +81,7 @@
     rha &= ~(RH_A_PSM | RH_A_OCPM);
     writel(&cntl->ohci.regs->roothub_status, RH_HS_LPSC);
     writel(&cntl->ohci.regs->roothub_b, RH_B_PPCM);
-    mdelay((rha >> 24) * 2);
+    msleep((rha >> 24) * 2);
 
     // Count and reset connected devices
     int ports = rha & RH_A_NDP;
@@ -96,7 +96,7 @@
         // No devices connected
         goto shutdown;
 
-    mdelay(60);    // XXX - should poll instead of using timer.
+    msleep(60);    // XXX - should poll instead of using timer.
 
     totalcount = 0;
     for (i=0; i<ports; i++) {
@@ -229,7 +229,7 @@
 
     int ret = wait_ed(ed);
     ed->hwINFO = ED_SKIP;
-    udelay(1); // XXX - in case controller still accessing tds
+    usleep(1); // XXX - in case controller still accessing tds
     free(tds);
     return ret;
 }
diff --git a/src/usb-uhci.c b/src/usb-uhci.c
index 14a5300..8045375 100644
--- a/src/usb-uhci.c
+++ b/src/usb-uhci.c
@@ -97,10 +97,10 @@
         outw(USBPORTSC_PR, cntl->uhci.iobase + USBPORTSC1);
     if (port2 & USBPORTSC_CCS)
         outw(USBPORTSC_PR, cntl->uhci.iobase + USBPORTSC2);
-    mdelay(50);
+    msleep(50);
     outw(0, cntl->uhci.iobase + USBPORTSC1);
     outw(0, cntl->uhci.iobase + USBPORTSC2);
-    mdelay(10);
+    msleep(10);
 
     // Configure ports
     int totalcount = 0;
diff --git a/src/usb.c b/src/usb.c
index ecb8683..cb75e78 100644
--- a/src/usb.c
+++ b/src/usb.c
@@ -121,7 +121,7 @@
     int ret = send_default_control(endp, &req, NULL);
     if (ret)
         return 0;
-    mdelay(2);
+    msleep(2);
 
     cntl->maxaddr++;
     return mkendp(cntl, cntl->maxaddr, 0, endp2speed(endp), endp2maxsize(endp));
diff --git a/src/util.c b/src/util.c
index c09b851..4086948 100644
--- a/src/util.c
+++ b/src/util.c
@@ -9,6 +9,11 @@
 #include "farptr.h" // GET_FLATPTR
 #include "biosvar.h" // get_ebda_seg
 
+
+/****************************************************************
+ * 16bit calls
+ ****************************************************************/
+
 // Call a function with a specified register state.  Note that on
 // return, the interrupt enable/disable flag may be altered.
 inline void
@@ -76,6 +81,41 @@
     return eax;
 }
 
+// 16bit trampoline for enabling irqs from 32bit mode.
+ASM16(
+    "  .global trampoline_yield\n"
+    "trampoline_yield:\n"
+    "  rep ; nop\n"
+    "  lretw"
+    );
+
+// Briefly permit irqs to occur.
+void
+yield()
+{
+    if (MODE16) {
+        asm volatile(
+            "sti\n"
+            "nop\n"
+            "rep ; nop\n"
+            "cli\n"
+            "cld\n"
+            : : :"memory");
+        return;
+    }
+    extern void trampoline_yield();
+    struct bregs br;
+    br.flags = F_IF;
+    br.code.seg = SEG_BIOS;
+    br.code.offset = (u32)&trampoline_yield;
+    call16big(&br);
+}
+
+
+/****************************************************************
+ * String ops
+ ****************************************************************/
+
 // Sum the bytes in the specified area.
 u8
 checksum_far(u16 buf_seg, void *buf_far, u32 len)
@@ -233,9 +273,14 @@
     return dest;
 }
 
-// Wait for 'usec' microseconds with irqs enabled.
+
+/****************************************************************
+ * Keyboard calls
+ ****************************************************************/
+
+// Wait for 'usec' microseconds (with irqs enabled) using int 1586.
 void
-usleep(u32 usec)
+biosusleep(u32 usec)
 {
     struct bregs br;
     memset(&br, 0, sizeof(br));
@@ -278,7 +323,7 @@
             return get_raw_keystroke();
         if (msec <= 0)
             return -1;
-        usleep(50*1000);
+        biosusleep(50*1000);
         msec -= 50;
     }
 }
diff --git a/src/util.h b/src/util.h
index f95cdb5..01f46c0 100644
--- a/src/util.h
+++ b/src/util.h
@@ -163,7 +163,8 @@
         extern void irq_trampoline_ ##nr ();                    \
         __call16_int((callregs), (u32)&irq_trampoline_ ##nr );  \
     } while (0)
-void usleep(u32 usec);
+void yield();
+void biosusleep(u32 usec);
 int get_keystroke(int msec);
 
 // output.c
@@ -221,6 +222,9 @@
 void ndelay(u32 count);
 void udelay(u32 count);
 void mdelay(u32 count);
+void nsleep(u32 count);
+void usleep(u32 count);
+void msleep(u32 count);
 u64 calc_future_tsc(u32 msecs);
 u64 calc_future_tsc_usec(u32 usecs);
 void handle_1583(struct bregs *regs);