Some USB UHCI and OHCI fixes and cleanups.

Don't send a data packet on OHCI if there is no data to be sent.
Add some barrier() calls where needed.
Move toggle definition from generic pipe struct to uhci pipe struct.
Check for malloc failure when allocating "tds" on OHCI and UHCI.
Always allocate an even number of interrupt tds on UHCI - the toggle
    setting depends on it.
diff --git a/src/usb-ohci.c b/src/usb-ohci.c
index 0048e2b..5fb7fec 100644
--- a/src/usb-ohci.c
+++ b/src/usb-ohci.c
@@ -352,8 +352,8 @@
         return NULL;
     }
     memset(pipe, 0, sizeof(*pipe));
-    pipe->ed.hwINFO = ED_SKIP;
     memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
+    pipe->ed.hwINFO = ED_SKIP;
 
     // Add queue head to controller list.
     pipe->ed.hwNextED = cntl->regs->ed_controlhead;
@@ -383,24 +383,34 @@
 
     // Setup transfer descriptors
     struct ohci_td *tds = malloc_tmphigh(sizeof(*tds) * 3);
-    tds[0].hwINFO = TD_DP_SETUP | TD_T_DATA0 | TD_CC;
-    tds[0].hwCBP = (u32)cmd;
-    tds[0].hwNextTD = (u32)&tds[1];
-    tds[0].hwBE = (u32)cmd + cmdsize - 1;
-    tds[1].hwINFO = (dir ? TD_DP_IN : TD_DP_OUT) | TD_T_DATA1 | TD_CC;
-    tds[1].hwCBP = datasize ? (u32)data : 0;
-    tds[1].hwNextTD = (u32)&tds[2];
-    tds[1].hwBE = (u32)data + datasize - 1;
-    tds[2].hwINFO = (dir ? TD_DP_OUT : TD_DP_IN) | TD_T_DATA1 | TD_CC;
-    tds[2].hwCBP = 0;
-    tds[2].hwNextTD = (u32)&tds[3];
-    tds[2].hwBE = 0;
+    if (!tds) {
+        warn_noalloc();
+        return -1;
+    }
+    struct ohci_td *td = tds;
+    td->hwINFO = TD_DP_SETUP | TD_T_DATA0 | TD_CC;
+    td->hwCBP = (u32)cmd;
+    td->hwNextTD = (u32)&td[1];
+    td->hwBE = (u32)cmd + cmdsize - 1;
+    td++;
+    if (datasize) {
+        td->hwINFO = (dir ? TD_DP_IN : TD_DP_OUT) | TD_T_DATA1 | TD_CC;
+        td->hwCBP = (u32)data;
+        td->hwNextTD = (u32)&td[1];
+        td->hwBE = (u32)data + datasize - 1;
+        td++;
+    }
+    td->hwINFO = (dir ? TD_DP_OUT : TD_DP_IN) | TD_T_DATA1 | TD_CC;
+    td->hwCBP = 0;
+    td->hwNextTD = (u32)&td[1];
+    td->hwBE = 0;
+    td++;
 
     // Transfer data
     pipe->ed.hwINFO = ED_SKIP;
     barrier();
-    pipe->ed.hwHeadP = (u32)&tds[0];
-    pipe->ed.hwTailP = (u32)&tds[3];
+    pipe->ed.hwHeadP = (u32)tds;
+    pipe->ed.hwTailP = (u32)td;
     barrier();
     pipe->ed.hwINFO = devaddr | (maxpacket << 16) | (lowspeed ? ED_LOWSPEED : 0);
     writel(&cntl->regs->cmdstatus, OHCI_CLF);
@@ -435,6 +445,11 @@
     void *data = malloc_low(maxpacket * count);
     if (!pipe || !tds || !data)
         goto err;
+    memset(pipe, 0, sizeof(*pipe));
+    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
+    pipe->data = data;
+    pipe->count = count;
+    pipe->tds = tds;
 
     struct ohci_ed *ed = &pipe->ed;
     ed->hwHeadP = (u32)&tds[0];
@@ -464,10 +479,6 @@
             hcca->int_table[i] = (u32)ed;
     }
 
-    pipe->data = data;
-    pipe->count = count;
-    pipe->tds = tds;
-    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
     return &pipe->pipe;
 
 err:
@@ -510,7 +521,7 @@
     SET_FLATPTR(tail->hwCBP, (u32)intrdata);
     SET_FLATPTR(tail->hwNextTD, (u32)next);
     SET_FLATPTR(tail->hwBE, (u32)intrdata + maxpacket - 1);
-
+    barrier();
     SET_FLATPTR(pipe->ed.hwTailP, (u32)next);
 
     return 0;
diff --git a/src/usb-uhci.c b/src/usb-uhci.c
index 3384504..c26b95b 100644
--- a/src/usb-uhci.c
+++ b/src/usb-uhci.c
@@ -12,7 +12,6 @@
 #include "pci_regs.h" // PCI_BASE_ADDRESS_4
 #include "usb.h" // struct usb_s
 #include "farptr.h" // GET_FLATPTR
-#include "biosvar.h" // GET_GLOBAL
 #include "usb-hub.h" // struct usbhub_s
 
 struct usb_uhci_s {
@@ -181,6 +180,7 @@
     free(fl);
     free(intr_qh);
     free(term_qh);
+    free(cntl);
 }
 
 void
@@ -255,6 +255,7 @@
     struct uhci_td *next_td;
     struct usb_pipe pipe;
     u16 iobase;
+    u8 toggle;
 };
 
 void
@@ -306,9 +307,9 @@
         return NULL;
     }
     memset(pipe, 0, sizeof(*pipe));
+    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
     pipe->qh.element = UHCI_PTR_TERM;
     pipe->iobase = cntl->iobase;
-    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
 
     // Add queue head to controller list.
     struct uhci_qh *control_qh = cntl->control_qh;
@@ -339,6 +340,10 @@
     // Setup transfer descriptors
     int count = 2 + DIV_ROUND_UP(datasize, maxpacket);
     struct uhci_td *tds = malloc_tmphigh(sizeof(*tds) * count);
+    if (!tds) {
+        warn_noalloc();
+        return -1;
+    }
 
     tds[0].link = (u32)&tds[1] | UHCI_PTR_DEPTH;
     tds[0].status = (uhci_maxerr(3) | (lowspeed ? TD_CTRL_LS : 0)
@@ -395,9 +400,9 @@
         return NULL;
     }
     memset(pipe, 0, sizeof(*pipe));
+    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
     pipe->qh.element = UHCI_PTR_TERM;
     pipe->iobase = cntl->iobase;
-    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
 
     // Add queue head to controller list.
     struct uhci_qh *bulk_qh = cntl->bulk_qh;
@@ -436,6 +441,8 @@
 int
 uhci_send_bulk(struct usb_pipe *p, int dir, void *data, int datasize)
 {
+    if (! CONFIG_USB_UHCI)
+        return -1;
     struct uhci_pipe *pipe = container_of(p, struct uhci_pipe, pipe);
     dprintf(7, "uhci_send_bulk qh=%p dir=%d data=%p size=%d\n"
             , &pipe->qh, dir, data, datasize);
@@ -443,7 +450,7 @@
     int lowspeed = GET_FLATPTR(pipe->pipe.lowspeed);
     int devaddr = (GET_FLATPTR(pipe->pipe.devaddr)
                    | (GET_FLATPTR(pipe->pipe.ep) << 7));
-    int toggle = GET_FLATPTR(pipe->pipe.toggle) ? TD_TOKEN_TOGGLE : 0;
+    int toggle = GET_FLATPTR(pipe->toggle) ? TD_TOKEN_TOGGLE : 0;
 
     // Allocate 4 tds on stack (16byte aligned)
     u8 tdsbuf[sizeof(struct uhci_td) * STACKTDS + TDALIGN - 1];
@@ -451,6 +458,7 @@
     memset(tds, 0, sizeof(*tds) * STACKTDS);
 
     // Enable tds
+    barrier();
     SET_FLATPTR(pipe->qh.element, (u32)MAKE_FLATPTR(GET_SEG(SS), tds));
 
     int tdpos = 0;
@@ -486,7 +494,7 @@
             goto fail;
     }
 
-    SET_FLATPTR(pipe->pipe.toggle, !!toggle);
+    SET_FLATPTR(pipe->toggle, !!toggle);
     return 0;
 fail:
     dprintf(1, "uhci_send_bulk failed\n");
@@ -512,6 +520,7 @@
     // Determine number of entries needed for 2 timer ticks.
     int ms = 1<<frameexp;
     int count = DIV_ROUND_UP(PIT_TICK_INTERVAL * 1000 * 2, PIT_TICK_RATE * ms);
+    count = ALIGN(count, 2);
     struct uhci_pipe *pipe = malloc_low(sizeof(*pipe));
     struct uhci_td *tds = malloc_low(sizeof(*tds) * count);
     void *data = malloc_low(maxpacket * count);
@@ -520,10 +529,10 @@
         goto fail;
     }
     memset(pipe, 0, sizeof(*pipe));
+    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
     pipe->qh.element = (u32)tds;
     pipe->next_td = &tds[0];
     pipe->iobase = cntl->iobase;
-    memcpy(&pipe->pipe, dummy, sizeof(pipe->pipe));
 
     int toggle = 0;
     int i;
diff --git a/src/usb.h b/src/usb.h
index e1ca45a..10f824f 100644
--- a/src/usb.h
+++ b/src/usb.h
@@ -11,7 +11,6 @@
     u8 devaddr;
     u8 lowspeed;
     u16 maxpacket;
-    u8 toggle;
 };
 
 // Common information for usb controllers.