vgabios: Some tweaks to optimize stack space.

Gcc seems to handle passing structs by value if they are 4 bytes in
size instead of 3 bytes.  So, add a pad byte to struct carattr and
struct cursorpos.

Reorganize set_cursor_pos(), verify_scroll(), and handle_1013() so
there are less live variables.

Don't inline the VBE functions into the main handler code.  Code
calling VBE functions are newer and are more likely to provide
adequate stack space - inlining can cause more stack usage for older
functions (which may be stack constrained).

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
diff --git a/tools/checkstack.py b/tools/checkstack.py
index 428c296..717de2d 100755
--- a/tools/checkstack.py
+++ b/tools/checkstack.py
@@ -149,7 +149,7 @@
                 stackusage += int(im.group('num'), 16)
 
             if atstart:
-                if insn == 'movl   %esp,%ebp':
+                if '%esp' in insn or insn.startswith('leal'):
                     # Still part of initial header
                     continue
                 cur[1] = stackusage
diff --git a/vgasrc/vbe.c b/vgasrc/vbe.c
index 852edbc..227a244 100644
--- a/vgasrc/vbe.c
+++ b/vgasrc/vbe.c
@@ -390,7 +390,7 @@
     regs->ax = 0x0100;
 }
 
-void
+void noinline
 handle_104f(struct bregs *regs)
 {
     if (!CONFIG_VGA_VBE) {
diff --git a/vgasrc/vgabios.c b/vgasrc/vgabios.c
index bca2e9a..58e467d 100644
--- a/vgasrc/vgabios.c
+++ b/vgasrc/vgabios.c
@@ -115,12 +115,14 @@
 static void
 set_cursor_pos(struct cursorpos cp)
 {
+    u8 page = cp.page, x = cp.x, y = cp.y;
+
     // Should not happen...
-    if (cp.page > 7)
+    if (page > 7)
         return;
 
     // Bios cursor pos
-    SET_BDA(cursor_pos[cp.page], (cp.y << 8) | cp.x);
+    SET_BDA(cursor_pos[page], (y << 8) | x);
 
     // Set the hardware cursor
     u8 current = GET_BDA(video_page);
@@ -128,8 +130,8 @@
         return;
 
     // Calculate the memory address
-    int address = (GET_BDA(video_pagesize) * cp.page
-                   + (cp.x + cp.y * GET_BDA(video_cols)) * 2);
+    int address = (GET_BDA(video_pagesize) * page
+                   + (x + y * GET_BDA(video_cols)) * 2);
     stdvga_set_cursor_pos(address);
 }
 
@@ -234,9 +236,9 @@
         cp.y++;
         break;
     case '\t':
+        ca.car = ' ';
         do {
-            struct carattr dummyca = {' ', ca.attr, ca.use_attr};
-            vgafb_write_char(cp, dummyca);
+            vgafb_write_char(cp, ca);
             cp.x++;
         } while (cp.x < nbcols && cp.x % 8);
         break;
@@ -261,34 +263,6 @@
     scroll_one(nbrows, nbcols, cp.page);
 }
 
-// Write out a buffer of alternating characters and attributes.
-static void
-write_attr_string(struct cursorpos *pcp, u16 count, u16 seg, u8 *offset_far)
-{
-    while (count--) {
-        u8 car = GET_FARVAR(seg, *offset_far);
-        offset_far++;
-        u8 attr = GET_FARVAR(seg, *offset_far);
-        offset_far++;
-
-        struct carattr ca = {car, attr, 1};
-        write_teletype(pcp, ca);
-    }
-}
-
-// Write out a buffer of characters.
-static void
-write_string(struct cursorpos *pcp, u8 attr, u16 count, u16 seg, u8 *offset_far)
-{
-    while (count--) {
-        u8 car = GET_FARVAR(seg, *offset_far);
-        offset_far++;
-
-        struct carattr ca = {car, attr, 1};
-        write_teletype(pcp, ca);
-    }
-}
-
 
 /****************************************************************
  * Save and restore bda state
@@ -492,24 +466,24 @@
 static void
 verify_scroll(struct bregs *regs, int dir)
 {
-    u8 page = GET_BDA(video_page);
-    struct cursorpos ul = {regs->cl, regs->ch, page};
-    struct cursorpos lr = {regs->dl, regs->dh, page};
-
+    u8 ulx = regs->cl, uly = regs->ch, lrx = regs->dl, lry = regs->dh;
     u16 nbrows = GET_BDA(video_rows) + 1;
-    if (lr.y >= nbrows)
-        lr.y = nbrows - 1;
+    if (lry >= nbrows)
+        lry = nbrows - 1;
     u16 nbcols = GET_BDA(video_cols);
-    if (lr.x >= nbcols)
-        lr.x = nbcols - 1;
+    if (lrx >= nbcols)
+        lrx = nbcols - 1;
 
-    if (ul.x > lr.x || ul.y > lr.y)
+    if (ulx > lrx || uly > lry)
         return;
 
-    u16 nblines = regs->al;
-    if (!nblines || nblines > lr.y - ul.y + 1)
-        nblines = lr.y - ul.y + 1;
+    int nblines = regs->al;
+    if (!nblines || nblines > lry - uly + 1)
+        nblines = lry - uly + 1;
 
+    u8 page = GET_BDA(video_page);
+    struct cursorpos ul = {ulx, uly, page};
+    struct cursorpos lr = {lrx, lry, page};
     vgafb_scroll(dir * nblines, regs->bh, ul, lr);
 }
 
@@ -1014,17 +988,29 @@
 static void noinline
 handle_1013(struct bregs *regs)
 {
-    struct cursorpos cp = {regs->dl, regs->dh, regs->bh};
-    // if row=0xff special case : use current cursor position
-    if (cp.y == 0xff)
-        cp = get_cursor_pos(cp.page);
-    u8 flag = regs->al;
-    if (flag & 2)
-        write_attr_string(&cp, regs->cx, regs->es, (void*)(regs->bp + 0));
+    struct cursorpos cp;
+    if (regs->dh == 0xff)
+        // if row=0xff special case : use current cursor position
+        cp = get_cursor_pos(regs->bh);
     else
-        write_string(&cp, regs->bl, regs->cx, regs->es, (void*)(regs->bp + 0));
+        cp = (struct cursorpos) {regs->dl, regs->dh, regs->bh};
 
-    if (flag & 1)
+    u16 count = regs->cx;
+    u8 *offset_far = (void*)(regs->bp + 0);
+    u8 attr = regs->bl;
+    while (count--) {
+        u8 car = GET_FARVAR(regs->es, *offset_far);
+        offset_far++;
+        if (regs->al & 2) {
+            attr = GET_FARVAR(regs->es, *offset_far);
+            offset_far++;
+        }
+
+        struct carattr ca = {car, attr, 1};
+        write_teletype(&cp, ca);
+    }
+
+    if (regs->al & 1)
         set_cursor_pos(cp);
 }
 
diff --git a/vgasrc/vgabios.h b/vgasrc/vgabios.h
index 13e5cce..ff5ec45 100644
--- a/vgasrc/vgabios.h
+++ b/vgasrc/vgabios.h
@@ -82,10 +82,10 @@
 extern int HaveRunInit;
 #define SET_VGA(var, val) SET_FARVAR(get_global_seg(), (var), (val))
 struct carattr {
-    u8 car, attr, use_attr;
+    u8 car, attr, use_attr, pad;
 };
 struct cursorpos {
-    u8 x, y, page;
+    u8 x, y, page, pad;
 };
 int vga_bpp(struct vgamode_s *vmode_g);
 u16 calc_page_size(u8 memmodel, u16 width, u16 height);