Do garbage collection of unused sections.

Implement -ffunction-sections and -fdata-sections in both 32bit and
    16bit code.
Make sure all sections have unique names (even asm and discarded
    sections).
Enhance tools/layoutrom.py script to find all sections reachable from
    exported 16bit code - prune all other sections.
Mark sections with "export" if they can be visible outside of code -
    these sections won't be dropped when pruning unused sections.
diff --git a/Makefile b/Makefile
index bd5c184..ff594e7 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,7 @@
                -mpreferred-stack-boundary=2 -mrtd -freg-struct-return \
                -ffreestanding -fwhole-program -fomit-frame-pointer \
                -fno-delete-null-pointer-checks -Wno-strict-aliasing \
+               -ffunction-sections -fdata-sections \
                -minline-all-stringops
 COMMONCFLAGS += $(call cc-option,$(CC),-nopie,)
 COMMONCFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
@@ -33,7 +34,6 @@
 CFLAGS16INC = $(COMMONCFLAGS) -DMODE16=1 -fno-jump-tables -fno-defer-pop \
               $(call cc-option,$(CC),-fno-tree-switch-conversion,) \
               $(call cc-option,$(CC),--param large-stack-frame=4,)
-CFLAGS16INC += -ffunction-sections -fdata-sections
 CFLAGS16 = $(CFLAGS16INC) -g
 
 all: $(OUT) $(OUT)bios.bin
@@ -107,11 +107,14 @@
 	@echo "  Linking (no relocs) $@"
 	$(Q)$(LD) -r -T $(OUT)rombios32.lds $< -o $@
 
-$(OUT)romlayout.lds: $(OUT)romlayout16.o
+$(OUT)romlayout16.lds $(OUT)romlayout32.lds: $(OUT)ccode32.o $(OUT)romlayout16.o
 	@echo "  Building layout information $@"
-	$(Q)$(OBJDUMP) -h $< | ./tools/layoutrom.py $@
+	$(Q)$(OBJDUMP) -thr $(OUT)ccode32.o > $(OUT)ccode32.o.objdump
+	$(Q)$(OBJDUMP) -thr $(OUT)romlayout16.o > $(OUT)romlayout16.o.objdump
+	$(Q)./tools/layoutrom.py $(OUT)romlayout16.o.objdump $(OUT)ccode32.o.objdump $(OUT)romlayout16.lds $(OUT)romlayout32.lds
 
-$(OUT)layout16.lds: $(OUT)romlayout.lds
+$(OUT)layout16.lds: $(OUT)romlayout16.lds
+$(OUT)rombios32.lds: $(OUT)romlayout32.lds
 
 $(OUT)rom16.o: $(OUT)romlayout16.o $(OUT)rom32.o $(OUT)layout16.lds
 	@echo "  Linking (no relocs) $@"
@@ -148,7 +151,7 @@
 
 $(OUT)vgarom.o: $(OUT)vgalayout16.o $(OUT)vgalayout.lds
 	@echo "  Linking $@"
-	$(Q)$(LD) -T $(OUT)vgalayout.lds $(OUT)vgalayout16.o -o $@
+	$(Q)$(LD) --gc-sections -T $(OUT)vgalayout.lds $(OUT)vgalayout16.o -o $@
 
 $(OUT)vgabios.bin.raw: $(OUT)vgarom.o
 	@echo "  Extracting binary $@"
diff --git a/src/entryfuncs.S b/src/entryfuncs.S
index 65218e3..3c29b3f 100644
--- a/src/entryfuncs.S
+++ b/src/entryfuncs.S
@@ -165,3 +165,9 @@
         .section .text.asm.\func
         .global \func
         .endm
+
+        // Declare an exported function
+        .macro EXPORTFUNC func
+        .section .text.asm.export.\func
+        .global \func
+        .endm
diff --git a/src/layout16.lds.S b/src/layout16.lds.S
index 7b27dd4..40f3664 100644
--- a/src/layout16.lds.S
+++ b/src/layout16.lds.S
@@ -13,7 +13,7 @@
 
 // The actual placement of the 16bit sections is determined by the
 // script tools/layoutrom.py
-#include "../out/romlayout.lds"
+#include "../out/romlayout16.lds"
 
         // Discard regular data sections to force a link error if
         // 16bit code attempts to access data not marked with VAR16.
diff --git a/src/pirtable.c b/src/pirtable.c
index 777a94b..a96ffbc 100644
--- a/src/pirtable.c
+++ b/src/pirtable.c
@@ -18,7 +18,7 @@
 
 extern struct pir_table PIR_TABLE;
 #if CONFIG_PIRTABLE && !CONFIG_COREBOOT
-struct pir_table PIR_TABLE __aligned(16) VAR16_32 = {
+struct pir_table PIR_TABLE __aligned(16) VAR16EXPORT = {
     .pir = {
         .version = 0x0100,
         .size = sizeof(struct pir_table),
diff --git a/src/pnpbios.c b/src/pnpbios.c
index b07b585..cf78f00 100644
--- a/src/pnpbios.c
+++ b/src/pnpbios.c
@@ -28,7 +28,7 @@
 extern char pnp_string[];
 
 #if CONFIG_PNPBIOS
-struct pnpheader PNPHEADER __aligned(16) VAR16_32 = {
+struct pnpheader PNPHEADER __aligned(16) VAR16EXPORT = {
     .signature = PNP_SIGNATURE,
     .version = 0x10,
     .length = sizeof(PNPHEADER),
diff --git a/src/rombios.lds.S b/src/rombios.lds.S
index 20f7bfb..0f6e388 100644
--- a/src/rombios.lds.S
+++ b/src/rombios.lds.S
@@ -10,11 +10,14 @@
 SECTIONS
 {
         .text code32_start : {
-                *(.text)
+                *(.text32)
 
                 . = code16_start ;
                 *(.text16)
                 final_code16_end = . ;
                 }
-        /DISCARD/ : { *(.discard*) *(.eh_frame) }
+        /DISCARD/ : {
+                *(.text*) *(.data*) *(.bss*) *(.rodata*)
+                *(COMMON) *(.discard*) *(.eh_frame)
+                }
 }
diff --git a/src/rombios32.lds.S b/src/rombios32.lds.S
index 908201c..62e92d1 100644
--- a/src/rombios32.lds.S
+++ b/src/rombios32.lds.S
@@ -8,17 +8,14 @@
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH("i386")
-FORCE_COMMON_ALLOCATION
 SECTIONS
 {
-        .text BUILD_BIOS_ADDR : {
+        .text32 BUILD_BIOS_ADDR : {
                 code32_start = ABSOLUTE(.) ;
-                *(.text)
-                code32_rodata = . ;
-                *(.rodata*)
-                *(.data)
-                *(.bss)
-                *(COMMON)
+
+// The actual sections kept are determined by the script tools/layoutrom.py
+#include "../out/romlayout32.lds"
+
                 freespace_start = . ;
                 code32_end = ABSOLUTE(.) ;
                 }
diff --git a/src/romlayout.S b/src/romlayout.S
index cb1da85..cbd9483 100644
--- a/src/romlayout.S
+++ b/src/romlayout.S
@@ -332,7 +332,7 @@
         lretl
 
 // 32bit elf entry point
-        DECLFUNC post32
+        EXPORTFUNC post32
 post32:
         cli
         cld
diff --git a/src/smm.c b/src/smm.c
index ac65837..31e56d8 100644
--- a/src/smm.c
+++ b/src/smm.c
@@ -11,8 +11,7 @@
 #include "ioport.h" // outb
 #include "pci_ids.h" // PCI_VENDOR_ID_INTEL
 
-#if CONFIG_USE_SMM
-asm(
+ASM32(
     ".global smm_relocation_start\n"
     ".global smm_relocation_end\n"
     ".global smm_code_start\n"
@@ -69,7 +68,6 @@
     "smm_code_end:\n"
     "  .code32\n"
     );
-#endif
 
 extern u8 smm_relocation_start, smm_relocation_end;
 extern u8 smm_code_start, smm_code_end;
diff --git a/src/types.h b/src/types.h
index 52e508b..a004474 100644
--- a/src/types.h
+++ b/src/types.h
@@ -23,26 +23,35 @@
 
 #define __VISIBLE __attribute__((externally_visible))
 
+#define UNIQSEC __FILE__ "." __stringify(__LINE__)
+
+#define __ASM(code) asm(".section .text.asm." UNIQSEC "\n\t" code)
+
 #if MODE16 == 1
 // Notes a function as externally visible in the 16bit code chunk.
 # define VISIBLE16 __VISIBLE
 // Notes a function as externally visible in the 32bit code chunk.
 # define VISIBLE32
 // Designate a variable as (only) visible to 16bit code.
-# define VAR16 __attribute__((section(".data16." __FILE__ "." __stringify(__LINE__))))
+# define VAR16 __section(".data16." UNIQSEC)
 // Designate a variable as visible to both 32bit and 16bit code.
 # define VAR16_32 VAR16 __VISIBLE
+// Designate a variable visible externally.
+# define VAR16EXPORT __section(".data16.export." UNIQSEC) __VISIBLE
 // Designate a variable at a specific 16bit address
-# define VAR16FIXED(addr) __aligned(1) __VISIBLE  __attribute__((section(".fixedaddr." __stringify(addr))))
+# define VAR16FIXED(addr) __aligned(1) __VISIBLE __section(".fixedaddr." __stringify(addr))
 // Designate top-level assembler as 16bit only.
-# define ASM16(code) asm(".section .text.asm." __FILE__ "." __stringify(__LINE__) "\n\t" code)
+# define ASM16(code) __ASM(code)
+# define ASM32(code)
 #else
 # define VISIBLE16
 # define VISIBLE32 __VISIBLE
-# define VAR16 __attribute__((section(".discard.var16")))
-# define VAR16_32 VAR16 __VISIBLE __attribute__((weak))
+# define VAR16 __section(".discard.var16." UNIQSEC)
+# define VAR16_32 VAR16 __VISIBLE __weak
+# define VAR16EXPORT VAR16_32
 # define VAR16FIXED(addr) VAR16_32
 # define ASM16(code)
+# define ASM32(code) __ASM(code)
 #endif
 
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
@@ -54,6 +63,9 @@
 
 #define NULL ((void *)0)
 
+#define __weak __attribute__((weak))
+#define __section(S) __attribute__((section(S)))
+
 #define PACKED __attribute__((packed))
 #define __aligned(x) __attribute__((aligned(x)))
 
diff --git a/tools/layoutrom.py b/tools/layoutrom.py
index 319111b..7b2841f 100755
--- a/tools/layoutrom.py
+++ b/tools/layoutrom.py
@@ -7,34 +7,19 @@
 
 import sys
 
-def main():
-    # Get output name
-    outname = sys.argv[1]
-
-    # Read in section names and sizes
-    # sections = [(size, align, name), ...]
-    sections = []
-    for line in sys.stdin.readlines():
-        try:
-            idx, name, size, vma, lma, fileoff, align = line.split()
-            if align[:3] != '2**':
-                continue
-            sections.append((int(size, 16), 2**int(align[3:]), name))
-        except:
-            pass
-
-    doLayout(sections, outname)
 
 def alignpos(pos, alignbytes):
     mask = alignbytes - 1
     return (pos + mask) & ~mask
 
+
+######################################################################
+# 16bit fixed address section fitting
+######################################################################
+
 MAXPOS = 0x10000
 
-def outsection(file, name):
-    file.write("*(%s)\n" % (name,))
-
-def doLayout(sections, outname):
+def doLayout16(sections, outname):
     textsections = []
     rodatasections = []
     datasections = []
@@ -151,7 +136,7 @@
         name = section[2]
         if name == rodatasections[0][2]:
             output.write("code16_rodata = . ;\n")
-        outsection(output, name)
+        output.write("*(%s)\n" % (name,))
 
     # Write fixed sections
     for addr, section, extrasections in fixedsections:
@@ -159,7 +144,7 @@
         output.write(". = ( 0x%x - code16_start ) ;\n" % (addr,))
         output.write("*(%s)\n" % (name,))
         for extrasection in extrasections:
-            outsection(output, extrasection[2])
+            output.write("*(%s)\n" % (extrasection[2],))
 
     # Write trailer
     output.write("""
@@ -168,5 +153,142 @@
 """)
 
 
+######################################################################
+# 32bit section outputting
+######################################################################
+
+def outsections(file, sections, prefix):
+    lp = len(prefix)
+    for size, align, name in sections:
+        if name[:lp] == prefix:
+            file.write("*(%s)\n" % (name,))
+
+def doLayout32(sections, outname):
+    output = open(outname, 'wb')
+    outsections(output, sections, '.text.')
+    output.write("code32_rodata = . ;\n")
+    outsections(output, sections, '.rodata')
+    outsections(output, sections, '.data.')
+    outsections(output, sections, '.bss.')
+
+
+######################################################################
+# Section garbage collection
+######################################################################
+
+def keepsection(name, pri, alt):
+    if name in pri[3]:
+        # Already kept - nothing to do.
+        return
+    pri[3].append(name)
+    relocs = pri[2].get(name)
+    if relocs is None:
+        return
+    # Keep all sections that this section points to
+    for symbol in relocs:
+        section = pri[1].get(symbol)
+        if section is not None and section[:9] != '.discard.':
+            keepsection(section, pri, alt)
+            continue
+        # Not in primary sections - it may be a cross 16/32 reference
+        section = alt[1].get(symbol)
+        if section is not None:
+            keepsection(section, alt, pri)
+
+def gc(info16, info32):
+    # pri = (sections, symbols, relocs, keep sections)
+    pri = (info16[0], info16[1], info16[2], [])
+    alt = (info32[0], info32[1], info32[2], [])
+    # Start by keeping sections that are globally visible.
+    for size, align, section in info16[0]:
+        if section[:11] == '.fixedaddr.' or '.export.' in section:
+            keepsection(section, pri, alt)
+    # Return sections found.
+    sections16 = []
+    for info in info16[0]:
+        size, align, section = info
+        if section not in pri[3]:
+#            print "gc16", section
+            continue
+        sections16.append(info)
+    sections32 = []
+    for info in info32[0]:
+        size, align, section = info
+        if section not in alt[3]:
+#            print "gc32", section
+            continue
+        sections32.append(info)
+    return sections16, sections32
+
+
+######################################################################
+# Startup and input parsing
+######################################################################
+
+# Read in output from objdump
+def parseObjDump(file):
+    # sections = [(size, align, section), ...]
+    sections = []
+    # symbols[symbol] = section
+    symbols = {}
+    # relocs[section] = [symbol, ...]
+    relocs = {}
+
+    state = None
+    for line in file.readlines():
+        line = line.rstrip()
+        if line == 'Sections:':
+            state = 'section'
+            continue
+        if line == 'SYMBOL TABLE:':
+            state = 'symbol'
+            continue
+        if line[:24] == 'RELOCATION RECORDS FOR [':
+            state = 'reloc'
+            relocsection = line[24:-2]
+            continue
+
+        if state == 'section':
+            try:
+                idx, name, size, vma, lma, fileoff, align = line.split()
+                if align[:3] != '2**':
+                    continue
+                sections.append((int(size, 16), 2**int(align[3:]), name))
+            except:
+                pass
+            continue
+        if state == 'symbol':
+            try:
+                section, off, symbol = line[17:].split()
+                off = int(off, 16)
+                if '*' not in section:
+                    symbols[symbol] = section
+            except:
+                pass
+            continue
+        if state == 'reloc':
+            try:
+                off, type, symbol = line.split()
+                off = int(off, 16)
+                relocs.setdefault(relocsection, []).append(symbol)
+            except:
+                pass
+    return sections, symbols, relocs
+
+def main():
+    # Get input (objdump listing) and output (linker script) file names
+    in16, in32, out16, out32 = sys.argv[1:]
+
+    infile16 = open(in16, 'rb')
+    infile32 = open(in32, 'rb')
+
+    info16 = parseObjDump(infile16)
+    info32 = parseObjDump(infile32)
+
+    sections16, sections32 = gc(info16, info32)
+
+    doLayout16(sections16, out16)
+    doLayout32(sections32, out32)
+
 if __name__ == '__main__':
     main()
diff --git a/vgasrc/vgalayout.lds.S b/vgasrc/vgalayout.lds.S
index b5f0319..08a5f32 100644
--- a/vgasrc/vgalayout.lds.S
+++ b/vgasrc/vgalayout.lds.S
@@ -10,7 +10,7 @@
 SECTIONS
 {
         .text 0 : {
-                *(.rom.header)
+                KEEP(*(.rom.header))
                 *(.text.*)
                 _rodata = . ;
                 *(.rodata.__func__.*)