Introduce bootblock self-decompression

Masked ROMs are the silent killers of boot speed on devices without
memory-mapped SPI flash. They often contain awfully slow SPI drivers
(presumably bit-banged) that take hundreds of milliseconds to load our
bootblock, and every extra kilobyte of bootblock size has a hugely
disproportionate impact on boot speed. The coreboot timestamps can never
show that component, but it impacts our users all the same.

This patch tries to alleviate that issue a bit by allowing us to
compress the bootblock with LZ4, which can cut its size down to nearly
half. Of course, masked ROMs usually don't come with decompression
algorithms built in, so we need to introduce a little decompression stub
that can decompress the rest of the bootblock. This is done by creating
a new "decompressor" stage which runs before the bootblock, but includes
the compressed bootblock code in its data section. It needs to be as
small as possible to get a real benefit from this approach, which means
no device drivers, no console output, no exception handling, etc.
Besides the decompression algorithm itself we only include the timer
driver so that we can measure the boot speed impact of decompression. On
ARM and ARM64 systems, we also need to give SoC code a chance to
initialize the MMU, since running decompression without MMU is
prohibitively slow on these architectures.

This feature is implemented for ARM and ARM64 architectures for now,
although most of it is architecture-independent and it should be
relatively simple to port to other platforms where a masked ROM loads
the bootblock into SRAM. It is also supposed to be a clean starting
point from which later optimizations can hopefully cut down the
decompression stub size (currently ~4K on RK3399) a bit more.

NOTE: Bootblock compression is not for everyone. Possible side effects
include trying to run LZ4 on CPUs that come out of reset extremely
underclocked or enabling this too early in SoC bring-up and getting
frustrated trying to find issues in an undebuggable environment. Ask
your SoC vendor if bootblock compression is right for you.

Change-Id: I0dc1cad9ae7508892e477739e743cd1afb5945e8
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://review.coreboot.org/26340
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Aaron Durbin <adurbin@chromium.org>
diff --git a/Makefile.inc b/Makefile.inc
index c40cec8..ec086b9 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -104,7 +104,7 @@
 
 #######################################################################
 # Add source classes and their build options
-classes-y := ramstage romstage bootblock postcar smm smmstub cpu_microcode verstage
+classes-y := ramstage romstage bootblock decompressor postcar smm smmstub cpu_microcode verstage
 
 # Add dynamic classes for rmodules
 $(foreach supported_arch,$(ARCH_SUPPORTED), \
@@ -186,6 +186,7 @@
 	$(eval $(d)ramstage.a: $(call files-in-dir,$(d),$(filter-out %.ld,$(1))); rm -f $$@ && $(AR_ramstage) rcsT $$@ $$^ ) \
 	$(eval ramstage-objs:=$(d)ramstage.a $(filter-out $(filter-out %.ld, $(call files-in-dir,$(d),$(1))),$(ramstage-objs))))
 
+decompressor-generic-ccopts += -D__PRE_RAM__ -D__DECOMPRESSOR__
 bootblock-generic-ccopts += -D__PRE_RAM__ -D__BOOTBLOCK__
 romstage-generic-ccopts += -D__PRE_RAM__ -D__ROMSTAGE__
 ramstage-generic-ccopts += -D__RAMSTAGE__
@@ -631,10 +632,30 @@
 # the linker marked it NOBITS automatically because there are only zeroes in it.
 preserve-bss-flags := --set-section-flags .bss=load,alloc,data --set-section-flags .data=load,alloc,data
 
+ifeq ($(CONFIG_COMPRESS_BOOTBLOCK),y)
+
+$(objcbfs)/bootblock.lz4: $(objcbfs)/bootblock.elf $(objutil)/cbfstool/cbfs-compression-tool
+	@printf "    LZ4        $(subst $(obj)/,,$(@))\n"
+	$(OBJCOPY_bootblock) $(preserve-bss-flags) $< $@.tmp
+	$(OBJCOPY_bootblock) -O binary $@.tmp
+	$(objutil)/cbfstool/cbfs-compression-tool rawcompress $@.tmp $@.tmp2 lz4
+	rm -f $@.tmp
+	mv $@.tmp2 $@
+
+# Put assembled decompressor+bootblock into bootblock.raw.elf so that SoC
+# Makefiles wrapping the bootblock in a header can always key off the same file.
+$(objcbfs)/bootblock.raw.elf: $(objcbfs)/decompressor.elf
+	@printf "    OBJCOPY    $(notdir $(@))\n"
+	$(OBJCOPY_bootblock) $(preserve-bss-flags) $< $@
+
+else	# CONFIG_COMPRESS_BOOTBLOCK
+
 $(objcbfs)/bootblock.raw.elf: $(objcbfs)/bootblock.elf
 	@printf "    OBJCOPY    $(notdir $(@))\n"
 	$(OBJCOPY_bootblock) $(preserve-bss-flags) $< $@
 
+endif	# CONFIG_COMPRESS_BOOTBLOCK
+
 $(objcbfs)/bootblock.raw.bin: $(objcbfs)/bootblock.raw.elf
 	@printf "    OBJCOPY    $(notdir $(@))\n"
 	$(OBJCOPY_bootblock) -O binary $< $@
@@ -651,6 +672,7 @@
 	$(OBJCOPY_$(class)) --add-gnu-debuglink=$< $@.tmp
 	mv $@.tmp $@
 
+
 ###########################################################################
 # Build the final rom image
 ###########################################################################
diff --git a/src/Kconfig b/src/Kconfig
index 99a704d..d2b9fc2 100644
--- a/src/Kconfig
+++ b/src/Kconfig
@@ -146,6 +146,18 @@
 	  time spent decompressing. Doesn't work for XIP stages (assume all
 	  ARCH_X86 for now) for obvious reasons.
 
+config COMPRESS_BOOTBLOCK
+	bool
+	help
+	  This option can be used to compress the bootblock with LZ4 and attach
+	  a small self-decompression stub to its front. This can drastically
+	  reduce boot time on platforms where the bootblock is loaded over a
+	  very slow connection and bootblock size trumps all other factors for
+	  speed. Since using this option usually requires changes to the
+	  SoC memlayout and possibly extra support code, it should not be
+	  user-selectable. (There's no real point in offering this to the user
+	  anyway... if it works and saves boot time, you would always want it.)
+
 config INCLUDE_CONFIG_FILE
 	bool "Include the coreboot .config file into the ROM image"
 	# Default value set at the end of the file
diff --git a/src/arch/arm/Makefile.inc b/src/arch/arm/Makefile.inc
index 013a4dd..06adfe4 100644
--- a/src/arch/arm/Makefile.inc
+++ b/src/arch/arm/Makefile.inc
@@ -44,22 +44,35 @@
 
 ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARM),y)
 
+decompressor-y += id.S
 bootblock-y += id.S
+$(call src-to-obj,decompressor,$(dir)/id.S): $(obj)/build.h
 $(call src-to-obj,bootblock,$(dir)/id.S): $(obj)/build.h
 
+decompressor-y += boot.c
 bootblock-y += boot.c
-bootblock-y += stages.c
-bootblock-y += eabi_compat.c
-bootblock-y += memset.S
-bootblock-y += memcpy.S
-bootblock-y += memmove.S
+decompressor-y += div0.c
 bootblock-y += div0.c
+decompressor-y += eabi_compat.c
+bootblock-y += eabi_compat.c
+decompressor-y += memset.S
+bootblock-y += memset.S
+decompressor-y += memcpy.S
+bootblock-y += memcpy.S
+decompressor-y += memmove.S
+bootblock-y += memmove.S
+
 bootblock-y += clock.c
+bootblock-y += stages.c
 
 $(objcbfs)/bootblock.debug: $$(bootblock-objs)
 	@printf "    LINK       $(subst $(obj)/,,$(@))\n"
 	$(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) -T $(call src-to-obj,bootblock,src/mainboard/$(MAINBOARDDIR)/memlayout.ld) --whole-archive --start-group $(filter-out %.ld,$(bootblock-objs)) --end-group
 
+$(objcbfs)/decompressor.debug: $$(decompressor-objs)
+	@printf "    LINK       $(subst $(obj)/,,$(@))\n"
+	$(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) -T $(call src-to-obj,decompressor,src/mainboard/$(MAINBOARDDIR)/memlayout.ld) --whole-archive --start-group $(filter-out %.ld,$(decompressor-objs)) --end-group
+
 endif # CONFIG_ARCH_BOOTBLOCK_ARM
 
 ###############################################################################
diff --git a/src/arch/arm/armv7/Makefile.inc b/src/arch/arm/armv7/Makefile.inc
index fe0b446..1d3ae52 100644
--- a/src/arch/arm/armv7/Makefile.inc
+++ b/src/arch/arm/armv7/Makefile.inc
@@ -28,18 +28,27 @@
 ###############################################################################
 
 ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV7),y)
+decompressor-generic-ccopts += $(armv7-a_flags)
+decompressor-S-ccopts += $(armv7_asm_flags)
 bootblock-generic-ccopts += $(armv7-a_flags)
 bootblock-S-ccopts += $(armv7_asm_flags)
 
 ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y)
+decompressor-y += bootblock.S
+ifneq ($(CONFIG_COMPRESS_BOOTBLOCK),y)
 bootblock-y += bootblock.S
 endif
+endif
 
+decompressor-y += cache.c
 bootblock-y += cache.c
+decompressor-y += cpu.S
 bootblock-y += cpu.S
+decompressor-y += mmu.c
+bootblock-y += mmu.c
+
 bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception.c
 bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception_asm.S
-bootblock-y += mmu.c
 
 else ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV7_M),y)
 bootblock-generic-ccopts += $(armv7-m_flags)
diff --git a/src/arch/arm/include/arch/header.ld b/src/arch/arm/include/arch/header.ld
index 8a10778..8947310 100644
--- a/src/arch/arm/include/arch/header.ld
+++ b/src/arch/arm/include/arch/header.ld
@@ -13,6 +13,8 @@
  * GNU General Public License for more details.
  */
 
+#include <rules.h>
+
 /* We use ELF as output format. So that we can debug the code in some form. */
 OUTPUT_FORMAT("elf32-littlearm", "elf32-littlearm", "elf32-littlearm")
 OUTPUT_ARCH(arm)
@@ -22,7 +24,7 @@
 	to_load PT_LOAD;
 }
 
-#ifdef __BOOTBLOCK__
+#if ENV_DECOMPRESSOR || ENV_BOOTBLOCK || ENV_RMODULE
 ENTRY(_start)
 #else
 ENTRY(stage_entry)
diff --git a/src/arch/arm/libgcc/Makefile.inc b/src/arch/arm/libgcc/Makefile.inc
index cb91107..9a8d4fc 100644
--- a/src/arch/arm/libgcc/Makefile.inc
+++ b/src/arch/arm/libgcc/Makefile.inc
@@ -19,6 +19,7 @@
 libgcc_files += udivmoddi4.c umoddi3.c
 
 ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARM),y)
+decompressor-y += $(libgcc_files)
 bootblock-y += $(libgcc_files)
 endif
 
diff --git a/src/arch/arm64/Makefile.inc b/src/arch/arm64/Makefile.inc
index 997c2da..f57ef72 100644
--- a/src/arch/arm64/Makefile.inc
+++ b/src/arch/arm64/Makefile.inc
@@ -39,17 +39,25 @@
 
 ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARM64),y)
 
+decompressor-y += boot.c
+bootblock-y += boot.c
+decompressor-y += div0.c
 bootblock-y += div0.c
+decompressor-y += eabi_compat.c
+bootblock-y += eabi_compat.c
+decompressor-y += id.S
 bootblock-y += id.S
+$(call src-to-obj,decompressor,$(dir)/id.S): $(obj)/build.h
 $(call src-to-obj,bootblock,$(dir)/id.S): $(obj)/build.h
 
-bootblock-y += boot.c
-bootblock-y += eabi_compat.c
 bootblock-$(CONFIG_ARM64_USE_ARCH_TIMER) += arch_timer.c
 bootblock-y += transition.c transition_asm.S
 
+decompressor-y += memset.S
 bootblock-y += memset.S
+decompressor-y += memcpy.S
 bootblock-y += memcpy.S
+decompressor-y += memmove.S
 bootblock-y += memmove.S
 
 # Build the bootblock
@@ -58,6 +66,10 @@
 	@printf "    LINK       $(subst $(obj)/,,$(@))\n"
 	$(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) --whole-archive --start-group $(filter-out %.ld,$(bootblock-objs)) --end-group -T $(call src-to-obj,bootblock,src/mainboard/$(MAINBOARDDIR)/memlayout.ld)
 
+$(objcbfs)/decompressor.debug: $$(decompressor-objs) $(obj)/config.h
+	@printf "    LINK       $(subst $(obj)/,,$(@))\n"
+	$(LD_bootblock) $(LDFLAGS_bootblock) -o $@ -L$(obj) --whole-archive --start-group $(filter-out %.ld,$(decompressor-objs)) --end-group -T $(call src-to-obj,decompressor,src/mainboard/$(MAINBOARDDIR)/memlayout.ld)
+
 endif # CONFIG_ARCH_BOOTBLOCK_ARM64
 
 ###############################################################################
diff --git a/src/arch/arm64/armv8/Makefile.inc b/src/arch/arm64/armv8/Makefile.inc
index 14a784b..db7bd33 100644
--- a/src/arch/arm64/armv8/Makefile.inc
+++ b/src/arch/arm64/armv8/Makefile.inc
@@ -31,19 +31,27 @@
 ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV8_64),y)
 
 ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y)
+decompressor-y += bootblock.S
+ifneq ($(CONFIG_COMPRESS_BOOTBLOCK),y)
 bootblock-y += bootblock.S
 endif
-bootblock-y += cache.c
+endif
+decompressor-y += cpu.S
 bootblock-y += cpu.S
+decompressor-y += cache.c
+bootblock-y += cache.c
+decompressor-y += mmu.c
 bootblock-y += mmu.c
 
 bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception.c
 
+decompressor-generic-ccopts += $(armv8_flags)
 bootblock-generic-ccopts += $(armv8_flags)
 
 # Required to access unaligned timestamp struct members before MMU is active
 # (TODO: Maybe use explicit unaligned accesses in timestamp code instead, or
 # evaluate redesigning timestamp data structures to avoid misaligned members.)
+decompressor-c-ccopts += -mstrict-align
 bootblock-c-ccopts += -mstrict-align
 
 endif
diff --git a/src/arch/arm64/armv8/lib/Makefile.inc b/src/arch/arm64/armv8/lib/Makefile.inc
index 2bf1a37..bfc87c3 100644
--- a/src/arch/arm64/armv8/lib/Makefile.inc
+++ b/src/arch/arm64/armv8/lib/Makefile.inc
@@ -18,6 +18,7 @@
 lib_access = pstate.c sysctrl.c cache.c tlb.c clock.c
 
 ifeq ($(CONFIG_ARCH_BOOTBLOCK_ARMV8_64),y)
+decompressor-y += $(lib_access)
 bootblock-y += $(lib_access)
 endif
 
diff --git a/src/arch/arm64/include/arch/header.ld b/src/arch/arm64/include/arch/header.ld
index c82cb3f..9d8764e 100644
--- a/src/arch/arm64/include/arch/header.ld
+++ b/src/arch/arm64/include/arch/header.ld
@@ -24,7 +24,7 @@
 	to_load PT_LOAD;
 }
 
-#if ENV_BOOTBLOCK ||  ENV_RMODULE
+#if ENV_DECOMPRESSOR || ENV_BOOTBLOCK || ENV_RMODULE
 ENTRY(_start)
 #else
 ENTRY(stage_entry)
diff --git a/src/commonlib/Makefile.inc b/src/commonlib/Makefile.inc
index edd17c3..4d89c48 100644
--- a/src/commonlib/Makefile.inc
+++ b/src/commonlib/Makefile.inc
@@ -30,6 +30,7 @@
 smm-y += cbfs.c
 postcar-y += cbfs.c
 
+decompressor-y += lz4_wrapper.c
 bootblock-y += lz4_wrapper.c
 verstage-y += lz4_wrapper.c
 romstage-y += lz4_wrapper.c
diff --git a/src/console/Makefile.inc b/src/console/Makefile.inc
index 13f2b10..d5795d7 100644
--- a/src/console/Makefile.inc
+++ b/src/console/Makefile.inc
@@ -32,3 +32,5 @@
 bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += init.c console.c
 bootblock-y += post.c
 bootblock-y += die.c
+
+decompressor-y += die.c
diff --git a/src/include/bootblock_common.h b/src/include/bootblock_common.h
index fa67098..0f6c5e4 100644
--- a/src/include/bootblock_common.h
+++ b/src/include/bootblock_common.h
@@ -26,6 +26,7 @@
  * The 'early' variants are called prior to console initialization. Also, the
  * SoC functions are called prior to the mainboard fucntions.
  */
+void decompressor_soc_init(void);
 void bootblock_mainboard_early_init(void);
 void bootblock_mainboard_init(void);
 void bootblock_soc_early_init(void);
@@ -47,4 +48,11 @@
 asmlinkage void bootblock_main_with_timestamp(uint64_t base_timestamp,
 	struct timestamp_entry *timestamps, size_t num_timestamps);
 
+/* This is the argument structure passed from decompressor to bootblock. */
+struct bootblock_arg {
+	uint64_t base_timestamp;
+	uint32_t num_timestamps;
+	struct timestamp_entry timestamps[];
+};
+
 #endif	/* __BOOTBLOCK_COMMON_H */
diff --git a/src/include/memlayout.h b/src/include/memlayout.h
index c9c77cf..5de2370 100644
--- a/src/include/memlayout.h
+++ b/src/include/memlayout.h
@@ -102,6 +102,26 @@
 #endif
 
 /* Careful: 'INCLUDE <filename>' must always be at the end of the output line */
+#if ENV_DECOMPRESSOR
+	#define DECOMPRESSOR(addr, sz) \
+		SYMBOL(decompressor, addr) \
+		_edecompressor = _decompressor + sz; \
+		_ = ASSERT(_eprogram - _program <= sz, \
+			STR(decompressor exceeded its allotted size! (sz))); \
+		INCLUDE "decompressor/lib/program.ld"
+
+	#define OVERLAP_DECOMPRESSOR_ROMSTAGE(addr, sz) DECOMPRESSOR(addr, sz)
+	#define OVERLAP_DECOMPRESSOR_VERSTAGE_ROMSTAGE(addr, sz) \
+		DECOMPRESSOR(addr, sz)
+#else
+	#define DECOMPRESSOR(addr, sz) \
+		REGION(decompressor, addr, sz, 1)
+
+	#define OVERLAP_DECOMPRESSOR_ROMSTAGE(addr, sz) ROMSTAGE(addr, sz)
+	#define OVERLAP_DECOMPRESSOR_VERSTAGE_ROMSTAGE(addr, sz) \
+		OVERLAP_VERSTAGE_ROMSTAGE(addr, sz)
+#endif
+
 #if ENV_BOOTBLOCK
 	#define BOOTBLOCK(addr, sz) \
 		SYMBOL(bootblock, addr) \
diff --git a/src/include/program_loading.h b/src/include/program_loading.h
index 5eeef53..e5d26e1 100644
--- a/src/include/program_loading.h
+++ b/src/include/program_loading.h
@@ -28,6 +28,7 @@
 
 enum prog_type {
 	PROG_UNKNOWN,
+	PROG_BOOTBLOCK,
 	PROG_VERSTAGE,
 	PROG_ROMSTAGE,
 	PROG_RAMSTAGE,
diff --git a/src/include/rules.h b/src/include/rules.h
index 4017d37..deea50b 100644
--- a/src/include/rules.h
+++ b/src/include/rules.h
@@ -19,7 +19,20 @@
  * romstage, ramstage or SMM.
  */
 
-#if defined(__BOOTBLOCK__)
+#if defined(__DECOMPRESSOR__)
+#define ENV_DECOMPRESSOR 1
+#define ENV_BOOTBLOCK 0
+#define ENV_ROMSTAGE 0
+#define ENV_RAMSTAGE 0
+#define ENV_SMM 0
+#define ENV_VERSTAGE 0
+#define ENV_RMODULE 0
+#define ENV_POSTCAR 0
+#define ENV_LIBAGESA 0
+#define ENV_STRING "decompressor"
+
+#elif defined(__BOOTBLOCK__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 1
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 0
@@ -31,6 +44,7 @@
 #define ENV_STRING "bootblock"
 
 #elif defined(__ROMSTAGE__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 1
 #define ENV_RAMSTAGE 0
@@ -42,6 +56,7 @@
 #define ENV_STRING "romstage"
 
 #elif defined(__SMM__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 0
@@ -53,6 +68,7 @@
 #define ENV_STRING "smm"
 
 #elif defined(__VERSTAGE__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 0
@@ -64,6 +80,7 @@
 #define ENV_STRING "verstage"
 
 #elif defined(__RAMSTAGE__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 1
@@ -75,6 +92,7 @@
 #define ENV_STRING "ramstage"
 
 #elif defined(__RMODULE__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 0
@@ -86,6 +104,7 @@
 #define ENV_STRING "rmodule"
 
 #elif defined(__POSTCAR__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 0
@@ -97,6 +116,7 @@
 #define ENV_STRING "postcar"
 
 #elif defined(__LIBAGESA__)
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 0
@@ -112,6 +132,7 @@
  * Default case of nothing set for random blob generation using
  * create_class_compiler that isn't bound to a stage.
  */
+#define ENV_DECOMPRESSOR 0
 #define ENV_BOOTBLOCK 0
 #define ENV_ROMSTAGE 0
 #define ENV_RAMSTAGE 0
diff --git a/src/include/symbols.h b/src/include/symbols.h
index 5b92899..fc9ef21 100644
--- a/src/include/symbols.h
+++ b/src/include/symbols.h
@@ -68,6 +68,10 @@
 
 /* _<stage>_size is always the maximum amount allocated in memlayout, whereas
  * _program_size gives the actual memory footprint *used* by current stage. */
+extern u8 _decompressor[];
+extern u8 _edecompressor[];
+#define _decompressor_size (_edecompressor - _decompressor)
+
 extern u8 _bootblock[];
 extern u8 _ebootblock[];
 #define _bootblock_size (_ebootblock - _bootblock)
diff --git a/src/lib/Makefile.inc b/src/lib/Makefile.inc
index a902e0c..08ad9b2 100644
--- a/src/lib/Makefile.inc
+++ b/src/lib/Makefile.inc
@@ -19,6 +19,19 @@
 CFLAGS_ramstage += -fsanitize=undefined
 endif
 
+decompressor-y += decompressor.c
+$(call src-to-obj,decompressor,$(dir)/decompressor.c): $(objcbfs)/bootblock.lz4
+$(call src-to-obj,decompressor,$(dir)/decompressor.c): CCACHE_EXTRAFILES=$(objcbfs)/bootblock.lz4
+# Must reset CCACHE_EXTRAFILES or make applies it transitively to dependencies.
+$(objcbfs)/bootblock.lz4: CCACHE_EXTRAFILES=
+
+decompressor-y += delay.c
+decompressor-$(CONFIG_GENERIC_GPIO_LIB) += gpio.c
+decompressor-y += memchr.c
+decompressor-y += memcmp.c
+decompressor-y += prog_ops.c
+decompressor-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
+
 ifneq ($(CONFIG_BOOTBLOCK_CUSTOM),y)
 bootblock-y += bootblock.c
 endif
@@ -216,11 +229,13 @@
 ramstage-y += bootmode.c
 verstage-y += bootmode.c
 
+decompressor-y += halt.c
 bootblock-y += halt.c
 romstage-y += halt.c
 ramstage-y += halt.c
 smm-y += halt.c
 
+decompressor-y += reset.c
 bootblock-y += reset.c
 verstage-y += reset.c
 romstage-y += reset.c
@@ -248,6 +263,7 @@
 # Use program.ld for all the platforms which use C fo the bootblock.
 bootblock-$(CONFIG_C_ENVIRONMENT_BOOTBLOCK) += program.ld
 
+decompressor-y += program.ld
 postcar-y += program.ld
 romstage-y += program.ld
 ramstage-y += program.ld
diff --git a/src/lib/bootblock.c b/src/lib/bootblock.c
index 867f1b1..d74bebf 100644
--- a/src/lib/bootblock.c
+++ b/src/lib/bootblock.c
@@ -70,3 +70,20 @@
 
 	bootblock_main_with_timestamp(base_timestamp, NULL, 0);
 }
+
+#if IS_ENABLED(CONFIG_COMPRESS_BOOTBLOCK)
+/*
+ * This is the bootblock entry point when it is run after a decompressor stage.
+ * For non-decompressor builds, _start is generally defined in architecture-
+ * specific assembly code. In decompressor builds that architecture
+ * initialization code already ran in the decompressor, so the bootblock can
+ * start straight into common code with a C environment.
+ */
+void _start(struct bootblock_arg *arg);
+void _start(struct bootblock_arg *arg)
+{
+	bootblock_main_with_timestamp(arg->base_timestamp, arg->timestamps,
+				      arg->num_timestamps);
+}
+
+#endif
diff --git a/src/lib/decompressor.c b/src/lib/decompressor.c
new file mode 100644
index 0000000..7a5bf3b
--- /dev/null
+++ b/src/lib/decompressor.c
@@ -0,0 +1,87 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright 2018 Google Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of
+ * the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <bootblock_common.h>
+#include <commonlib/compression.h>
+#include <delay.h>
+#include <program_loading.h>
+#include <symbols.h>
+
+/*
+ * The LZ4-compressed bootblock image, embedded in this stage via .incbin.
+ * The section type is written %progbits rather than @progbits because '@'
+ * starts a comment in 32-bit ARM assembly; '%' is accepted for both ARM and
+ * ARM64 and matches the %object used by the .type directive below.
+ */
+extern u8 compressed_bootblock[];
+asm (
+	".pushsection .data.compressed_bootblock,\"a\",%progbits\n\t"
+	".type compressed_bootblock, %object\n\t"
+	".balign 8\n"
+	"compressed_bootblock:\n\t"
+	".incbin \"" __BUILD_DIR__ "/cbfs/" CONFIG_CBFS_PREFIX "/bootblock.lz4\"\n\t"
+	".size compressed_bootblock, . - compressed_bootblock\n\t"
+	".popsection\n\t"
+);
+
+/*
+ * Argument block handed to the bootblock: a base timestamp plus the two
+ * LZ4 decompression timestamps whose values main() fills in.
+ */
+struct bootblock_arg arg = {
+	.base_timestamp = 0,
+	.num_timestamps = 2,
+	.timestamps = {
+		{ .entry_id = TS_START_ULZ4F },
+		{ .entry_id = TS_END_ULZ4F },
+	},
+};
+
+/* Describes the decompressed bootblock image that prog_run() is given. */
+struct prog prog_bootblock = {
+	.type = PROG_BOOTBLOCK,
+	.entry = (void *)_bootblock,
+	.arg = &arg,
+};
+
+/* SoC hook run before decompression; weak so SoC code can override it. */
+__weak void decompressor_soc_init(void) { /* no-op */ }
+
+/*
+ * Decompressor entry point: decompress the embedded bootblock to _bootblock
+ * and run it. With CONFIG_COLLECT_TIMESTAMPS, a base timestamp is taken after
+ * timer init and further ones immediately before and after decompression.
+ */
+void main(void)
+{
+	init_timer();
+
+	if (IS_ENABLED(CONFIG_COLLECT_TIMESTAMPS))
+		arg.base_timestamp = timestamp_get();
+
+	decompressor_soc_init();
+
+	if (IS_ENABLED(CONFIG_COLLECT_TIMESTAMPS))
+		arg.timestamps[0].entry_stamp = timestamp_get();
+
+	size_t out_size = ulz4f(compressed_bootblock, _bootblock);
+	prog_segment_loaded((uintptr_t)_bootblock, out_size, SEG_FINAL);
+
+	if (IS_ENABLED(CONFIG_COLLECT_TIMESTAMPS))
+		arg.timestamps[1].entry_stamp = timestamp_get();
+
+	prog_run(&prog_bootblock);
+}
diff --git a/src/lib/program.ld b/src/lib/program.ld
index 668b29b..156b862 100644
--- a/src/lib/program.ld
+++ b/src/lib/program.ld
@@ -34,8 +34,10 @@
 	*(.rom.data);
 	*(.text._start);
 	*(.text.stage_entry);
-#if ENV_BOOTBLOCK && !(IS_ENABLED(CONFIG_ARCH_BOOTBLOCK_X86_32) || \
-		       IS_ENABLED(CONFIG_ARCH_BOOTBLOCK_X86_64))
+#if (ENV_DECOMPRESSOR || ENV_BOOTBLOCK && \
+		!IS_ENABLED(CONFIG_COMPRESS_BOOTBLOCK)) && \
+    !(IS_ENABLED(CONFIG_ARCH_BOOTBLOCK_X86_32) || \
+      IS_ENABLED(CONFIG_ARCH_BOOTBLOCK_X86_64))
 	KEEP(*(.id));
 #endif
 	*(.text);
diff --git a/toolchain.inc b/toolchain.inc
index f8ee875..ec57b5d 100644
--- a/toolchain.inc
+++ b/toolchain.inc
@@ -47,7 +47,8 @@
 ROMCC=CCC_CC="$(ROMCC_BIN)" $(CC)
 endif
 
-COREBOOT_STANDARD_STAGES := bootblock verstage romstage ramstage
+COREBOOT_STANDARD_STAGES := decompressor bootblock verstage romstage ramstage
+MAP-decompressor := bootblock
 
 ARCHDIR-i386	:= x86
 ARCHDIR-x86_32	:= x86