Add section header parsing and use it in the mk-payload step

This completes the improvements to the ELF file parsing code.  We can
now parse section headers too, across all 4 combinations of word size
and endianness. I had hoped to completely remove the use of htonl
until I found it in cbfs_image.c. That's a battle for another day.

There's now a handy macro to create magic numbers in host byte order.
I'm using it for all the PAYLOAD_SEGMENT_* constants and maybe
we can use it for the others too, but this is sensitive code and
I'd rather change one thing at a time.

To make this as easy as possible for callers, ELF parsing is done by
just one function:

int
elf_headers(const struct buffer *pinput,
	    Elf64_Ehdr *ehdr,
	    Elf64_Phdr **pphdr,
	    Elf64_Shdr **pshdr)

which requires the ehdr and pphdr pointers to be non-NULL, but allows
pshdr to be NULL. If pshdr is NULL, the code does not try to read the
section headers.

To satisfy our powerful scripts, I had to remove the ^M from an unrelated
microcode file.

BUG=None
TEST=Build a peppy image (known to boot) with old and new versions and verify they are bit-for-bit the same. This was also fully tested across all chromebooks for building and booting and running chromeos.
BRANCH=None

Change-Id: I54dad887d922428b6175fdb6a9cdfadd8a6bb889
Signed-off-by: Ronald G. Minnich <rminnich@google.com>
Reviewed-on: https://chromium-review.googlesource.com/181272
Reviewed-by: Ronald Minnich <rminnich@chromium.org>
Commit-Queue: Ronald Minnich <rminnich@chromium.org>
Tested-by: Ronald Minnich <rminnich@chromium.org>
Signed-off-by: Ronald G. Minnich <rminnich@google.com>
Reviewed-on: http://review.coreboot.org/5098
Tested-by: build bot (Jenkins)
Reviewed-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
diff --git a/util/cbfstool/Makefile b/util/cbfstool/Makefile
index b5f74b8..5064782 100644
--- a/util/cbfstool/Makefile
+++ b/util/cbfstool/Makefile
@@ -13,7 +13,7 @@
 BINARY:=$(obj)/cbfstool
 
 COMMON:=cbfstool.o common.o cbfs_image.o compress.o fit.o
-COMMON+=cbfs-mkstage.o cbfs-mkpayload.o xdr.o
+COMMON+=elfheaders.o cbfs-mkstage.o cbfs-mkpayload.o xdr.o
 # LZMA
 COMMON+=lzma/lzma.o
 COMMON+=lzma/C/LzFind.o  lzma/C/LzmaDec.o  lzma/C/LzmaEnc.o
diff --git a/util/cbfstool/Makefile.inc b/util/cbfstool/Makefile.inc
index 4270d75..f3596fb 100644
--- a/util/cbfstool/Makefile.inc
+++ b/util/cbfstool/Makefile.inc
@@ -5,6 +5,7 @@
 cbfsobj += cbfs_image.o
 cbfsobj += cbfs-mkstage.o
 cbfsobj += cbfs-mkpayload.o
+cbfsobj += elfheaders.o
 cbfsobj += xdr.o
 cbfsobj += fit.o
 # LZMA
diff --git a/util/cbfstool/cbfs-mkpayload.c b/util/cbfstool/cbfs-mkpayload.c
index 78988a0..b1dd1c0 100644
--- a/util/cbfstool/cbfs-mkpayload.c
+++ b/util/cbfstool/cbfs-mkpayload.c
@@ -23,18 +23,39 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "elf.h"
 #include "common.h"
 #include "cbfs.h"
-#include "elf.h"
 #include "fv.h"
 #include "coff.h"
 
+/* Serialize nseg segment headers from segs to the start of output->data via
+ * xdr_be. The buffer is assumed to be large enough; output->size is untouched.
+ * NOTE(review): callers assume sizeof(*segs) bytes per entry - confirm. */
+static void xdr_segs(struct buffer *output,
+	struct cbfs_payload_segment *segs, int nseg)
+{
+	struct buffer outheader;	/* write cursor over output->data */
+	int i;
+
+	outheader.data = output->data;
+	outheader.size = 0;	/* presumably puts append at data[size]; see xdr.c */
+
+	for(i = 0; i < nseg; i++){
+		xdr_be.put32(&outheader, segs[i].type);
+		xdr_be.put32(&outheader, segs[i].compression);
+		xdr_be.put32(&outheader, segs[i].offset);
+		xdr_be.put64(&outheader, segs[i].load_addr);
+		xdr_be.put32(&outheader, segs[i].len);
+		xdr_be.put32(&outheader, segs[i].mem_len);
+	}
+}
 int parse_elf_to_payload(const struct buffer *input,
 			 struct buffer *output, comp_algo algo)
 {
-	Elf32_Phdr *phdr;
-	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)input->data;
-	Elf32_Shdr *shdr;
+	Elf64_Phdr *phdr;
+	Elf64_Ehdr ehdr;
+	Elf64_Shdr *shdr;
 	char *header;
 	char *strtab;
 	int headers;
@@ -44,39 +65,26 @@
 	struct cbfs_payload_segment *segs;
 	int i;
 
-	if(!iself((unsigned char *)input->data)){
-		INFO("The payload file is not in ELF format!\n");
-		return -1;
-	}
-
-	// The tool may work in architecture-independent way.
-	if (arch != CBFS_ARCHITECTURE_UNKNOWN &&
-	    !((ehdr->e_machine == EM_ARM) && (arch == CBFS_ARCHITECTURE_ARMV7)) &&
-	    !((ehdr->e_machine == EM_386) && (arch == CBFS_ARCHITECTURE_X86))) {
-		ERROR("The payload file has the wrong architecture\n");
-		return -1;
-	}
-
 	comp_func_ptr compress = compression_function(algo);
 	if (!compress)
 		return -1;
 
+	if (elf_headers(input, &ehdr, &phdr, &shdr) < 0)
+		return -1;
+
 	DEBUG("start: parse_elf_to_payload\n");
-	headers = ehdr->e_phnum;
-	header = (char *)ehdr;
+	headers = ehdr.e_phnum;
+	header = input->data;
 
-	phdr = (Elf32_Phdr *) & (header[ehdr->e_phoff]);
-	shdr = (Elf32_Shdr *) & (header[ehdr->e_shoff]);
-
-	strtab = &header[shdr[ehdr->e_shstrndx].sh_offset];
+	strtab = &header[shdr[ehdr.e_shstrndx].sh_offset];
 
 	/* Count the number of headers - look for the .notes.pinfo
 	 * section */
 
-	for (i = 0; i < ehdr->e_shnum; i++) {
+	for (i = 0; i < ehdr.e_shnum; i++) {
 		char *name;
 
-		if (i == ehdr->e_shstrndx)
+		if (i == ehdr.e_shstrndx)
 			continue;
 
 		if (shdr[i].sh_size == 0)
@@ -106,29 +114,39 @@
 
 		segments++;
 	}
-
+	/* allocate the segment header array */
+	segs = calloc(segments, sizeof(*segs));
+	if (segs == NULL)
+		return -1;
 	/* Allocate a block of memory to store the data in */
 	if (buffer_create(output, (segments * sizeof(*segs)) + isize,
 			  input->name) != 0)
 		return -1;
 	memset(output->data, 0, output->size);
 
-	doffset = (segments * sizeof(struct cbfs_payload_segment));
+	doffset = (segments * sizeof(*segs));
 
-	segs = (struct cbfs_payload_segment *)output->data;
+	/* Set up for output marshaling. This is a bit tricky because
+	 * the segment headers go at the front of the output and the
+	 * data starts right after them. The headers must be converted
+	 * to the output format, but the data passes through
+	 * unchanged. Unlike most XDR code, we handle both in the same
+	 * pass. doffset tracks where the next chunk of raw data goes
+	 * in the output buffer; the headers are collected in host
+	 * byte order in the separate segs array. To keep it simple
+	 * for the reader, xdr_segs() then marshals all the headers to
+	 * the front of the output at once, at the end.
+	 */
 	segments = 0;
 
-	for (i = 0; i < ehdr->e_shnum; i++) {
+	for (i = 0; i < ehdr.e_shnum; i++) {
 		char *name;
-
-		if (i == ehdr->e_shstrndx)
+		if (i == ehdr.e_shstrndx)
 			continue;
 
 		if (shdr[i].sh_size == 0)
 			continue;
-
 		name = (char *)(strtab + shdr[i].sh_name);
-
 		if (!strcmp(name, ".note.pinfo")) {
 			segs[segments].type = PAYLOAD_SEGMENT_PARAMS;
 			segs[segments].load_addr = 0;
@@ -148,17 +166,13 @@
 	for (i = 0; i < headers; i++) {
 		if (phdr[i].p_type != PT_LOAD)
 			continue;
-
 		if (phdr[i].p_memsz == 0)
 			continue;
-
 		if (phdr[i].p_filesz == 0) {
 			segs[segments].type = PAYLOAD_SEGMENT_BSS;
-			segs[segments].load_addr =
-			    (uint64_t)htonll(phdr[i].p_paddr);
-			segs[segments].mem_len =
-			    (uint32_t)htonl(phdr[i].p_memsz);
-			segs[segments].offset = htonl(doffset);
+			segs[segments].load_addr = phdr[i].p_paddr;
+			segs[segments].mem_len = phdr[i].p_memsz;
+			segs[segments].offset = doffset;
 
 			segments++;
 			continue;
@@ -168,37 +182,37 @@
 			segs[segments].type = PAYLOAD_SEGMENT_CODE;
 		else
 			segs[segments].type = PAYLOAD_SEGMENT_DATA;
-		segs[segments].load_addr = (uint64_t)htonll(phdr[i].p_paddr);
-		segs[segments].mem_len = (uint32_t)htonl(phdr[i].p_memsz);
-		segs[segments].compression = htonl(algo);
-		segs[segments].offset = htonl(doffset);
+		segs[segments].load_addr = phdr[i].p_paddr;
+		segs[segments].mem_len = phdr[i].p_memsz;
+		segs[segments].compression = algo;
+		segs[segments].offset = doffset;
 
 		int len;
 		compress((char *)&header[phdr[i].p_offset],
 			 phdr[i].p_filesz, output->data + doffset, &len);
-		segs[segments].len = htonl(len);
+		segs[segments].len = len;
 
 		/* If the compressed section is larger, then use the
 		   original stuff */
 
 		if ((unsigned int)len > phdr[i].p_filesz) {
 			segs[segments].compression = 0;
-			segs[segments].len = htonl(phdr[i].p_filesz);
-
+			segs[segments].len = phdr[i].p_filesz;
 			memcpy(output->data + doffset,
 			       &header[phdr[i].p_offset], phdr[i].p_filesz);
 		}
 
-		doffset += ntohl(segs[segments].len);
-		osize += ntohl(segs[segments].len);
+		doffset += segs[segments].len;
+		osize += segs[segments].len;
 
 		segments++;
 	}
 
 	segs[segments].type = PAYLOAD_SEGMENT_ENTRY;
-	segs[segments++].load_addr = htonll(ehdr->e_entry);
+	segs[segments++].load_addr = ehdr.e_entry;
 
-	output->size = (segments * sizeof(struct cbfs_payload_segment)) + osize;
+	output->size = (segments * sizeof(*segs)) + osize;
+	xdr_segs(output, segs, segments);
 	return 0;
 }
 
@@ -209,7 +223,7 @@
 				 comp_algo algo)
 {
 	comp_func_ptr compress;
-	struct cbfs_payload_segment *segs;
+	struct cbfs_payload_segment segs[2];
 	int doffset, len = 0;
 
 	compress = compression_function(algo);
@@ -217,36 +231,35 @@
 		return -1;
 
 	DEBUG("start: parse_flat_binary_to_payload\n");
-	if (buffer_create(output, (2 * sizeof(*segs) + input->size),
+	if (buffer_create(output, (sizeof(segs) + input->size),
 			  input->name) != 0)
 		return -1;
 	memset(output->data, 0, output->size);
 
-	segs = (struct cbfs_payload_segment *)output->data;
 	doffset = (2 * sizeof(*segs));
 
 	/* Prepare code segment */
 	segs[0].type = PAYLOAD_SEGMENT_CODE;
-	segs[0].load_addr = htonll(loadaddress);
-	segs[0].mem_len = htonl(input->size);
-	segs[0].offset = htonl(doffset);
+	segs[0].load_addr = loadaddress;
+	segs[0].mem_len = input->size;
+	segs[0].offset = doffset;
 
 	compress(input->data, input->size, output->data + doffset, &len);
-	segs[0].compression = htonl(algo);
-	segs[0].len = htonl(len);
+	segs[0].compression = algo;
+	segs[0].len = len;
 
 	if ((unsigned int)len >= input->size) {
 		WARN("Compressing data would make it bigger - disabled.\n");
 		segs[0].compression = 0;
-		segs[0].len = htonl(input->size);
+		segs[0].len = input->size;
 		memcpy(output->data + doffset, input->data, input->size);
 	}
 
 	/* prepare entry point segment */
 	segs[1].type = PAYLOAD_SEGMENT_ENTRY;
-	segs[1].load_addr = htonll(entrypoint);
-	output->size = doffset + ntohl(segs[0].len);
-
+	segs[1].load_addr = entrypoint;
+	output->size = doffset + segs[0].len;
+	xdr_segs(output, segs, 2);
 	return 0;
 }
 
@@ -254,7 +267,7 @@
 			 struct buffer *output, comp_algo algo)
 {
 	comp_func_ptr compress;
-	struct cbfs_payload_segment *segs;
+	struct cbfs_payload_segment segs[2];
 	int doffset, len = 0;
 	firmware_volume_header_t *fv;
 	ffs_file_header_t *fh;
@@ -343,37 +356,36 @@
 		return -1;
 	}
 
-	if (buffer_create(output, (2 * sizeof(*segs) + input->size),
+	if (buffer_create(output, (sizeof(segs) + input->size),
 			  input->name) != 0)
 		return -1;
 
 	memset(output->data, 0, output->size);
 
-	segs = (struct cbfs_payload_segment *)output->data;
-	doffset = (2 * sizeof(*segs));
+	doffset = (sizeof(segs));
 
 	/* Prepare code segment */
 	segs[0].type = PAYLOAD_SEGMENT_CODE;
-	segs[0].load_addr = htonll(loadaddress);
-	segs[0].mem_len = htonl(input->size);
-	segs[0].offset = htonl(doffset);
+	segs[0].load_addr = loadaddress;
+	segs[0].mem_len = input->size;
+	segs[0].offset = doffset;
 
 	compress(input->data, input->size, output->data + doffset, &len);
-	segs[0].compression = htonl(algo);
-	segs[0].len = htonl(len);
+	segs[0].compression = algo;
+	segs[0].len = len;
 
 	if ((unsigned int)len >= input->size) {
 		WARN("Compressing data would make it bigger - disabled.\n");
 		segs[0].compression = 0;
-		segs[0].len = htonl(input->size);
+		segs[0].len = input->size;
 		memcpy(output->data + doffset, input->data, input->size);
 	}
 
 	/* prepare entry point segment */
 	segs[1].type = PAYLOAD_SEGMENT_ENTRY;
-	segs[1].load_addr = htonll(entrypoint);
-	output->size = doffset + ntohl(segs[0].len);
-
+	segs[1].load_addr = entrypoint;
+	output->size = doffset + segs[0].len;
+	xdr_segs(output, segs, 2);
 	return 0;
 
 }
diff --git a/util/cbfstool/cbfs-mkstage.c b/util/cbfstool/cbfs-mkstage.c
index 5c8014f..6a5f6f7 100644
--- a/util/cbfstool/cbfs-mkstage.c
+++ b/util/cbfstool/cbfs-mkstage.c
@@ -24,246 +24,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "elf.h"
 #include "common.h"
 #include "cbfs.h"
-#include "elf.h"
-
-/*
- * Short form: this is complicated, but we've tried making it simple
- * and we keep hitting problems with our ELF parsing.
- *
- * The ELF parsing situation has always been a bit tricky.  In fact,
- * we (and most others) have been getting it wrong in small ways for
- * years. Recently this has caused real trouble for the ARM V8 build.
- * In this file we attempt to finally get it right for all variations
- * of endian-ness and word size and target architectures and
- * architectures we might get run on. Phew!. To do this we borrow a
- * page from the FreeBSD NFS xdr model (see elf_ehdr and elf_phdr),
- * the Plan 9 endianness functions (see xdr.c), and Go interfaces (see
- * how we use buffer structs in this file). This ends up being a bit
- * wordy at the lowest level, but greatly simplifies the elf parsing
- * code and removes a common source of bugs, namely, forgetting to
- * flip type endianness when referencing a struct member.
- *
- * ELF files can have four combinations of data layout: 32/64, and
- * big/little endian.  Further, to add to the fun, depending on the
- * word size, the size of the ELF structs varies. The coreboot SELF
- * format is simpler in theory: it's supposed to be always BE, and the
- * various struct members allow room for growth: the entry point is
- * always 64 bits, for example, so the size of a SELF struct is
- * constant, regardless of target architecture word size.  Hence, we
- * need to do some transformation of the ELF files.
- *
- * A given architecture, realistically, only supports one of the four
- * combinations at a time as the 'native' format. Hence, our code has
- * been sprinkled with every variation of [nh]to[hn][sll] over the
- * years. We've never quite gotten it all right, however, and a quick
- * pass over this code revealed another bug.  It's all worked because,
- * until now, all the working platforms that had CBFS were 32 LE. Even then,
- * however, bugs crept in: we recently realized that we're not
- * transforming the entry point to big format when we store into the
- * SELF image.
- *
- * The problem is essentially an XDR operation:
- * we have something in a foreign format and need to transform it.
- * It's most like XDR because:
- * 1) the byte order can be wrong
- * 2) the word size can be wrong
- * 3) the size of elements in the stream depends on the value
- *    of other elements in the stream
- * it's not like XDR because:
- * 1) the byte order can be right
- * 2) the word size can be right
- * 3) the struct members are all on a natural alignment
- *
- * Hence, this new approach.  To cover word size issues, we *always*
- * transform the two structs we care about, the file header and
- * program header, into a native struct in the 64 bit format:
- *
- * [32,little] -> [Elf64_Ehdr, Elf64_Phdr]
- * [64,little] -> [Elf64_Ehdr, Elf64_Phdr]
- * [32,big] -> [Elf64_Ehdr, Elf64_Phdr]
- * [64,big] -> [Elf64_Ehdr, Elf64_Phdr]
- * Then we just use those structs, and all the need for inline ntoh* goes away,
- * as well as all the chances for error.
- * This works because all the SELF structs have fields large enough for
- * the largest ELF 64 struct members, and all the Elf64 struct members
- * are at least large enough for all ELF 32 struct members.
- * We end up with one function to do all our ELF parsing, and two functions
- * to transform the headers. For the put case, we also have
- * XDR functions, and hopefully we'll never again spend 5 years with the
- * wrong endian-ness on an output value :-)
- * This should work for all word sizes and endianness we hope to target.
- * I *really* don't want to be here for 128 bit addresses.
- *
- * The parse functions are called with a pointer to an input buffer
- * struct. One might ask: are there enough bytes in the input buffer?
- * We know there need to be at *least* sizeof(Elf32_Ehdr) +
- * sizeof(Elf32_Phdr) bytes. Realistically, there has to be some data
- * too.  If we start to worry, though we have not in the past, we
- * might apply the simple test: the input buffer needs to be at least
- * sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) bytes because, even if it's
- * ELF 32, there's got to be *some* data! This is not theoretically
- * accurate but it is actually good enough in practice. It allows the
- * header transformation code to ignore the possibility of underrun.
- *
- * We also must accomodate different ELF files, and hence formats,
- * in the same cbfs invocation. We might load a 64-bit payload
- * on a 32-bit machine; we might even have a mixed armv7/armv8
- * SOC or even a system with an x86/ARM!
- *
- * A possibly problematic (though unlikely to be so) assumption
- * is that we expect the BIOS to remain in the lowest 32 bits
- * of the physical address space. Since ARMV8 has standardized
- * on that, and x86_64 also has, this seems a safe assumption.
- *
- * To repeat, ELF structs are different sizes because ELF struct
- * members are different sizes, depending on values in the ELF file
- * header. For this we use the functions defined in xdr.c, which
- * consume bytes, convert the endianness, and advance the data pointer
- * in the buffer struct.
- */
-
-/* Get the ident array, so we can figure out
- * endian-ness, word size, and in future other useful
- * parameters
- */
-static void
-elf_eident(struct buffer *input, Elf64_Ehdr *ehdr)
-{
-	memmove(ehdr->e_ident, input->data, sizeof(ehdr->e_ident));
-	input->data += sizeof(ehdr->e_ident);
-	input->size -= sizeof(ehdr->e_ident);
-}
-
-
-static void
-elf_ehdr(struct buffer *input, Elf64_Ehdr *ehdr, struct xdr *xdr, int bit64)
-{
-	ehdr->e_type = xdr->get16(input);
-	ehdr->e_machine = xdr->get16(input);
-	ehdr->e_version = xdr->get32(input);
-	if (bit64){
-		ehdr->e_entry = xdr->get64(input);
-		ehdr->e_phoff = xdr->get64(input);
-		ehdr->e_shoff = xdr->get64(input);
-	} else {
-		ehdr->e_entry = xdr->get32(input);
-		ehdr->e_phoff = xdr->get32(input);
-		ehdr->e_shoff = xdr->get32(input);
-	}
-	ehdr->e_flags = xdr->get32(input);
-	ehdr->e_ehsize = xdr->get16(input);
-	ehdr->e_phentsize = xdr->get16(input);
-	ehdr->e_phnum = xdr->get16(input);
-	ehdr->e_shentsize = xdr->get16(input);
-	ehdr->e_shnum = xdr->get16(input);
-	ehdr->e_shstrndx = xdr->get16(input);
-}
-
-static void
-elf_phdr(struct buffer *pinput, Elf64_Phdr *phdr,
-	 int entsize, struct xdr *xdr, int bit64)
-{
-	/*
-	 * The entsize need not be sizeof(*phdr).
-	 * Hence, it is easier to keep a copy of the input,
-	 * as the xdr functions may not advance the input
-	 * pointer the full entsize; rather than get tricky
-	 * we just advance it below.
-	 */
-	struct buffer input = *pinput;
-	if (bit64){
-		phdr->p_type = xdr->get32(&input);
-		phdr->p_flags = xdr->get32(&input);
-		phdr->p_offset = xdr->get64(&input);
-		phdr->p_vaddr = xdr->get64(&input);
-		phdr->p_paddr = xdr->get64(&input);
-		phdr->p_filesz = xdr->get64(&input);
-		phdr->p_memsz = xdr->get64(&input);
-		phdr->p_align = xdr->get64(&input);
-	} else {
-		phdr->p_type = xdr->get32(&input);
-		phdr->p_offset = xdr->get32(&input);
-		phdr->p_vaddr = xdr->get32(&input);
-		phdr->p_paddr = xdr->get32(&input);
-		phdr->p_filesz = xdr->get32(&input);
-		phdr->p_memsz = xdr->get32(&input);
-		phdr->p_flags = xdr->get32(&input);
-		phdr->p_align = xdr->get32(&input);
-	}
-	pinput->size -= entsize;
-	pinput->data += entsize;
-}
-
-/* Get the headers from the buffer.
- * Return -1 in the event of an error.
- */
-static int
-elf_headers(const struct buffer *pinput, Elf64_Ehdr *ehdr, Elf64_Phdr **pphdr)
-{
-	int i;
-	struct xdr *xdr = &xdr_le;
-	int bit64 = 0;
-	struct buffer input = *(struct buffer *)pinput;
-	struct buffer phdr_buf;
-	Elf64_Phdr *phdr;
-
-	if (!iself((unsigned char *)pinput->data)) {
-		ERROR("The stage file is not in ELF format!\n");
-		return -1;
-	}
-
-	elf_eident(&input, ehdr);
-	bit64 = ehdr->e_ident[EI_CLASS] == ELFCLASS64;
-	/* Assume LE unless we are sure otherwise.
-	 * We're not going to take on the task of
-	 * fully validating the ELF file. That way
-	 * lies madness.
-	 */
-	if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-		xdr = &xdr_be;
-
-	elf_ehdr(&input, ehdr, xdr, bit64);
-
-	// The tool may work in architecture-independent way.
-	if (arch != CBFS_ARCHITECTURE_UNKNOWN &&
-	    !((ehdr->e_machine == EM_ARM) && (arch == CBFS_ARCHITECTURE_ARMV7)) &&
-	    !((ehdr->e_machine == EM_386) && (arch == CBFS_ARCHITECTURE_X86))) {
-		ERROR("The stage file has the wrong architecture\n");
-		return -1;
-	}
-
-	if (pinput->size < ehdr->e_phoff){
-		ERROR("The program header offset is greater than "
-		      "the remaining file size."
-		      "%ld bytes left, program header offset is %ld \n",
-		      pinput->size, ehdr->e_phoff);
-		return -1;
-	}
-	/* cons up an input buffer for the headers.
-	 * Note that the program headers can be anywhere,
-	 * per the ELF spec, You'd be surprised how many ELF
-	 * readers miss this little detail.
-	 */
-	phdr_buf.data = &pinput->data[ehdr->e_phoff];
-	phdr_buf.size = ehdr->e_phentsize * ehdr->e_phnum;
-	if (phdr_buf.size > (pinput->size - ehdr->e_phoff)){
-		ERROR("The file is not large enough for the program headers."
-		      "%ld bytes left, %ld bytes of headers\n",
-		      pinput->size - ehdr->e_phoff, phdr_buf.size);
-		return -1;
-	}
-	/* gather up all the phdrs.
-	 * We do them all at once because there is more
-	 * than one loop over all the phdrs.
-	 */
-	phdr = calloc(sizeof(*phdr), ehdr->e_phnum);
-	for (i = 0; i < ehdr->e_phnum; i++)
-		elf_phdr(&phdr_buf, &phdr[i], ehdr->e_phentsize, xdr, bit64);
-	*pphdr = phdr;
-	return 0;
-}
 
 /* returns size of result, or -1 if error.
  * Note that, with the new code, this function
@@ -287,7 +50,7 @@
 
 	DEBUG("start: parse_elf_to_stage(location=0x%x)\n", *location);
 
-	if (elf_headers(input, &ehdr, &phdr) < 0)
+	if (elf_headers(input, &ehdr, &phdr, NULL) < 0)
 		return -1;
 
 	headers = ehdr.e_phnum;
diff --git a/util/cbfstool/cbfs.h b/util/cbfstool/cbfs.h
index 35d0670..585a26d 100644
--- a/util/cbfstool/cbfs.h
+++ b/util/cbfstool/cbfs.h
@@ -20,6 +20,18 @@
 #define __CBFS_H
 
 #include <stdint.h>
+#include "elf.h"
+
+/* Build a magic number as a host-byte-order value from four arguments:
+ * b3 becomes the high-order byte and b0 the low-order byte.
+ * In the coreboot tools, we go with the 32-bit magic number
+ * convention. NOTE(review): each argument presumably fits in a byte.
+ * This was an inline function, but that breaks anything that uses
+ * the result in a case statement, so it is a macro.
+ */
+
+#define makemagic(b3, b2, b1, b0)\
+	(((b3)<<24) | ((b2) << 16) | ((b1) << 8) | (b0))
 
 #define CBFS_HEADER_MAGIC  0x4F524243
 #define CBFS_HEADPTR_ADDR_X86 0xFFFFFFFC
@@ -60,11 +72,11 @@
 	uint32_t memlen;
 } __attribute__ ((packed));
 
-#define PAYLOAD_SEGMENT_CODE	0x45444F43
-#define PAYLOAD_SEGMENT_DATA	0x41544144
-#define PAYLOAD_SEGMENT_BSS	0x20535342
-#define PAYLOAD_SEGMENT_PARAMS	0x41524150
-#define PAYLOAD_SEGMENT_ENTRY	0x52544E45
+#define PAYLOAD_SEGMENT_CODE	makemagic('C', 'O', 'D', 'E')
+#define PAYLOAD_SEGMENT_DATA	makemagic('D', 'A', 'T', 'A')
+#define PAYLOAD_SEGMENT_BSS     makemagic(' ', 'B', 'S', 'S')
+#define PAYLOAD_SEGMENT_PARAMS	makemagic('P', 'A', 'R', 'A')
+#define PAYLOAD_SEGMENT_ENTRY	makemagic('E', 'N', 'T', 'R')
 
 struct cbfs_payload_segment {
 	uint32_t type;
@@ -110,7 +122,22 @@
 int cbfs_file_header(unsigned long physaddr);
 #define CBFS_NAME(_c) (((char *) (_c)) + sizeof(struct cbfs_file))
 #define CBFS_SUBHEADER(_p) ( (void *) ((((uint8_t *) (_p)) + ntohl((_p)->offset))) )
+/* cbfs_image.c */
+uint32_t get_cbfs_entry_type(const char *name, uint32_t default_value);
+const char *get_cbfs_entry_type_name(uint32_t type);
+uint32_t get_cbfs_compression(const char *name, uint32_t unknown);
 
+/* common.c */
+int find_master_header(void *romarea, size_t size);
+void recalculate_rom_geometry(void *romarea);
 struct cbfs_file *cbfs_create_empty_file(uint32_t physaddr, uint32_t size);
+const char *strfiletype(uint32_t number);
+
+/* elfheaders.c */
+int
+elf_headers(const struct buffer *pinput,
+	    Elf64_Ehdr *ehdr,
+	    Elf64_Phdr **pphdr,
+	    Elf64_Shdr **pshdr);
 
 #endif
diff --git a/util/cbfstool/cbfs_image.c b/util/cbfstool/cbfs_image.c
index 2cd0c7a..363691f 100644
--- a/util/cbfstool/cbfs_image.c
+++ b/util/cbfstool/cbfs_image.c
@@ -24,6 +24,7 @@
 #include <string.h>
 
 #include "common.h"
+#include "elf.h"
 #include "cbfs_image.h"
 
 /* The file name align is not defined in CBFS spec -- only a preference by
diff --git a/util/cbfstool/cbfstool.c b/util/cbfstool/cbfstool.c
index 34002a9..9935f51 100644
--- a/util/cbfstool/cbfstool.c
+++ b/util/cbfstool/cbfstool.c
@@ -26,6 +26,7 @@
 #include <unistd.h>
 #include <getopt.h>
 #include "common.h"
+#include "elf.h"
 #include "cbfs.h"
 #include "cbfs_image.h"
 #include "fit.h"
diff --git a/util/cbfstool/common.c b/util/cbfstool/common.c
index 8f38a4a..356ba2f 100644
--- a/util/cbfstool/common.c
+++ b/util/cbfstool/common.c
@@ -23,9 +23,9 @@
 #include <stdlib.h>
 #include <string.h>
 #include <libgen.h>
+#include "elf.h"
 #include "common.h"
 #include "cbfs.h"
-#include "elf.h"
 
 /* Utilities */
 
diff --git a/util/cbfstool/common.h b/util/cbfstool/common.h
index ed75a7f..e49a3f6 100644
--- a/util/cbfstool/common.h
+++ b/util/cbfstool/common.h
@@ -144,17 +144,7 @@
 	void (*put64)(struct buffer *input, uint64_t val);
 };
 
-/* common.c */
-
-int find_master_header(void *romarea, size_t size);
-void recalculate_rom_geometry(void *romarea);
-const char *strfiletype(uint32_t number);
-
-/* cbfs_image.c */
-uint32_t get_cbfs_entry_type(const char *name, uint32_t default_value);
-const char *get_cbfs_entry_type_name(uint32_t type);
-uint32_t get_cbfs_compression(const char *name, uint32_t unknown);
-
+/* xdr.c */
 extern struct xdr xdr_le, xdr_be;
 
 #endif
diff --git a/util/cbfstool/elfheaders.c b/util/cbfstool/elfheaders.c
new file mode 100644
index 0000000..fd7a1a1
--- /dev/null
+++ b/util/cbfstool/elfheaders.c
@@ -0,0 +1,343 @@
+/*
+ * elf header parsing.
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA, 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "elf.h"
+#include "common.h"
+#include "cbfs.h"
+
+/*
+ * Short form: this is complicated, but we've tried making it simple
+ * and we keep hitting problems with our ELF parsing.
+ *
+ * The ELF parsing situation has always been a bit tricky.  In fact,
+ * we (and most others) have been getting it wrong in small ways for
+ * years. Recently this has caused real trouble for the ARM V8 build.
+ * In this file we attempt to finally get it right for all variations
+ * of endian-ness and word size and target architectures and
+ * architectures we might get run on. Phew! To do this we borrow a
+ * page from the FreeBSD NFS xdr model (see elf_ehdr and elf_phdr),
+ * the Plan 9 endianness functions (see xdr.c), and Go interfaces (see
+ * how we use buffer structs in this file). This ends up being a bit
+ * wordy at the lowest level, but greatly simplifies the elf parsing
+ * code and removes a common source of bugs, namely, forgetting to
+ * flip type endianness when referencing a struct member.
+ *
+ * ELF files can have four combinations of data layout: 32/64, and
+ * big/little endian.  Further, to add to the fun, depending on the
+ * word size, the size of the ELF structs varies. The coreboot SELF
+ * format is simpler in theory: it's supposed to be always BE, and the
+ * various struct members allow room for growth: the entry point is
+ * always 64 bits, for example, so the size of a SELF struct is
+ * constant, regardless of target architecture word size.  Hence, we
+ * need to do some transformation of the ELF files.
+ *
+ * A given architecture, realistically, only supports one of the four
+ * combinations at a time as the 'native' format. Hence, our code has
+ * been sprinkled with every variation of [nh]to[hn][sll] over the
+ * years. We've never quite gotten it all right, however, and a quick
+ * pass over this code revealed another bug.  It's all worked because,
+ * until now, all the working platforms that had CBFS were 32 LE. Even then,
+ * however, bugs crept in: we recently realized that we're not
+ * transforming the entry point to big format when we store into the
+ * SELF image.
+ *
+ * The problem is essentially an XDR operation:
+ * we have something in a foreign format and need to transform it.
+ * It's most like XDR because:
+ * 1) the byte order can be wrong
+ * 2) the word size can be wrong
+ * 3) the size of elements in the stream depends on the value
+ *    of other elements in the stream
+ * it's not like XDR because:
+ * 1) the byte order can be right
+ * 2) the word size can be right
+ * 3) the struct members are all on a natural alignment
+ *
+ * Hence, this new approach.  To cover word size issues, we *always*
+ * transform the structs we care about (the file header, the program
+ * headers and the section headers) into native structs in the 64 bit format:
+ *
+ * [32,little] -> [Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr]
+ * [64,little] -> [Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr]
+ * [32,big] -> [Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr]
+ * [64,big] -> [Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr]
+ * Then we just use those structs, and all the need for inline ntoh* goes away,
+ * as well as all the chances for error.
+ * This works because all the SELF structs have fields large enough for
+ * the largest ELF 64 struct members, and all the Elf64 struct members
+ * are at least large enough for all ELF 32 struct members.
+ * We end up with one function to do all our ELF parsing, and two functions
+ * to transform the headers. For the put case, we also have
+ * XDR functions, and hopefully we'll never again spend 5 years with the
+ * wrong endian-ness on an output value :-)
+ * This should work for all word sizes and endianness we hope to target.
+ * I *really* don't want to be here for 128 bit addresses.
+ *
+ * The parse functions are called with a pointer to an input buffer
+ * struct. One might ask: are there enough bytes in the input buffer?
+ * We know there need to be at *least* sizeof(Elf32_Ehdr) +
+ * sizeof(Elf32_Phdr) bytes. Realistically, there has to be some data
+ * too.  If we start to worry, though we have not in the past, we
+ * might apply the simple test: the input buffer needs to be at least
+ * sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) bytes because, even if it's
+ * ELF 32, there's got to be *some* data! This is not theoretically
+ * accurate but it is actually good enough in practice. It allows the
+ * header transformation code to ignore the possibility of underrun.
+ *
+ * We also must accommodate different ELF files, and hence formats,
+ * in the same cbfs invocation. We might load a 64-bit payload
+ * on a 32-bit machine; we might even have a mixed armv7/armv8
+ * SOC or even a system with an x86/ARM!
+ *
+ * A possibly problematic (though unlikely to be so) assumption
+ * is that we expect the BIOS to remain in the lowest 32 bits
+ * of the physical address space. Since ARMV8 has standardized
+ * on that, and x86_64 also has, this seems a safe assumption.
+ *
+ * To repeat, ELF structs are different sizes because ELF struct
+ * members are different sizes, depending on values in the ELF file
+ * header. For this we use the functions defined in xdr.c, which
+ * consume bytes, convert the endianness, and advance the data pointer
+ * in the buffer struct.
+ */
+
+/* Copy the ident array out of the input and step past it. The ident
+ * bytes are what tell us endian-ness and word size, so they come first.
+ */
+static void
+elf_eident(struct buffer *input, Elf64_Ehdr *ehdr)
+{
+	const size_t ident_len = sizeof(ehdr->e_ident);
+	memmove(ehdr->e_ident, input->data, ident_len);
+	input->data += ident_len;
+	input->size -= ident_len;
+}
+
+
+/* Decode the rest of the file header, after e_ident, into *ehdr.
+ * Fields are consumed in file order; the three address-sized fields
+ * are 64 bits wide when bit64 is set and 32 bits wide otherwise.
+ */
+static void
+elf_ehdr(struct buffer *input, Elf64_Ehdr *ehdr, struct xdr *xdr, int bit64)
+{
+	ehdr->e_type = xdr->get16(input);
+	ehdr->e_machine = xdr->get16(input);
+	ehdr->e_version = xdr->get32(input);
+	/* entry point and the two table offsets scale with the class */
+	ehdr->e_entry = bit64 ? xdr->get64(input) : xdr->get32(input);
+	ehdr->e_phoff = bit64 ? xdr->get64(input) : xdr->get32(input);
+	ehdr->e_shoff = bit64 ? xdr->get64(input) : xdr->get32(input);
+	/* everything that follows is the same width in both classes */
+	ehdr->e_flags = xdr->get32(input);
+	ehdr->e_ehsize = xdr->get16(input);
+	ehdr->e_phentsize = xdr->get16(input);
+	ehdr->e_phnum = xdr->get16(input);
+	ehdr->e_shentsize = xdr->get16(input);
+	ehdr->e_shnum = xdr->get16(input);
+	ehdr->e_shstrndx = xdr->get16(input);
+}
+
+/* Decode one program header from the front of pinput into *phdr and
+ * then step pinput forward by entsize bytes.
+ *
+ * entsize comes from the file header and need not be the number of
+ * bytes decoded here, so decoding runs on a scratch copy of the buffer
+ * and the caller's buffer is advanced by exactly entsize at the end.
+ *
+ * The 32 and 64 bit layouts differ in field width and in where p_flags
+ * sits: right after p_type for 64 bit, just before p_align for 32 bit.
+ */
+static void
+elf_phdr(struct buffer *pinput, Elf64_Phdr *phdr,
+	 int entsize, struct xdr *xdr, int bit64)
+{
+	struct buffer cursor = *pinput;
+
+	phdr->p_type = xdr->get32(&cursor);
+	/* 64 bit: the flags follow the type */
+	if (bit64)
+		phdr->p_flags = xdr->get32(&cursor);
+	phdr->p_offset = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	phdr->p_vaddr = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	phdr->p_paddr = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	phdr->p_filesz = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	phdr->p_memsz = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	/* 32 bit: the flags follow the sizes */
+	if (!bit64)
+		phdr->p_flags = xdr->get32(&cursor);
+	phdr->p_align = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+
+	/* skip the whole entry, however much of it was decoded */
+	pinput->data += entsize;
+	pinput->size -= entsize;
+}
+
+/* Decode one section header from the front of pinput into *shdr and
+ * then step pinput forward by entsize bytes.
+ *
+ * entsize comes from the file header and need not be the number of
+ * bytes decoded here, so decoding runs on a scratch copy of the buffer
+ * and the caller's buffer is advanced by exactly entsize at the end.
+ * The xdr getters consume bytes from the scratch copy and convert them
+ * from the file's byte order.
+ *
+ * Field order is the same for both classes; only the widths change.
+ * sh_name, sh_type, sh_link and sh_info are always 32 bits, the other
+ * six fields are 64 bits when bit64 is set and 32 bits otherwise.
+ */
+static void
+elf_shdr(struct buffer *pinput, Elf64_Shdr *shdr,
+	 int entsize, struct xdr *xdr, int bit64)
+{
+	struct buffer cursor = *pinput;
+
+	/* fixed width */
+	shdr->sh_name = xdr->get32(&cursor);
+	shdr->sh_type = xdr->get32(&cursor);
+	/* class-dependent width */
+	shdr->sh_flags = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	shdr->sh_addr = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	shdr->sh_offset = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	shdr->sh_size = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	/* fixed width */
+	shdr->sh_link = xdr->get32(&cursor);
+	shdr->sh_info = xdr->get32(&cursor);
+	/* class-dependent width */
+	shdr->sh_addralign = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+	shdr->sh_entsize = bit64 ? xdr->get64(&cursor) : xdr->get32(&cursor);
+
+	/* skip the whole entry, however much of it was decoded */
+	pinput->data += entsize;
+	pinput->size -= entsize;
+}
+
+/* Parse the ELF file header, the program headers and, unless pshdr is
+ * NULL, the section headers of the file in pinput into host-order
+ * 64 bit structs. *pphdr and *pshdr are set to calloc()ed arrays of
+ * ehdr->e_phnum and ehdr->e_shnum entries, which the caller frees; an
+ * array of zero entries may be NULL. Returns 0 on success. On error
+ * returns -1, leaves nothing allocated and sets neither array pointer.
+ */
+int
+elf_headers(const struct buffer *pinput,
+	    Elf64_Ehdr *ehdr,
+	    Elf64_Phdr **pphdr,
+	    Elf64_Shdr **pshdr)
+{
+	int i;
+	struct xdr *xdr = &xdr_le;
+	int bit64 = 0;
+	struct buffer input = *pinput;
+	struct buffer hdr_buf;
+	Elf64_Phdr *phdr = NULL;
+	Elf64_Shdr *shdr = NULL;
+
+	/* iself() and elf_eident() read the ident bytes unconditionally */
+	if (pinput->size < sizeof(ehdr->e_ident) ||
+	    !iself((unsigned char *)pinput->data)) {
+		ERROR("The stage file is not in ELF format!\n");
+		return -1;
+	}
+	elf_eident(&input, ehdr);
+	bit64 = ehdr->e_ident[EI_CLASS] == ELFCLASS64;
+	/* Assume LE unless we are sure otherwise; no full validation here. */
+	if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+		xdr = &xdr_be;
+	/* elf_ehdr() decodes a full file header without checking for one */
+	if (pinput->size < (bit64 ? sizeof(Elf64_Ehdr) : sizeof(Elf32_Ehdr))) {
+		ERROR("The file is too small to hold an ELF header\n");
+		return -1;
+	}
+	elf_ehdr(&input, ehdr, xdr, bit64);
+	// The tool may work in architecture-independent way.
+	if (arch != CBFS_ARCHITECTURE_UNKNOWN &&
+	    !((ehdr->e_machine == EM_ARM) && (arch == CBFS_ARCHITECTURE_ARMV7)) &&
+	    !((ehdr->e_machine == EM_386) && (arch == CBFS_ARCHITECTURE_X86))) {
+		ERROR("The stage file has the wrong architecture\n");
+		return -1;
+	}
+
+	if (pinput->size < ehdr->e_phoff){
+		ERROR("The program header offset is greater than "
+		      "the remaining file size."
+		      "%llu bytes left, program header offset is %llu \n",
+		      (unsigned long long)pinput->size,
+		      (unsigned long long)ehdr->e_phoff);
+		return -1;
+	}
+	/* The program headers can be anywhere, per the ELF spec. Widen before
+	 * multiplying: the product of two 16 bit fields can overflow an int. */
+	hdr_buf.data = &pinput->data[ehdr->e_phoff];
+	hdr_buf.size = (size_t)ehdr->e_phentsize * ehdr->e_phnum;
+	if (hdr_buf.size > (pinput->size - ehdr->e_phoff)){
+		ERROR("The file is not large enough for the program headers."
+		      "%llu bytes left, %llu bytes of headers\n",
+		      (unsigned long long)(pinput->size - ehdr->e_phoff),
+		      (unsigned long long)hdr_buf.size);
+		return -1;
+	}
+	phdr = calloc(ehdr->e_phnum, sizeof(*phdr));
+	if (!phdr && ehdr->e_phnum) {
+		ERROR("Out of memory for the program headers\n");
+		return -1;
+	}
+	for (i = 0; i < ehdr->e_phnum; i++)
+		elf_phdr(&hdr_buf, &phdr[i], ehdr->e_phentsize, xdr, bit64);
+	if (!pshdr) {
+		*pphdr = phdr;
+		return 0;
+	}
+
+	if (pinput->size < ehdr->e_shoff){
+		ERROR("The section header offset is greater than "
+		      "the remaining file size."
+		      "%llu bytes left, section header offset is %llu \n",
+		      (unsigned long long)pinput->size,
+		      (unsigned long long)ehdr->e_shoff);
+		goto free_phdr;
+	}
+	hdr_buf.data = &pinput->data[ehdr->e_shoff];
+	hdr_buf.size = (size_t)ehdr->e_shentsize * ehdr->e_shnum;
+	if (hdr_buf.size > (pinput->size - ehdr->e_shoff)){
+		ERROR("The file is not large enough for the section headers."
+		      "%llu bytes left, %llu bytes of headers\n",
+		      (unsigned long long)(pinput->size - ehdr->e_shoff),
+		      (unsigned long long)hdr_buf.size);
+		goto free_phdr;
+	}
+	shdr = calloc(ehdr->e_shnum, sizeof(*shdr));
+	if (!shdr && ehdr->e_shnum) {
+		ERROR("Out of memory for the section headers\n");
+		goto free_phdr;
+	}
+	for (i = 0; i < ehdr->e_shnum; i++)
+		elf_shdr(&hdr_buf, &shdr[i], ehdr->e_shentsize, xdr, bit64);
+	*pphdr = phdr;
+	*pshdr = shdr;
+	return 0;
+free_phdr:
+	free(phdr);
+	return -1;
+}
+
diff --git a/util/cbfstool/fit.c b/util/cbfstool/fit.c
index f15ccf5..a368dad 100644
--- a/util/cbfstool/fit.c
+++ b/util/cbfstool/fit.c
@@ -23,6 +23,7 @@
 #include <stdio.h>
 
 #include "common.h"
+#include "elf.h"
 #include "cbfs.h"
 #include "cbfs_image.h"
 #include "fit.h"