Fixup previous memcpy optimization.
Different gcc versions handle __builtin_memcpy differently.
Add -minline-all-stringops to force inlining of memcpy on old gcc.
Always use __builtin_memcpy for all memcpy calls.
Use memcpy4() for the option ROM case, where 4-byte accesses are important.
diff --git a/Makefile b/Makefile
index 1cf7ba2..19e63d4 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,8 @@
COMMONCFLAGS = -Wall -Os -MD -m32 -march=i386 -mregparm=3 \
-mpreferred-stack-boundary=2 -mrtd -freg-struct-return \
-ffreestanding -fwhole-program -fomit-frame-pointer \
- -fno-delete-null-pointer-checks -Wno-strict-aliasing
+ -fno-delete-null-pointer-checks -Wno-strict-aliasing \
+ -minline-all-stringops
COMMONCFLAGS += $(call cc-option,$(CC),-nopie,)
COMMONCFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
COMMONCFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)