x86 memcpy: Copy 4 bytes at once
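This is a slight improvement over the byte-wise rep movsb loop: the
first n / 4 dwords are copied with rep movsl, and only the remaining
n % 4 bytes are copied with rep movsb.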
Change-Id: Id71d9bfe5330b154a5c62fac85ce3955ae89b057
Signed-off-by: Stefan Reinauer <reinauer@google.com>
Reviewed-on: http://review.coreboot.org/1742
Tested-by: build bot (Jenkins)
Reviewed-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
diff --git a/src/arch/x86/lib/memcpy.c b/src/arch/x86/lib/memcpy.c
index f8607cf..7f079ce 100644
--- a/src/arch/x86/lib/memcpy.c
+++ b/src/arch/x86/lib/memcpy.c
@@ -5,11 +5,13 @@
unsigned long d0, d1, d2;
asm volatile(
- "rep movsb"
- : "=S"(d0), "=D"(d1), "=c"(d2)
- : "0"(src), "1"(dest), "2"(n)
+ "rep ; movsl\n\t"
+ "movl %4,%%ecx\n\t"
+ "rep ; movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
: "memory"
- );
+ );
return dest;
}