lib: Unify log2() and related functions

This patch adds a few bit counting functions that are commonly needed
for certain register calculations. We previously had a log2()
implementation already, but it was awkwardly split between some C code
that's only available in ramstage and an optimized x86-specific
implementation in pre-RAM that prevented other archs from pulling it
into earlier stages.

Using __builtin_clz() as the baseline allows GCC to inline optimized
assembly for most archs (including CLZ on ARM/ARM64 and BSR on x86), and
to perform constant-folding if possible. What was previously named log2f
on pre-RAM x86 is now ffs, since that's the standard name for that
operation and I honestly don't have the slightest idea how it could've
ever ended up being called log2f (which in POSIX is 'binary(2) LOGarithm
with Float result', whereas the Find First Set operation has no direct
correlation to logarithms that I know of). Make ffs result 0-based
instead of the POSIX standard's 1-based since that is consistent with
clz, log2 and the former log2f, and generally closer to what you want
for most applications (a value that can directly be used as a shift to
reach the found bit). Call it __ffs() instead of ffs() to avoid problems
when importing code, since that's what Linux uses for the 0-based
operation.

CQ-DEPEND=CL:273023
BRANCH=None
BUG=None
TEST=Built on Big, Falco, Jerry, Oak and Urara. Compared old and new
log2() and __ffs() results on Falco for a bunch of test values.

Change-Id: I599209b342059e17b3130621edb6b6bbeae26876
Signed-off-by: Patrick Georgi <pgeorgi@chromium.org>
Original-Commit-Id: 3701a16ae944ecff9c54fa9a50d28015690fcb2f
Original-Change-Id: I60f7cf893792508188fa04d088401a8bca4b4af6
Original-Signed-off-by: Julius Werner <jwerner@chromium.org>
Original-Reviewed-on: https://chromium-review.googlesource.com/273008
Original-Reviewed-by: Patrick Georgi <pgeorgi@chromium.org>
Reviewed-on: http://review.coreboot.org/10394
Tested-by: build bot (Jenkins)
Reviewed-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
diff --git a/src/arch/arm/include/utils.h b/src/arch/arm/include/utils.h
deleted file mode 100644
index 2482c6b..0000000
--- a/src/arch/arm/include/utils.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * (C) Copyright 2010
- * Texas Instruments, <www.ti.com>
- * Aneesh V <aneesh@ti.com>
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc.
- */
-#ifndef _UTILS_H_
-#define _UTILS_H_
-
-static inline s32 log_2_n_round_up(u32 n)
-{
-	s32 log2n = -1;
-	u32 temp = n;
-
-	while (temp) {
-		log2n++;
-		temp >>= 1;
-	}
-
-	if (n & (n - 1))
-		return log2n + 1; /* not power of 2 - round up */
-	else
-		return log2n; /* power of 2 */
-}
-
-static inline s32 log_2_n_round_down(u32 n)
-{
-	s32 log2n = -1;
-	u32 temp = n;
-
-	while (temp) {
-		log2n++;
-		temp >>= 1;
-	}
-
-	return log2n;
-}
-
-#endif
diff --git a/src/arch/x86/include/arch/io.h b/src/arch/x86/include/arch/io.h
index ce94773..5c85019 100644
--- a/src/arch/x86/include/arch/io.h
+++ b/src/arch/x86/include/arch/io.h
@@ -174,8 +174,8 @@
 }
 
 /* Conflicts with definition in lib.h */
-#if defined(__ROMCC__) || defined(__SMM__)
-static inline int log2(int value)
+#if defined(__ROMCC__)
+static inline int log2(u32 value)
 {
         unsigned int r = 0;
         __asm__ volatile (
@@ -187,10 +187,8 @@
         return r;
 
 }
-#endif
 
-#if defined(__PRE_RAM__) || defined(__SMM__)
-static inline int log2f(int value)
+static inline int __ffs(u32 value)
 {
         unsigned int r = 0;
         __asm__ volatile (
diff --git a/src/include/lib.h b/src/include/lib.h
index 7ad33dd..b81b1b1 100644
--- a/src/include/lib.h
+++ b/src/include/lib.h
@@ -24,12 +24,6 @@
 #include <stdint.h>
 #include <types.h>
 
-#if !defined(__ROMCC__) /* Conflicts with inline function in arch/io.h */
-/* Defined in src/lib/clog2.c */
-unsigned long log2(unsigned long x);
-#endif
-unsigned long log2_ceil(unsigned long x);
-
 /* Defined in src/lib/lzma.c */
 unsigned long ulzma(unsigned char *src, unsigned char *dst);
 
@@ -49,4 +43,16 @@
 void hexdump(const void *memory, size_t length);
 void hexdump32(char LEVEL, const void *d, size_t len);
 
+#if !defined(__ROMCC__)
+/* Count Leading Zeroes: clz(0) == 32, clz(0xf) == 28, clz(1 << 31) == 0 */
+static inline int clz(u32 x) { return x ? __builtin_clz(x) : sizeof(x) * 8; }
+/* Integer binary logarithm (rounding down): log2(0) == -1, log2(5) == 2 */
+static inline int log2(u32 x) { return (int)sizeof(x) * 8 - clz(x) - 1; }
+/* Find First Set: __ffs(1) == 0, __ffs(0) == -1, __ffs(1<<31) == 31 */
+static inline int __ffs(u32 x) { return log2(x & (~x + 1)); }
+#endif
+
+/* Binary logarithm (rounding up): log2_ceil(0) == -1, log2_ceil(5) == 3 */
+static inline int log2_ceil(u32 x) { return x ? log2(x) + !!(x & (x - 1)) : -1; }
+
 #endif /* __LIB_H__ */
diff --git a/src/lib/Makefile.inc b/src/lib/Makefile.inc
index 557cd66..76c8fd3 100644
--- a/src/lib/Makefile.inc
+++ b/src/lib/Makefile.inc
@@ -25,7 +25,7 @@
 bootblock-y += cbfs_boot_props.c
 bootblock-$(CONFIG_COMMON_CBFS_SPI_WRAPPER) += cbfs_spi.c
 bootblock-$(CONFIG_GENERIC_GPIO_LIB) += gpio.c
-
+bootblock-y += libgcc.c
 bootblock-$(CONFIG_GENERIC_UDELAY) += timer.c
 
 bootblock-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
@@ -45,6 +45,7 @@
 verstage-y += halt.c
 verstage-y += fmap.c
 verstage-y += cbfs_boot_props.c
+verstage-y += libgcc.c
 verstage-y += memcmp.c
 verstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c
 verstage-y += region.c
@@ -77,6 +78,7 @@
 romstage-y += cbfs_boot_props.c
 romstage-$(CONFIG_COMMON_CBFS_SPI_WRAPPER) += cbfs_spi.c
 romstage-$(CONFIG_COMPRESS_RAMSTAGE) += lzma.c lzmadecode.c
+romstage-y += libgcc.c
 romstage-$(CONFIG_PRIMITIVE_MEMTEST) += primitive_memtest.c
 ramstage-$(CONFIG_PRIMITIVE_MEMTEST) += primitive_memtest.c
 romstage-$(CONFIG_CACHE_AS_RAM) += ramtest.c
@@ -115,8 +117,6 @@
 ramstage-$(CONFIG_COMMON_CBFS_SPI_WRAPPER) += cbfs_spi.c
 ramstage-y += lzma.c lzmadecode.c
 ramstage-y += stack.c
-ramstage-y += clog2.c
-romstage-y += clog2.c
 ramstage-$(CONFIG_CONSOLE_CBMEM) += cbmem_console.c
 ramstage-$(CONFIG_BOOTSPLASH) += jpeg.c
 ramstage-$(CONFIG_TRACE) += trace.c
diff --git a/src/lib/clog2.c b/src/lib/clog2.c
deleted file mode 100644
index 5e0d591..0000000
--- a/src/lib/clog2.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <console/console.h>
-#include <lib.h>
-
-/* Assume 8 bits per byte */
-#define CHAR_BIT 8
-
-unsigned long log2(unsigned long x)
-{
-	/* assume 8 bits per byte. */
-	unsigned long pow = sizeof(x) * CHAR_BIT - 1ULL;
-	unsigned long i = 1ULL << pow;
-
-	if (!x) {
-		printk(BIOS_WARNING, "%s called with invalid parameter of 0\n",
-			__func__);
-		return -1;
-	}
-
-	for (; i > x; i >>= 1, pow--);
-
-	return pow;
-}
-
-unsigned long log2_ceil(unsigned long x)
-{
-	unsigned long pow;
-
-	if (!x)
-		return -1;
-
-	pow = log2(x);
-
-	if (x > (1ULL << pow))
-		pow++;
-
-	return pow;
-}
diff --git a/src/lib/libgcc.c b/src/lib/libgcc.c
new file mode 100644
index 0000000..14685be
--- /dev/null
+++ b/src/lib/libgcc.c
@@ -0,0 +1,56 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright 2015 Google Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc.
+ */
+
+#include <types.h>
+
+/*
+ * Provide platform-independent backend implementation for __builtin_clz() in
+ * <lib.h> in case GCC does not have an assembly version for this arch.
+ */
+
+#if !IS_ENABLED(CONFIG_ARCH_X86) /* work around lack of --gc-sections on x86 */
+int __clzsi2(u32 a);
+int __clzsi2(u32 a)
+{
+	static const u8 four_bit_table[] = {	/* leading zeroes of a nibble */
+		[0x0] = 4, [0x1] = 3, [0x2] = 2, [0x3] = 2,
+		[0x4] = 1, [0x5] = 1, [0x6] = 1, [0x7] = 1,
+		[0x8] = 0, [0x9] = 0, [0xa] = 0, [0xb] = 0,
+		[0xc] = 0, [0xd] = 0, [0xe] = 0, [0xf] = 0,
+	};
+	int r = 0;	/* leading zeroes counted (and shifted out) so far */
+
+	if (!(a & (0xffffu << 16))) {	/* unsigned mask: no signed-shift UB */
+		r += 16;
+		a <<= 16;
+	}
+
+	if (!(a & (0xffu << 24))) {	/* top byte empty */
+		r += 8;
+		a <<= 8;
+	}
+
+	if (!(a & (0xfu << 28))) {	/* top nibble empty */
+		r += 4;
+		a <<= 4;
+	}
+
+	return r + four_bit_table[a >> 28];	/* resolve the remaining nibble */
+}
+#endif
diff --git a/src/northbridge/amd/amdk8/coherent_ht.c b/src/northbridge/amd/amdk8/coherent_ht.c
index a8d8700..6554e07 100644
--- a/src/northbridge/amd/amdk8/coherent_ht.c
+++ b/src/northbridge/amd/amdk8/coherent_ht.c
@@ -66,6 +66,7 @@
 #include <device/pci_def.h>
 #include <device/pci_ids.h>
 #include <device/hypertransport_def.h>
+#include <lib.h>
 #include <stdlib.h>
 #include <arch/io.h>
 #include <pc80/mc146818rtc.h>
diff --git a/src/northbridge/amd/amdk8/raminit.c b/src/northbridge/amd/amdk8/raminit.c
index 4213cfb..c58abb1 100644
--- a/src/northbridge/amd/amdk8/raminit.c
+++ b/src/northbridge/amd/amdk8/raminit.c
@@ -7,6 +7,7 @@
 #include <cpu/x86/cache.h>
 #include <cpu/x86/mtrr.h>
 #include <cpu/amd/mtrr.h>
+#include <lib.h>
 #include <stdlib.h>
 #include <arch/acpi.h>
 #include <reset.h>
@@ -1655,7 +1656,7 @@
 	/* if the next lower frequency gives a CL at least one whole cycle
 	 * shorter, select that (see end of BKDG 4.1.1.1) */
 	if (freq < sizeof(cl_at_freq)-1 && cl_at_freq[freq+1] &&
-		log2f(cl_at_freq[freq]) - log2f(cl_at_freq[freq+1]) >= 2)
+		__ffs(cl_at_freq[freq]) - __ffs(cl_at_freq[freq+1]) >= 2)
 			freq++;
 
 	if (freq == sizeof(cl_at_freq))
@@ -1690,7 +1691,7 @@
 	/* Update DRAM Timing Low with our selected cas latency */
 	value = pci_read_config32(ctrl->f2, DRAM_TIMING_LOW);
 	value &= ~(DTL_TCL_MASK << DTL_TCL_SHIFT);
-	value |= latencies[log2f(cl_at_freq[freq]) - 2] << DTL_TCL_SHIFT;
+	value |= latencies[__ffs(cl_at_freq[freq]) - 2] << DTL_TCL_SHIFT;
 	pci_write_config32(ctrl->f2, DRAM_TIMING_LOW, value);
 
 	result.dimm_mask = dimm_mask;
diff --git a/src/northbridge/amd/amdk8/raminit_f.c b/src/northbridge/amd/amdk8/raminit_f.c
index 1469684..1c1a6ea 100644
--- a/src/northbridge/amd/amdk8/raminit_f.c
+++ b/src/northbridge/amd/amdk8/raminit_f.c
@@ -25,6 +25,7 @@
 #include <cpu/x86/tsc.h>
 #include <cpu/amd/mtrr.h>
 
+#include <lib.h>
 #include <stdlib.h>
 #include <arch/acpi.h>
 #include "raminit.h"
diff --git a/src/northbridge/amd/amdk8/raminit_test.c b/src/northbridge/amd/amdk8/raminit_test.c
index be46f27..87e281d 100644
--- a/src/northbridge/amd/amdk8/raminit_test.c
+++ b/src/northbridge/amd/amdk8/raminit_test.c
@@ -1,5 +1,6 @@
 #include <unistd.h>
 #include <limits.h>
+#include <lib.h>
 #include <stdint.h>
 #include <string.h>
 #include <setjmp.h>
diff --git a/src/northbridge/intel/e7501/raminit.c b/src/northbridge/intel/e7501/raminit.c
index f4fc9a8..93a3a5b 100644
--- a/src/northbridge/intel/e7501/raminit.c
+++ b/src/northbridge/intel/e7501/raminit.c
@@ -12,6 +12,7 @@
 /* converted to C 6/2004 yhlu */
 
 #include <assert.h>
+#include <lib.h>
 #include <spd.h>
 #include <sdram_mode.h>
 #include <stdlib.h>
diff --git a/src/northbridge/intel/i3100/raminit_ep80579.c b/src/northbridge/intel/i3100/raminit_ep80579.c
index 85660e9..ee8c4fd 100644
--- a/src/northbridge/intel/i3100/raminit_ep80579.c
+++ b/src/northbridge/intel/i3100/raminit_ep80579.c
@@ -22,6 +22,7 @@
 #include <cpu/x86/mtrr.h>
 #include <cpu/x86/cache.h>
 #include <cpu/intel/speedstep.h>
+#include <lib.h>
 #include "raminit_ep80579.h"
 #include "ep80579.h"
 
diff --git a/src/northbridge/intel/i855/raminit.c b/src/northbridge/intel/i855/raminit.c
index 81dcbb5..a43f5be 100644
--- a/src/northbridge/intel/i855/raminit.c
+++ b/src/northbridge/intel/i855/raminit.c
@@ -19,6 +19,7 @@
  */
 
 #include <assert.h>
+#include <lib.h>
 #include <spd.h>
 #include <sdram_mode.h>
 #include <stdlib.h>
diff --git a/src/northbridge/intel/i945/raminit.c b/src/northbridge/intel/i945/raminit.c
index a3be680..7bd6240 100644
--- a/src/northbridge/intel/i945/raminit.c
+++ b/src/northbridge/intel/i945/raminit.c
@@ -20,6 +20,7 @@
 #include <console/console.h>
 #include <cpu/x86/mtrr.h>
 #include <cpu/x86/cache.h>
+#include <lib.h>
 #include <pc80/mc146818rtc.h>
 #include <spd.h>
 #include <string.h>
diff --git a/src/soc/rockchip/rk3288/clock.c b/src/soc/rockchip/rk3288/clock.c
index 2c56376..a2e8d88 100644
--- a/src/soc/rockchip/rk3288/clock.c
+++ b/src/soc/rockchip/rk3288/clock.c
@@ -21,6 +21,7 @@
 #include <assert.h>
 #include <console/console.h>
 #include <delay.h>
+#include <lib.h>
 #include <soc/addressmap.h>
 #include <soc/clock.h>
 #include <soc/grf.h>
@@ -236,22 +237,6 @@
 	return 0;
 }
 
-/*
-    TODO:
-    it should be replaced by lib.h function
-   'unsigned long log2(unsigned long x)'
-*/
-static unsigned int log2(unsigned int value)
-{
-	unsigned int div = 0;
-
-	while (value != 1) {
-		div++;
-		value = ALIGN_UP(value, 2) / 2;
-	}
-	return div;
-}
-
 void rkclk_init(void)
 {
 	u32 aclk_div;
diff --git a/src/southbridge/via/k8t890/traf_ctrl.c b/src/southbridge/via/k8t890/traf_ctrl.c
index b4620da..24c1e65 100644
--- a/src/southbridge/via/k8t890/traf_ctrl.c
+++ b/src/southbridge/via/k8t890/traf_ctrl.c
@@ -24,10 +24,9 @@
 #include <arch/acpi.h>
 #include <arch/acpigen.h>
 #include <cpu/amd/powernow.h>
+#include <lib.h>
 #include "k8t890.h"
 
-extern unsigned long log2(unsigned long x);
-
 static void mmconfig_set_resources(device_t dev)
 {
 	struct resource *resource;