commonlib/device_tree: Improve node and property allocation speed

Now that the device tree code has been made available in libpayload, we
should reintroduce the node and property allocation optimization for
libpayload's memory allocator that was originally dropped when porting
this code from depthcharge to coreboot.

On a Qualcomm SC7180 unflattening a normal ChromeOS kernel device tree,
this saves roughly ~145ms. The total scratch space used is about ~1350
nodes and ~5200 properties, so we leave a little room to grow with the
constants hardcoded here.

Change-Id: I0f4d80a8b750febfb069b32ef47304ccecdc35af
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/83208
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Maximilian Brune <maximilian.brune@9elements.com>
Reviewed-by: Raul Rangel <rrangel@chromium.org>
diff --git a/src/commonlib/device_tree.c b/src/commonlib/device_tree.c
index f70aaf7..0ec6ea9 100644
--- a/src/commonlib/device_tree.c
+++ b/src/commonlib/device_tree.c
@@ -25,6 +25,41 @@
 #define FDT_MAX_MEMORY_REGIONS 16 // should be a good enough upper bound
 
 /*
+ * libpayload's malloc() has a linear allocation complexity, which means that it
+ * degrades massively if we make a few thousand small allocations. Preventing
+ * that problem with a custom scratchpad is well-worth some increase in BSS
+ * size (64 * 2000 + 40 * 10000 = ~1/2 MB).
+ */
+
+/* Try to give these a healthy margin above what the average kernel DT needs. */
+#define LP_ALLOC_NODE_SCRATCH_COUNT 2000
+#define LP_ALLOC_PROP_SCRATCH_COUNT 10000
+
+static struct device_tree_node *alloc_node(void)
+{
+#ifndef __COREBOOT__
+	static struct device_tree_node scratch[LP_ALLOC_NODE_SCRATCH_COUNT];
+	static int counter = 0;
+
+	if (counter < ARRAY_SIZE(scratch))
+		return &scratch[counter++];
+#endif
+	return xzalloc(sizeof(struct device_tree_node));
+}
+
+static struct device_tree_property *alloc_prop(void)
+{
+#ifndef __COREBOOT__
+	static struct device_tree_property scratch[LP_ALLOC_PROP_SCRATCH_COUNT];
+	static int counter = 0;
+
+	if (counter < ARRAY_SIZE(scratch))
+		return &scratch[counter++];
+#endif
+	return xzalloc(sizeof(struct device_tree_property));
+}
+
+/*
  * Functions for picking apart flattened trees.
  */
 
@@ -625,14 +660,14 @@
 		return 0;
 	offset += size;
 
-	struct device_tree_node *node = xzalloc(sizeof(*node));
+	struct device_tree_node *node = alloc_node();
 	*new_node = node;
 	node->name = name;
 
 	struct fdt_property fprop;
 	last = &node->properties;
 	while ((size = fdt_next_property(blob, offset, &fprop))) {
-		struct device_tree_property *prop = xzalloc(sizeof(*prop));
+		struct device_tree_property *prop = alloc_prop();
 		prop->prop = fprop;
 
 		if (dt_prop_is_phandle(prop)) {
@@ -1003,9 +1038,7 @@
 		if (!create)
 			return NULL;
 
-		found = calloc(1, sizeof(*found));
-		if (!found)
-			return NULL;
+		found = alloc_node();
 		found->name = strdup(*path);
 		if (!found->name)
 			return NULL;
@@ -1333,7 +1366,7 @@
 		}
 	}
 
-	prop = xzalloc(sizeof(*prop));
+	prop = alloc_prop();
 	list_insert_after(&prop->list_node, &node->properties);
 	prop->prop.name = name;
 	prop->prop.data = data;
@@ -1847,7 +1880,7 @@
 				continue;
 			}
 		} else {
-			dst_prop = xzalloc(sizeof(*dst_prop));
+			dst_prop = alloc_prop();
 			list_insert_after(&dst_prop->list_node,
 					  &dst->properties);
 		}
@@ -1866,7 +1899,7 @@
 			}
 
 		if (!dst_node) {
-			dst_node = xzalloc(sizeof(*dst_node));
+			dst_node = alloc_node();
 			*dst_node = *src_node;
 			list_insert_after(&dst_node->list_node, &dst->children);
 		} else {