rpi4: Accommodate "armstub8.bin" header at the beginning of BL31 image

The Raspberry Pi GPU firmware checks for a magic value at offset 240
(0xf0) of the armstub8.bin image it loads. If that value matches,
it writes the kernel load address and the DTB address into subsequent
memory locations.
We can use these addresses to avoid hardcoding these values into the BL31
image, to make it more flexible and a drop-in replacement for the
official armstub8.bin.

Reserving just 16 bytes at offset 240 of the final image file is not easily
possible, though, as this location is in the middle of the generic BL31
entry point code.
However we can prepend an extra section before the actual BL31 image, to
contain the magic and addresses. This needs to be 4KB, because the
actual BL31 entry point needs to be page aligned.

Use the platform linker script hook that the generic code provides, to
add an almost empty 4KB code block before the entry point code. The very
first word contains a branch instruction to jump over this page, into
the actual entry code.
This also gives us plenty of room for the SMP pens later.

Change-Id: I38caa5e7195fa39cbef8600933a03d86f09263d6
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/plat/rpi/rpi4/aarch64/armstub8_header.S b/plat/rpi/rpi4/aarch64/armstub8_header.S
new file mode 100644
index 0000000..246358d
--- /dev/null
+++ b/plat/rpi/rpi4/aarch64/armstub8_header.S
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*
+ * armstub8.bin header to let the GPU firmware recognise this code.
+ * It will then write the load address of the kernel image and the DT
+ * after the header magic in RAM, so we can read those addresses at runtime.
+ */
+
+.text
+	b	armstub8_end
+
+.global stub_magic
+.global dtb_ptr32
+.global kernel_entry32
+
+.org 0xf0
+armstub8:
+stub_magic:
+	.word 0x5afe570b
+stub_version:
+	.word 0
+dtb_ptr32:
+	.word 0x0
+kernel_entry32:
+	.word 0x0
+
+/*
+ * Technically an offset of 0x100 would suffice, but the follow-up code
+ * (bl31_entrypoint.S at BL31_BASE) needs to be page aligned, so pad here
+ * till the end of the first 4K page.
+ */
+.org 0x1000
+armstub8_end:
diff --git a/plat/rpi/rpi4/include/plat.ld.S b/plat/rpi/rpi4/include/plat.ld.S
new file mode 100644
index 0000000..9262fad
--- /dev/null
+++ b/plat/rpi/rpi4/include/plat.ld.S
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Stub linker script to provide the armstub8.bin header before the actual
+ * code. If the GPU firmware finds a magic value at offset 240 in
+ * armstub8.bin, it will put the DTB and kernel load address in subsequent
+ * words. We can then read those values to find the proper NS entry point
+ * and find our DTB more flexibly.
+ */
+
+MEMORY {
+    PRERAM (rwx): ORIGIN = 0, LENGTH = 4096
+}
+
+SECTIONS
+{
+    .armstub8 . : {
+        *armstub8_header.o(.text*)
+        KEEP(*(.armstub8))
+    } >PRERAM
+}
diff --git a/plat/rpi/rpi4/platform.mk b/plat/rpi/rpi4/platform.mk
index 3ff180e..b20251c 100644
--- a/plat/rpi/rpi4/platform.mk
+++ b/plat/rpi/rpi4/platform.mk
@@ -16,6 +16,7 @@
 
 BL31_SOURCES		+=	lib/cpus/aarch64/cortex_a72.S		\
 				plat/rpi/rpi4/aarch64/plat_helpers.S	\
+				plat/rpi/rpi4/aarch64/armstub8_header.S	\
 				drivers/arm/gic/common/gic_common.c     \
 				drivers/arm/gic/v2/gicv2_helpers.c      \
 				drivers/arm/gic/v2/gicv2_main.c         \
@@ -41,6 +42,8 @@
     TF_CFLAGS_aarch64	+=	-mtune=cortex-a72
 endif
 
+# Add support for platform supplied linker script for BL31 build
+$(eval $(call add_define,PLAT_EXTRA_LD_SCRIPT))
 
 # Enable all errata workarounds for Cortex-A72
 ERRATA_A72_859971		:= 1
diff --git a/plat/rpi/rpi4/rpi4_bl31_setup.c b/plat/rpi/rpi4/rpi4_bl31_setup.c
index de582b3..58025b2 100644
--- a/plat/rpi/rpi4/rpi4_bl31_setup.c
+++ b/plat/rpi/rpi4/rpi4_bl31_setup.c
@@ -13,12 +13,21 @@
 #include <lib/mmio.h>
 #include <lib/xlat_tables/xlat_mmu_helpers.h>
 #include <lib/xlat_tables/xlat_tables_defs.h>
+#include <lib/xlat_tables/xlat_tables_v2.h>
 #include <plat/common/platform.h>
 
 #include <drivers/arm/gicv2.h>
 
 #include <rpi_shared.h>
 
+/*
+ * Fields at the beginning of armstub8.bin.
+ * While building the BL31 image, we put the stub magic into the binary.
+ * The GPU firmware detects this at boot time, clears that field as a
+ * confirmation and puts the kernel and DT address in the following words.
+ */
+extern uint32_t stub_magic;
+
 static const gicv2_driver_data_t rpi4_gic_data = {
 	.gicd_base = RPI4_GIC_GICD_BASE,
 	.gicc_base = RPI4_GIC_GICC_BASE,
@@ -141,6 +150,14 @@
 
 void bl31_plat_arch_setup(void)
 {
+	/*
+	 * Add the first page of memory, which holds the stub magic,
+	 * the kernel and the DT address.
+	 * This is read-only, as the GPU already populated the header,
+	 * we just need to read it.
+	 */
+	mmap_add_region(0, 0, 4096, MT_MEMORY | MT_RO | MT_SECURE);
+
 	rpi3_setup_page_tables(BL31_BASE, BL31_END - BL31_BASE,
 			       BL_CODE_BASE, BL_CODE_END,
 			       BL_RO_DATA_BASE, BL_RO_DATA_END