Merge changes from topic "a5ds-multicore" into integration

* changes:
  a5ds: add multicore support
  a5ds: Hold the secondary cpus in pen rather than panic
diff --git a/common/fdt_fixup.c b/common/fdt_fixup.c
index 8843404..99d0eee 100644
--- a/common/fdt_fixup.c
+++ b/common/fdt_fixup.c
@@ -29,6 +29,33 @@
 	return fdt_appendprop(fdt, offs, "compatible", str, strlen(str) + 1);
 }
 
+/*
+ * Those defines are for PSCI v0.1 legacy clients, which we expect to use
+ * the same execution state (AArch32/AArch64) as TF-A.
+ * Kernels running in AArch32 on an AArch64 TF-A should use PSCI v0.2.
+ */
+#ifdef __aarch64__
+#define PSCI_CPU_SUSPEND_FNID	PSCI_CPU_SUSPEND_AARCH64
+#define PSCI_CPU_ON_FNID	PSCI_CPU_ON_AARCH64
+#else
+#define PSCI_CPU_SUSPEND_FNID	PSCI_CPU_SUSPEND_AARCH32
+#define PSCI_CPU_ON_FNID	PSCI_CPU_ON_AARCH32
+#endif
+
+/*******************************************************************************
+ * dt_add_psci_node() - Add a PSCI node into an existing device tree
+ * @fdt:	pointer to the device tree blob in memory
+ *
+ * Add a device tree node describing PSCI into the root level of an existing
+ * device tree blob in memory.
+ * This will add v0.1, v0.2 and v1.0 compatible strings and the standard
+ * function IDs for v0.1 compatibility.
+ * An existing PSCI node will not be touched, the function will return success
+ * in this case. This function will not touch the /cpus enable methods, use
+ * dt_add_psci_cpu_enable_methods() for that.
+ *
+ * Return: 0 on success, -1 otherwise.
+ ******************************************************************************/
 int dt_add_psci_node(void *fdt)
 {
 	int offs;
@@ -52,15 +79,11 @@
 		return -1;
 	if (fdt_setprop_string(fdt, offs, "method", "smc"))
 		return -1;
-	if (fdt_setprop_u32(fdt, offs, "cpu_suspend", PSCI_CPU_SUSPEND_AARCH64))
+	if (fdt_setprop_u32(fdt, offs, "cpu_suspend", PSCI_CPU_SUSPEND_FNID))
 		return -1;
 	if (fdt_setprop_u32(fdt, offs, "cpu_off", PSCI_CPU_OFF))
 		return -1;
-	if (fdt_setprop_u32(fdt, offs, "cpu_on", PSCI_CPU_ON_AARCH64))
-		return -1;
-	if (fdt_setprop_u32(fdt, offs, "sys_poweroff", PSCI_SYSTEM_OFF))
-		return -1;
-	if (fdt_setprop_u32(fdt, offs, "sys_reset", PSCI_SYSTEM_RESET))
+	if (fdt_setprop_u32(fdt, offs, "cpu_on", PSCI_CPU_ON_FNID))
 		return -1;
 	return 0;
 }
@@ -113,6 +136,17 @@
 	return offs;
 }
 
+/*******************************************************************************
+ * dt_add_psci_cpu_enable_methods() - switch CPU nodes in DT to use PSCI
+ * @fdt:	pointer to the device tree blob in memory
+ *
+ * Iterate over all CPU device tree nodes (/cpus/cpu@x) in memory to change
+ * the enable-method to PSCI. This will add the enable-method properties, if
+ * required, or will change existing properties to read "psci".
+ *
+ * Return: 0 on success, or a negative error value otherwise.
+ ******************************************************************************/
+
 int dt_add_psci_cpu_enable_methods(void *fdt)
 {
 	int offs, ret;
@@ -130,6 +164,25 @@
 
 #define HIGH_BITS(x) ((sizeof(x) > 4) ? ((x) >> 32) : (typeof(x))0)
 
+/*******************************************************************************
+ * fdt_add_reserved_memory() - reserve (secure) memory regions in DT
+ * @dtb:	pointer to the device tree blob in memory
+ * @node_name:	name of the subnode to be used
+ * @base:	physical base address of the reserved region
+ * @size:	size of the reserved region
+ *
+ * Add a region of memory to the /reserved-memory node in a device tree in
+ * memory, creating that node if required. Each region goes into a subnode
+ * of that node and has a @node_name, a @base address and a @size.
+ * This will prevent any device tree consumer from using that memory. It
+ * can be used to announce secure memory regions, as it adds the "no-map"
+ * property to prevent mapping and speculative operations on that region.
+ *
+ * See reserved-memory/reserved-memory.txt in the (Linux kernel) DT binding
+ * documentation for details.
+ *
+ * Return: 0 on success, a negative error value otherwise.
+ ******************************************************************************/
 int fdt_add_reserved_memory(void *dtb, const char *node_name,
 			    uintptr_t base, size_t size)
 {
diff --git a/docs/getting_started/porting-guide.rst b/docs/getting_started/porting-guide.rst
index 5786dd3..9eb7c17 100644
--- a/docs/getting_started/porting-guide.rst
+++ b/docs/getting_started/porting-guide.rst
@@ -546,6 +546,13 @@
    PLAT_PARTITION_MAX_ENTRIES := 12
    $(eval $(call add_define,PLAT_PARTITION_MAX_ENTRIES))
 
+-  **PLAT_PARTITION_BLOCK_SIZE**
+   The size of partition block. It could be either 512 bytes or 4096 bytes.
+   The default value is 512.
+   `For example, define the build flag in platform.mk`_:
+   PLAT_PARTITION_BLOCK_SIZE := 4096
+   $(eval $(call add_define,PLAT_PARTITION_BLOCK_SIZE))
+
 The following constant is optional. It should be defined to override the default
 behaviour of the ``assert()`` function (for example, to save memory).
 
@@ -2202,6 +2209,19 @@
 above the CPU might require initialization due to having previously been in
 low power states. The generic code expects the handler to succeed.
 
+plat_psci_ops.pwr_domain_on_finish_late() [optional]
+...........................................................
+
+This optional function is called by the PSCI implementation after the calling
+CPU is fully powered on with respective data caches enabled. The calling CPU and
+the associated cluster are guaranteed to be participating in coherency. This
+function gives the flexibility to perform any platform-specific actions safely,
+such as initialization or modification of shared data structures, without the
+overhead of explicit cache maintainace operations.
+
+The ``target_state`` has a similar meaning as described in the ``pwr_domain_on_finish()``
+operation. The generic code expects the handler to succeed.
+
 plat_psci_ops.pwr_domain_suspend_finish()
 .........................................
 
diff --git a/docs/plat/index.rst b/docs/plat/index.rst
index 5951413..eaeee84 100644
--- a/docs/plat/index.rst
+++ b/docs/plat/index.rst
@@ -12,6 +12,7 @@
    imx8m
    intel-stratix10
    ls1043a
+   marvell/index
    meson-gxbb
    meson-gxl
    mt8183
diff --git a/docs/plat/marvell/build.rst b/docs/plat/marvell/build.rst
new file mode 100644
index 0000000..c8923e4
--- /dev/null
+++ b/docs/plat/marvell/build.rst
@@ -0,0 +1,254 @@
+TF-A Build Instructions for Marvell Platforms
+=============================================
+
+This section describes how to compile the Trusted Firmware-A (TF-A) project for Marvell's platforms.
+
+Build Instructions
+------------------
+(1) Set the cross compiler
+
+    .. code:: shell
+
+        > export CROSS_COMPILE=/path/to/toolchain/aarch64-linux-gnu-
+
+(2) Set path for FIP images:
+
+Set U-Boot image path (relatively to TF-A root or absolute path)
+
+    .. code:: shell
+
+        > export BL33=path/to/u-boot.bin
+
+For example: if U-Boot project (and its images) is located at ``~/project/u-boot``,
+BL33 should be ``~/project/u-boot/u-boot.bin``
+
+    .. note::
+
+       *u-boot.bin* should be used and not *u-boot-spl.bin*
+
+Set MSS/SCP image path (mandatory only for Armada80x0)
+
+    .. code:: shell
+
+        > export SCP_BL2=path/to/mrvl_scp_bl2*.img
+
+(3) Armada-37x0 build requires WTP tools installation.
+
+See below in the section "Tools and external components installation".
+Install ARM 32-bit cross compiler, which is required for building WTMI image for CM3
+
+    .. code:: shell
+
+        > sudo apt-get install gcc-arm-linux-gnueabi
+
+(4) Clean previous build residuals (if any)
+
+    .. code:: shell
+
+        > make distclean
+
+(5) Build TF-A
+
+There are several build options:
+
+- DEBUG
+
+        Default is without debug information (=0). in order to enable it use ``DEBUG=1``.
+        Must be disabled when building UART recovery images due to current console driver
+        implementation that is not compatible with Xmodem protocol used for boot image download.
+
+- LOG_LEVEL
+
+        Defines the level of logging which will be purged to the default output port.
+
+        LOG_LEVEL_NONE		0
+        LOG_LEVEL_ERROR		10
+        LOG_LEVEL_NOTICE	20
+        LOG_LEVEL_WARNING	30
+        LOG_LEVEL_INFO		40
+        LOG_LEVEL_VERBOSE	50
+
+- USE_COHERENT_MEM
+
+        This flag determines whether to include the coherent memory region in the
+        BL memory map or not.
+
+- LLC_ENABLE
+
+        Flag defining the LLC (L3) cache state. The cache is enabled by default (``LLC_ENABLE=1``).
+
+- MARVELL_SECURE_BOOT
+
+        Build trusted(=1)/non trusted(=0) image, default is non trusted.
+
+- BLE_PATH
+
+        Points to BLE (Binary ROM extension) sources folder. Only required for A8K builds.
+        The parameter is optional, its default value is ``plat/marvell/a8k/common/ble``.
+
+- MV_DDR_PATH
+
+        For A7/8K, use this parameter to point to mv_ddr driver sources to allow BLE build. For A37x0,
+        it is used for ddr_tool build.
+
+        Usage example: MV_DDR_PATH=path/to/mv_ddr
+
+        The parameter is optional for A7/8K, when this parameter is not set, the mv_ddr
+        sources are expected to be located at: drivers/marvell/mv_ddr. However, the parameter
+        is necessary for A37x0.
+
+        For the mv_ddr source location, check the section "Tools and external components installation"
+
+- DDR_TOPOLOGY
+
+        For Armada37x0 only, the DDR topology map index/name, default is 0.
+
+        Supported Options:
+            - DDR3 1CS (0): DB-88F3720-DDR3-Modular (512MB); EspressoBIN (512MB)
+            - DDR4 1CS (1): DB-88F3720-DDR4-Modular (512MB)
+            - DDR3 2CS (2): EspressoBIN V3-V5 (1GB)
+            - DDR4 2CS (3): DB-88F3720-DDR4-Modular (4GB)
+            - DDR3 1CS (4): DB-88F3720-DDR3-Modular (1GB)
+            - DDR4 1CS (5): EspressoBin V7 (1GB)
+            - DDR4 2CS (6): EspressoBin V7 (2GB)
+            - CUSTOMER (CUST): Customer board, DDR3 1CS 512MB
+
+- CLOCKSPRESET
+
+        For Armada37x0 only, the clock tree configuration preset including CPU and DDR frequency,
+        default is CPU_800_DDR_800.
+
+            - CPU_600_DDR_600	-	CPU at 600 MHz, DDR at 600 MHz
+            - CPU_800_DDR_800	-	CPU at 800 MHz, DDR at 800 MHz
+            - CPU_1000_DDR_800	-	CPU at 1000 MHz, DDR at 800 MHz
+            - CPU_1200_DDR_750	-	CPU at 1200 MHz, DDR at 750 MHz
+
+- BOOTDEV
+
+        For Armada37x0 only, the flash boot device, default is ``SPINOR``.
+
+        Currently, Armada37x0 only supports ``SPINOR``, ``SPINAND``, ``EMMCNORM`` and ``SATA``:
+
+            - SPINOR - SPI NOR flash boot
+            - SPINAND - SPI NAND flash boot
+            - EMMCNORM - eMMC Download Mode
+
+                Download boot loader or program code from eMMC flash into CM3 or CA53
+                Requires full initialization and command sequence
+
+            - SATA - SATA device boot
+
+- PARTNUM
+
+        For Armada37x0 only, the boot partition number, default is 0.
+
+        To boot from eMMC, the value should be aligned with the parameter in
+        U-Boot with name of ``CONFIG_SYS_MMC_ENV_PART``, whose value by default is
+        1. For details about CONFIG_SYS_MMC_ENV_PART, please refer to the U-Boot
+        build instructions.
+
+- WTMI_IMG
+
+        For Armada37x0 only, the path of the WTMI image can point to an image which
+        does nothing, an image which supports EFUSE or a customized CM3 firmware
+        binary. The default image is wtmi.bin that built from sources in WTP
+        folder, which is the next option. If the default image is OK, then this
+        option should be skipped.
+
+- WTP
+
+    For Armada37x0 only, use this parameter to point to wtptools source code
+    directory, which can be found as a3700_utils.zip in the release. Usage
+    example: ``WTP=/path/to/a3700_utils``
+
+    For example, in order to build the image in debug mode with log level up to 'notice' level run
+
+    .. code:: shell
+
+        > make DEBUG=1 USE_COHERENT_MEM=0 LOG_LEVEL=20 PLAT=<MARVELL_PLATFORM> all fip
+
+    And if we want to build a Armada37x0 image in debug mode with log level up to 'notice' level,
+    the image has the preset CPU at 1000 MHz, preset DDR3 at 800 MHz, the DDR topology of DDR4 2CS,
+    the image boot from SPI NOR flash partition 0, and the image is non trusted in WTP, the command
+    line is as following
+
+    .. code:: shell
+
+        > make DEBUG=1 USE_COHERENT_MEM=0 LOG_LEVEL=20 CLOCKSPRESET=CPU_1000_DDR_800 \
+            MARVELL_SECURE_BOOT=0 DDR_TOPOLOGY=3 BOOTDEV=SPINOR PARTNUM=0 PLAT=a3700 all fip
+
+    Supported MARVELL_PLATFORM are:
+        - a3700 (for both A3720 DB and EspressoBin)
+        - a70x0
+        - a70x0_amc (for AMC board)
+        - a80x0
+        - a80x0_mcbin (for MacciatoBin)
+
+Special Build Flags
+--------------------
+
+- PLAT_RECOVERY_IMAGE_ENABLE
+    When set this option to enable secondary recovery function when build atf.
+    In order to build UART recovery image this operation should be disabled for
+    a70x0 and a80x0 because of hardware limitation (boot from secondary image
+    can interrupt UART recovery process). This MACRO definition is set in
+    ``plat/marvell/a8k/common/include/platform_def.h`` file.
+
+For more information about build options, please refer to section
+'Summary of build options' in the :ref:`User Guide`.
+
+
+Build output
+------------
+Marvell's TF-A compilation generates 7 files:
+
+    - ble.bin		- BLe image
+    - bl1.bin		- BL1 image
+    - bl2.bin		- BL2 image
+    - bl31.bin		- BL31 image
+    - fip.bin		- FIP image (contains BL2, BL31 & BL33 (U-Boot) images)
+    - boot-image.bin	- TF-A image (contains BL1 and FIP images)
+    - flash-image.bin	- Image which contains boot-image.bin and SPL image.
+      Should be placed on the boot flash/device.
+
+
+Tools and external components installation
+------------------------------------------
+
+Armada37x0 Builds require installation of 3 components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+(1) ARM cross compiler capable of building images for the service CPU (CM3).
+    This component is usually included in the Linux host packages.
+    On Debian/Ubuntu hosts the default GNU ARM tool chain can be installed
+    using the following command
+
+    .. code:: shell
+
+        > sudo apt-get install gcc-arm-linux-gnueabi
+
+    Only if required, the default tool chain prefix ``arm-linux-gnueabi-`` can be
+    overwritten using the environment variable ``CROSS_CM3``.
+    Example for BASH shell
+
+    .. code:: shell
+
+        > export CROSS_CM3=/opt/arm-cross/bin/arm-linux-gnueabi
+
+(2) DDR initialization library sources (mv_ddr) available at the following repository
+    (use the "mv_ddr-armada-atf-mainline" branch):
+
+    https://github.com/MarvellEmbeddedProcessors/mv-ddr-marvell.git
+
+(3) Armada3700 tools available at the following repository (use the latest release branch):
+
+    https://github.com/MarvellEmbeddedProcessors/A3700-utils-marvell.git
+
+Armada70x0 and Armada80x0 Builds require installation of an additional component
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+(1) DDR initialization library sources (mv_ddr) available at the following repository
+    (use the "mv_ddr-armada-atf-mainline" branch):
+
+    https://github.com/MarvellEmbeddedProcessors/mv-ddr-marvell.git
+
diff --git a/docs/plat/marvell/build.txt b/docs/plat/marvell/build.txt
deleted file mode 100644
index 7b75196..0000000
--- a/docs/plat/marvell/build.txt
+++ /dev/null
@@ -1,194 +0,0 @@
-TF-A Build Instructions
-======================
-
-This section describes how to compile the ARM Trusted Firmware (TF-A) project for Marvell's platforms.
-
-Build Instructions
-------------------
-(1) Set the cross compiler::
-
-		> export CROSS_COMPILE=/path/to/toolchain/aarch64-linux-gnu-
-
-(2) Set path for FIP images:
-
-	Set U-Boot image path (relatively to TF-A root or absolute path)::
-
-		> export BL33=path/to/u-boot.bin
-
-	For example: if U-Boot project (and its images) is located at ~/project/u-boot,
-	BL33 should be ~/project/u-boot/u-boot.bin
-
-	.. note::
-
-	   u-boot.bin should be used and not u-boot-spl.bin
-
-	Set MSS/SCP image path (mandatory only for Armada80x0)::
-
-		> export SCP_BL2=path/to/mrvl_scp_bl2*.img
-
-(3) Armada-37x0 build requires WTP tools installation.
-
-	See below in the section "Tools and external components installation".
-	Install ARM 32-bit cross compiler, which is required for building WTMI image for CM3::
-
-		> sudo apt-get install gcc-arm-linux-gnueabi
-
-(4) Clean previous build residuals (if any)::
-
-		> make distclean
-
-(5) Build TF-A:
-
-	There are several build options:
-
-	- DEBUG: default is without debug information (=0). in order to enable it use DEBUG=1
-		Must be disabled when building UART recovery images due to current console driver
-		implementation that is not compatible with Xmodem protocol used for boot image download.
-
-	- LOG_LEVEL: defines the level of logging which will be purged to the default output port.
-
-		LOG_LEVEL_NONE		0
-		LOG_LEVEL_ERROR		10
-		LOG_LEVEL_NOTICE	20
-		LOG_LEVEL_WARNING	30
-		LOG_LEVEL_INFO		40
-		LOG_LEVEL_VERBOSE	50
-
-	- USE_COHERENT_MEM: This flag determines whether to include the coherent memory region in the
-		BL memory map or not.
-
-	- LLC_ENABLE: Flag defining the LLC (L3) cache state. The cache is enabled by default (LLC_ENABLE=1).
-
-	- MARVELL_SECURE_BOOT: build trusted(=1)/non trusted(=0) image, default is non trusted.
-
-	- BLE_PATH:
-		Points to BLE (Binary ROM extension) sources folder. Only required for A8K builds.
-		The parameter is optional, its default value is "plat/marvell/a8k/common/ble".
-
-	- MV_DDR_PATH:
-		For A7/8K, use this parameter to point to mv_ddr driver sources to allow BLE build. For A37x0,
-		it is used for ddr_tool build.
-		Usage example: MV_DDR_PATH=path/to/mv_ddr
-		The parameter is optional for A7/8K, when this parameter is not set, the mv_ddr
-		sources are expected to be located at: drivers/marvell/mv_ddr. However, the parameter
-		is necessary for A37x0.
-		For the mv_ddr source location, check the section "Tools and external components installation"
-
-	- DDR_TOPOLOGY: For Armada37x0 only, the DDR topology map index/name, default is 0.
-		Supported Options:
-			- DDR3 1CS (0): DB-88F3720-DDR3-Modular (512MB); EspressoBIN (512MB)
-			- DDR4 1CS (1): DB-88F3720-DDR4-Modular (512MB)
-			- DDR3 2CS (2): EspressoBIN V3-V5 (1GB)
-			- DDR4 2CS (3): DB-88F3720-DDR4-Modular (4GB)
-			- DDR3 1CS (4): DB-88F3720-DDR3-Modular (1GB)
-			- DDR4 1CS (5): EspressoBin V7 (1GB)
-			- DDR4 2CS (6): EspressoBin V7 (2GB)
-			- CUSTOMER (CUST): Customer board, DDR3 1CS 512MB
-
-	- CLOCKSPRESET: For Armada37x0 only, the clock tree configuration preset including CPU and DDR frequency,
-		default is CPU_800_DDR_800.
-			- CPU_600_DDR_600	-	CPU at 600 MHz, DDR at 600 MHz
-			- CPU_800_DDR_800	-	CPU at 800 MHz, DDR at 800 MHz
-			- CPU_1000_DDR_800	-	CPU at 1000 MHz, DDR at 800 MHz
-			- CPU_1200_DDR_750	-	CPU at 1200 MHz, DDR at 750 MHz
-
-	- BOOTDEV: For Armada37x0 only, the flash boot device, default is SPINOR,
-		Currently, Armada37x0 only supports SPINOR, SPINAND, EMMCNORM and SATA:
-
-			- SPINOR - SPI NOR flash boot
-			- SPINAND - SPI NAND flash boot
-			- EMMCNORM - eMMC Download Mode
-				Download boot loader or program code from eMMC flash into CM3 or CA53
-				Requires full initialization and command sequence
-			- SATA - SATA device boot
-
-	- PARTNUM: For Armada37x0 only, the boot partition number, default is 0. To boot from eMMC, the value
-		should be aligned with the parameter in U-Boot with name of CONFIG_SYS_MMC_ENV_PART, whose
-		value by default is 1.
-		For details about CONFIG_SYS_MMC_ENV_PART, please refer to the U-Boot build instructions.
-
-	- WTMI_IMG: For Armada37x0 only, the path of the WTMI image can point to an image which does
-		nothing, an image which supports EFUSE or a customized CM3 firmware binary. The default image
-		is wtmi.bin that built from sources in WTP folder, which is the next option. If the default
-		image is OK, then this option should be skipped.
-
-	- WTP: For Armada37x0 only, use this parameter to point to wtptools source code directory, which
-		can be found as a3700_utils.zip in the release.
-		Usage example: WTP=/path/to/a3700_utils
-
-	For example, in order to build the image in debug mode with log level up to 'notice' level run::
-
-		> make DEBUG=1 USE_COHERENT_MEM=0 LOG_LEVEL=20 PLAT=<MARVELL_PLATFORM> all fip
-
-	And if we want to build a Armada37x0 image in debug mode with log level up to 'notice' level,
-	the image has the preset CPU at 1000 MHz, preset DDR3 at 800 MHz, the DDR topology of DDR4 2CS,
-	the image boot from SPI NOR flash partition 0, and the image is non trusted in WTP, the command
-	line is as following::
-
-		> make DEBUG=1 USE_COHERENT_MEM=0 LOG_LEVEL=20 CLOCKSPRESET=CPU_1000_DDR_800 \
-			MARVELL_SECURE_BOOT=0 DDR_TOPOLOGY=3 BOOTDEV=SPINOR PARTNUM=0 PLAT=a3700 all fip
-
-	Supported MARVELL_PLATFORM are:
-		- a3700 (for both A3720 DB and EspressoBin)
-		- a70x0
-		- a70x0_amc (for AMC board)
-		- a80x0
-		- a80x0_mcbin (for MacciatoBin)
-
-Special Build Flags
---------------------
-	- PLAT_RECOVERY_IMAGE_ENABLE: When set this option to enable secondary recovery function when build
-		atf. In order to build UART recovery image this operation should be disabled for a70x0 and a80x0
-		because of hardware limitation (boot from secondary image can interrupt UART recovery process).
-		This MACRO definition is set in plat/marvell/a8k/common/include/platform_def.h file
-
-(for more information about build options, please refer to section 'Summary of build options' in  TF-A user-guide:
- https://github.com/ARM-software/arm-trusted-firmware/blob/master/docs/user-guide.md)
-
-
-Build output
--------------
-Marvell's TF-A compilation generates 7 files:
-	- ble.bin		- BLe image
-	- bl1.bin		- BL1 image
-	- bl2.bin		- BL2 image
-	- bl31.bin		- BL31 image
-	- fip.bin		- FIP image (contains BL2, BL31 & BL33 (U-Boot) images)
-	- boot-image.bin	- TF-A image (contains BL1 and FIP images)
-	- flash-image.bin	- Image which contains boot-image.bin and SPL image;
-				  should be placed on the boot flash/device.
-
-
-Tools and external components installation
-==========================================
-
-Armada37x0 Builds require installation of 3 components
--------------------------------------------------------
-
-(1) ARM cross compiler capable of building images for the service CPU (CM3).
-    This component is usually included in the Linux host packages.
-    On Debian/Ubuntu hosts the default GNU ARM tool chain can be installed
-    using the following command::
-
-		> sudo apt-get install gcc-arm-linux-gnueabi
-
-    Only if required, the default tool chain prefix "arm-linux-gnueabi-" can be
-    overwritten using the environment variable CROSS_CM3.
-    Example for BASH shell::
-
-		> export CROSS_CM3=/opt/arm-cross/bin/arm-linux-gnueabi
-
-(2) DDR initialization library sources (mv_ddr) available at the following repository
-    (use the "mv_ddr-armada-atf-mainline" branch)::
-    https://github.com/MarvellEmbeddedProcessors/mv-ddr-marvell.git
-
-(3) Armada3700 tools available at the following repository (use the latest release branch)::
-    https://github.com/MarvellEmbeddedProcessors/A3700-utils-marvell.git
-
-Armada70x0 and Armada80x0 Builds require installation of an additional component
---------------------------------------------------------------------------------
-
-(1) DDR initialization library sources (mv_ddr) available at the following repository
-    (use the "mv_ddr-armada-atf-mainline" branch)::
-    https://github.com/MarvellEmbeddedProcessors/mv-ddr-marvell.git
-
diff --git a/docs/plat/marvell/index.rst b/docs/plat/marvell/index.rst
new file mode 100644
index 0000000..89ebdc0
--- /dev/null
+++ b/docs/plat/marvell/index.rst
@@ -0,0 +1,14 @@
+Marvell
+=======
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Contents
+
+   build
+   porting
+   misc/mvebu-a8k-addr-map
+   misc/mvebu-amb
+   misc/mvebu-ccu
+   misc/mvebu-io-win
+   misc/mvebu-iob
diff --git a/docs/plat/marvell/misc/mvebu-a8k-addr-map.rst b/docs/plat/marvell/misc/mvebu-a8k-addr-map.rst
new file mode 100644
index 0000000..e88a458
--- /dev/null
+++ b/docs/plat/marvell/misc/mvebu-a8k-addr-map.rst
@@ -0,0 +1,49 @@
+Address decoding flow and address translation units of Marvell Armada 8K SoC family
+===================================================================================
+
+::
+
+  +--------------------------------------------------------------------------------------------------+
+  |                                                              +-------------+    +--------------+ |
+  |                                                              | Memory      +-----   DRAM CS    | |
+  |+------------+ +-----------+ +-----------+                    | Controller  |    +--------------+ |
+  ||  AP DMA    | |           | |           |                    +-------------+                     |
+  ||  SD/eMMC   | | CA72 CPUs | |  AP MSS   |                    +-------------+                     |
+  ||  MCI-0/1   | |           | |           |                    | Memory      |                     |
+  |+------+-----+ +--+--------+ +--------+--+  +------------+    | Controller  |     +-------------+ |
+  |       |          |                   |     |            +----- Translaton  |     |AP           | |
+  |       |          |                   |     |            |    +-------------+     |Configuration| |
+  |       |          |                   +-----+            +-------------------------Space        | |
+  |       |          | +-------------+         |  CCU       |                        +-------------+ |
+  |       |          | | MMU         +---------+  Windows   |   +-----------+        +-------------+ |
+  |       |          +-| translation |         |  Lookup    +----           +---------   AP SPI    | |
+  |       |            +-------------+         |            |   |           |        +-------------+ |
+  |       |            +-------------+         |            |   |  IO       |        +-------------+ |
+  |       +------------| SMMU        +---------+            |   |  Windows  +---------  AP MCI0/1  | |
+  |                    | translation |         +------------+   |  Lookup   |        +-------------+ |
+  |                    +---------+---+                          |           |        +-------------+ |
+  |             -                |                              |           +---------   AP STM    | |
+  |             +-----------------                              |           |        +-------------+ |
+  | AP          |                |                              +-+---------+                        |
+  +---------------------------------------------------------------|----------------------------------+
+  +-------------|-------------------------------------------------|----------------------------------+
+  | CP          |            +-------------+               +------+-----+      +-------------------+ |
+  |             |            |             |               |            +-------   SB CFG Space    | |
+  |             |            |   DIOB      |               |            |      +-------------------+ |
+  |             |            |   Windows   -----------------  IOB       |      +-------------------+ |
+  |             |            |   Control   |               |  Windows   +------| SB PCIe-0 - PCIe2 | |
+  |             |            |             |               |  Lookup    |      +-------------------+ |
+  |             |            +------+------+               |            |      +-------------------+ |
+  |             |                   |                      |            +------+      SB NAND      | |
+  |             |                   |                      +------+-----+      +-------------------+ |
+  |             |                   |                             |                                  |
+  |             |                   |                             |                                  |
+  |   +------------------+   +------------+                +------+-----+      +-------------------+ |
+  |   | Network Engine   |   |            |                |            +-------  SB SPI-0/SPI-1   | |
+  |   | Security Engine  |   | PCIe, MSS  |                |  RUNIT     |      +-------------------+ |
+  |   | SATA, USB        |   | DMA        |                |  Windows   |      +-------------------+ |
+  |   | SD/eMMC          |   |            |                |  Lookup    +-------   SB Device Bus   | |
+  |   | TDM, I2C         |   |            |                |            |      +-------------------+ |
+  |   +------------------+   +------------+                +------------+                            |
+  |                                                                                                  |
+  +--------------------------------------------------------------------------------------------------+
diff --git a/docs/plat/marvell/misc/mvebu-a8k-addr-map.txt b/docs/plat/marvell/misc/mvebu-a8k-addr-map.txt
deleted file mode 100644
index 586e8b7..0000000
--- a/docs/plat/marvell/misc/mvebu-a8k-addr-map.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Address decoding flow and address translation units of Marvell Armada 8K SoC family
-
-+--------------------------------------------------------------------------------------------------+
-|                                                              +-------------+    +--------------+ |
-|                                                              | Memory      +-----   DRAM CS    | |
-|+------------+ +-----------+ +-----------+                    | Controller  |    +--------------+ |
-||  AP DMA    | |           | |           |                    +-------------+                     |
-||  SD/eMMC   | | CA72 CPUs | |  AP MSS   |                    +-------------+                     |
-||  MCI-0/1   | |           | |           |                    | Memory      |                     |
-|+------+-----+ +--+--------+ +--------+--+  +------------+    | Controller  |     +-------------+ |
-|       |          |                   |     |            +----- Translaton  |     |AP           | |
-|       |          |                   |     |            |    +-------------+     |Configuration| |
-|       |          |                   +-----+            +-------------------------Space        | |
-|       |          | +-------------+         |  CCU       |                        +-------------+ |
-|       |          | | MMU         +---------+  Windows   |   +-----------+        +-------------+ |
-|       |          +-| translation |         |  Lookup    +----           +---------   AP SPI    | |
-|       |            +-------------+         |            |   |           |        +-------------+ |
-|       |            +-------------+         |            |   |  IO       |        +-------------+ |
-|       +------------| SMMU        +---------+            |   |  Windows  +---------  AP MCI0/1  | |
-|                    | translation |         +------------+   |  Lookup   |        +-------------+ |
-|                    +---------+---+                          |           |        +-------------+ |
-|             -                |                              |           +---------   AP STM    | |
-|             +-----------------                              |           |        +-------------+ |
-| AP          |                |                              +-+---------+                        |
-+---------------------------------------------------------------|----------------------------------+
-+-------------|-------------------------------------------------|----------------------------------+
-| CP          |            +-------------+               +------+-----+      +-------------------+ |
-|             |            |             |               |            +-------   SB CFG Space    | |
-|             |            |   DIOB      |               |            |      +-------------------+ |
-|             |            |   Windows   -----------------  IOB       |      +-------------------+ |
-|             |            |   Control   |               |  Windows   +------| SB PCIe-0 - PCIe2 | |
-|             |            |             |               |  Lookup    |      +-------------------+ |
-|             |            +------+------+               |            |      +-------------------+ |
-|             |                   |                      |            +------+      SB NAND      | |
-|             |                   |                      +------+-----+      +-------------------+ |
-|             |                   |                             |                                  |
-|             |                   |                             |                                  |
-|   +------------------+   +------------+                +------+-----+      +-------------------+ |
-|   | Network Engine   |   |            |                |            +-------  SB SPI-0/SPI-1   | |
-|   | Security Engine  |   | PCIe, MSS  |                |  RUNIT     |      +-------------------+ |
-|   | SATA, USB        |   | DMA        |                |  Windows   |      +-------------------+ |
-|   | SD/eMMC          |   |            |                |  Lookup    +-------   SB Device Bus   | |
-|   | TDM, I2C         |   |            |                |            |      +-------------------+ |
-|   +------------------+   +------------+                +------------+                            |
-|                                                                                                  |
-+--------------------------------------------------------------------------------------------------+
-
diff --git a/docs/plat/marvell/misc/mvebu-amb.rst b/docs/plat/marvell/misc/mvebu-amb.rst
new file mode 100644
index 0000000..d734003
--- /dev/null
+++ b/docs/plat/marvell/misc/mvebu-amb.rst
@@ -0,0 +1,58 @@
+AMB - AXI MBUS address decoding
+===============================
+
+AXI to M-bridge decoding unit driver for Marvell Armada 8K and 8K+ SoCs.
+
+The Runit offers a second level of address windows lookup. It is used to map
+transaction towards the CD BootROM, SPI0, SPI1 and Device bus (NOR).
+
+The Runit contains eight configurable windows. Each window defines a contiguous,
+address space and the properties associated with that address space.
+
+::
+
+  Unit        Bank         ATTR
+  Device-Bus  DEV_BOOT_CS  0x2F
+              DEV_CS0      0x3E
+              DEV_CS1      0x3D
+              DEV_CS2      0x3B
+              DEV_CS3      0x37
+  SPI-0       SPI_A_CS0    0x1E
+              SPI_A_CS1    0x5E
+              SPI_A_CS2    0x9E
+              SPI_A_CS3    0xDE
+              SPI_A_CS4    0x1F
+              SPI_A_CS5    0x5F
+              SPI_A_CS6    0x9F
+              SPI_A_CS7    0xDF
+  SPI         SPI_B_CS0    0x1A
+              SPI_B_CS1    0x5A
+              SPI_B_CS2    0x9A
+              SPI_B_CS3    0xDA
+  BOOT_ROM    BOOT_ROM     0x1D
+  UART        UART         0x01
+
+Mandatory functions
+-------------------
+
+- marvell_get_amb_memory_map
+    Returns the AMB windows configuration and the number of windows
+
+Mandatory structures
+--------------------
+
+- amb_memory_map
+    Array that include the configuration of the windows. Every window/entry is a
+    struct which has 2 parameters:
+
+      - Base address of the window
+      - Attribute of the window
+
+Examples
+--------
+
+.. code:: c
+
+    struct addr_map_win amb_memory_map[] = {
+        {0xf900,	AMB_DEV_CS0_ID},
+    };
diff --git a/docs/plat/marvell/misc/mvebu-amb.txt b/docs/plat/marvell/misc/mvebu-amb.txt
deleted file mode 100644
index 2a7a41e..0000000
--- a/docs/plat/marvell/misc/mvebu-amb.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-AMB - AXI MBUS address decoding
--------------------------------
-
-AXI to M-bridge decoding unit driver for Marvell Armada 8K and 8K+ SoCs.
-
-- The Runit offers a second level of address windows lookup. It is used to map transaction towards
-the CD BootROM, SPI0, SPI1 and Device bus (NOR).
-- The Runit contains eight configurable windows. Each window defines a contiguous,
-address space and the properties associated with that address space.
-
-Unit		Bank		ATTR
-Device-Bus	DEV_BOOT_CS 	0x2F
-		DEV_CS0     	0x3E
-		DEV_CS1     	0x3D
-		DEV_CS2     	0x3B
-		DEV_CS3     	0x37
-SPI-0		SPI_A_CS0 	0x1E
-		SPI_A_CS1 	0x5E
-		SPI_A_CS2 	0x9E
-		SPI_A_CS3 	0xDE
-		SPI_A_CS4 	0x1F
-		SPI_A_CS5 	0x5F
-		SPI_A_CS6 	0x9F
-		SPI_A_CS7 	0xDF
-SPI1		SPI_B_CS0 	0x1A
-		SPI_B_CS1 	0x5A
-		SPI_B_CS2	0x9A
-		SPI_B_CS3	0xDA
-BOOT_ROM	BOOT_ROM	0x1D
-UART		UART		0x01
-
-Mandatory functions:
-	- marvell_get_amb_memory_map
-		returns the AMB windows configuration and the number of windows
-
-Mandatory structures:
-	amb_memory_map - Array that include the configuration of the windows
-	  every window/entry is a struct which has 2 parameters:
-	  - base address of the window
-	  - Attribute of the window
-
-Examples:
-	struct addr_map_win amb_memory_map[] = {
-		{0xf900,	AMB_DEV_CS0_ID},
-	};
diff --git a/docs/plat/marvell/misc/mvebu-ccu.rst b/docs/plat/marvell/misc/mvebu-ccu.rst
new file mode 100644
index 0000000..5bac11f
--- /dev/null
+++ b/docs/plat/marvell/misc/mvebu-ccu.rst
@@ -0,0 +1,33 @@
+Marvell CCU address decoding bindings
+=====================================
+
+CCU configration driver (1st stage address translation) for Marvell Armada 8K and 8K+ SoCs.
+
+The CCU node includes a description of the address decoding configuration.
+
+Mandatory functions
+-------------------
+
+- marvell_get_ccu_memory_map
+    Return the CCU windows configuration and the number of windows of the
+    specific AP.
+
+Mandatory structures
+--------------------
+
+- ccu_memory_map
+    Array that includes the configuration of the windows. Every window/entry is
+    a struct which has 3 parameters:
+
+      - Base address of the window
+      - Size of the window
+      - Target-ID of the window
+
+Example
+-------
+
+.. code:: c
+
+	struct addr_map_win ccu_memory_map[] = {
+		{0x00000000f2000000,     0x00000000e000000,      IO_0_TID}, /* IO window */
+	};
diff --git a/docs/plat/marvell/misc/mvebu-ccu.txt b/docs/plat/marvell/misc/mvebu-ccu.txt
deleted file mode 100644
index 9764027..0000000
--- a/docs/plat/marvell/misc/mvebu-ccu.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Marvell CCU address decoding bindings
-=====================================
-
-CCU configration driver (1st stage address translation) for Marvell Armada 8K and 8K+ SoCs.
-
-The CCU node includes a description of the address decoding configuration.
-
-Mandatory functions:
-	- marvell_get_ccu_memory_map
-		return the CCU windows configuration and the number of windows
-		of the specific AP.
-
-Mandatory structures:
-	ccu_memory_map - Array that includes the configuration of the windows
-	  every window/entry is a struct which has 3 parameters:
-	  - Base address of the window
-	  - Size of the window
-	  - Target-ID of the window
-
-Example:
-	struct addr_map_win ccu_memory_map[] = {
-		{0x00000000f2000000,     0x00000000e000000,      IO_0_TID}, /* IO window */
-	};
diff --git a/docs/plat/marvell/misc/mvebu-io-win.rst b/docs/plat/marvell/misc/mvebu-io-win.rst
new file mode 100644
index 0000000..52845ca
--- /dev/null
+++ b/docs/plat/marvell/misc/mvebu-io-win.rst
@@ -0,0 +1,46 @@
+Marvell IO WIN address decoding bindings
+========================================
+
+IO Window configration driver (2nd stage address translation) for Marvell Armada 8K and 8K+ SoCs.
+
+The IO WIN includes a description of the address decoding configuration.
+
+Transactions that are decoded by CCU windows as IO peripheral, have an additional
+layer of decoding. This additional address decoding layer defines one of the
+following targets:
+
+- **0x0** = BootRom
+- **0x1** = STM (Serial Trace Macro-cell, a programmer's port into trace stream)
+- **0x2** = SPI direct access
+- **0x3** = PCIe registers
+- **0x4** = MCI Port
+- **0x5** = PCIe port
+
+Mandatory functions
+-------------------
+
+- marvell_get_io_win_memory_map
+    Returns the IO windows configuration and the number of windows of the
+    specific AP.
+
+Mandatory structures
+--------------------
+
+- io_win_memory_map
+    Array that include the configuration of the windows. Every window/entry is
+    a struct which has 3 parameters:
+
+	  - Base address of the window
+	  - Size of the window
+	  - Target-ID of the window
+
+Example
+-------
+
+.. code:: c
+
+	struct addr_map_win io_win_memory_map[] = {
+		{0x00000000fe000000,	0x000000001f00000,	PCIE_PORT_TID}, /* PCIe window 31Mb for PCIe port*/
+		{0x00000000ffe00000,	0x000000000100000,	PCIE_REGS_TID}, /* PCI-REG window 64Kb for PCIe-reg*/
+		{0x00000000f6000000,	0x000000000100000,	MCIPHY_TID},	/* MCI window  1Mb for PHY-reg*/
+	};
diff --git a/docs/plat/marvell/misc/mvebu-io-win.txt b/docs/plat/marvell/misc/mvebu-io-win.txt
deleted file mode 100644
index c83ad1f..0000000
--- a/docs/plat/marvell/misc/mvebu-io-win.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Marvell IO WIN address decoding bindings
-=====================================
-
-IO Window configration driver (2nd stage address translation) for Marvell Armada 8K and 8K+ SoCs.
-
-The IO WIN includes a description of the address decoding configuration.
-
-Transactions that are decoded by CCU windows as IO peripheral, have an additional
-layer of decoding. This additional address decoding layer defines one of the
-following targets:
-	0x0 = BootRom
-	0x1 = STM (Serial Trace Macro-cell, a programmer's port into trace stream)
-	0x2 = SPI direct access
-	0x3 = PCIe registers
-	0x4 = MCI Port
-	0x5 = PCIe port
-
-Mandatory functions:
-	- marvell_get_io_win_memory_map
-		returns the IO windows configuration and the number of windows
-		of the specific AP.
-
-Mandatory structures:
-	io_win_memory_map - Array that include the configuration of the windows
-	  every window/entry is a struct which has 3 parameters:
-	  - Base address of the window
-	  - Size of the window
-	  - Target-ID of the window
-
-Example:
-	struct addr_map_win io_win_memory_map[] = {
-		{0x00000000fe000000,	0x000000001f00000,	PCIE_PORT_TID}, /* PCIe window 31Mb for PCIe port*/
-		{0x00000000ffe00000,	0x000000000100000,	PCIE_REGS_TID}, /* PCI-REG window 64Kb for PCIe-reg*/
-		{0x00000000f6000000,	0x000000000100000,	MCIPHY_TID},	/* MCI window  1Mb for PHY-reg*/
-	};
diff --git a/docs/plat/marvell/misc/mvebu-iob.rst b/docs/plat/marvell/misc/mvebu-iob.rst
new file mode 100644
index 0000000..d02a7e8
--- /dev/null
+++ b/docs/plat/marvell/misc/mvebu-iob.rst
@@ -0,0 +1,52 @@
+Marvell IOB address decoding bindings
+=====================================
+
+IO bridge configration driver (3rd stage address translation) for Marvell Armada 8K and 8K+ SoCs.
+
+The IOB includes a description of the address decoding configuration.
+
+IOB supports up to n (in CP110 n=24) windows for external memory transaction.
+When a transaction passes through the IOB, its address is compared to each of
+the enabled windows. If there is a hit and it passes the security checks, it is
+advanced to the target port.
+
+Mandatory functions
+-------------------
+
+- marvell_get_iob_memory_map
+     Returns the IOB windows configuration and the number of windows
+
+Mandatory structures
+--------------------
+
+- iob_memory_map
+     Array that includes the configuration of the windows. Every window/entry is
+     a struct which has 3 parameters:
+
+       - Base address of the window
+       - Size of the window
+       - Target-ID of the window
+
+Target ID options
+-----------------
+
+- **0x0** = Internal configuration space
+- **0x1** = MCI0
+- **0x2** = PEX1_X1
+- **0x3** = PEX2_X1
+- **0x4** = PEX0_X4
+- **0x5** = NAND flash
+- **0x6** = RUNIT (NOR/SPI/BootRoom)
+- **0x7** = MCI1
+
+Example
+-------
+
+.. code:: c
+
+	struct addr_map_win iob_memory_map[] = {
+		{0x00000000f7000000,	0x0000000001000000,	PEX1_TID}, /* PEX1_X1 window */
+		{0x00000000f8000000,	0x0000000001000000,	PEX2_TID}, /* PEX2_X1 window */
+		{0x00000000f6000000,	0x0000000001000000,	PEX0_TID}, /* PEX0_X4 window */
+		{0x00000000f9000000,	0x0000000001000000,	NAND_TID}  /* NAND window */
+	};
diff --git a/docs/plat/marvell/misc/mvebu-iob.txt b/docs/plat/marvell/misc/mvebu-iob.txt
deleted file mode 100644
index 97ec09d..0000000
--- a/docs/plat/marvell/misc/mvebu-iob.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-Marvell IOB address decoding bindings
-=====================================
-
-IO bridge configration driver (3rd stage address translation) for Marvell Armada 8K and 8K+ SoCs.
-
-The IOB includes a description of the address decoding configuration.
-
-IOB supports up to n (in CP110 n=24) windows for external memory transaction.
-When a transaction passes through the IOB, its address is compared to each of
-the enabled windows. If there is a hit and it passes the security checks, it is
-advanced to the target port.
-
-Mandatory functions:
-	- marvell_get_iob_memory_map
-		returns the IOB windows configuration and the number of windows
-
-Mandatory structures:
-	iob_memory_map - Array that include the configuration of the windows
-	  every window/entry is a struct which has 3 parameters:
-	  - Base address of the window
-	  - Size of the window
-	  - Target-ID of the window
-
-Target ID options:
-	- 0x0 = Internal configuration space
-	- 0x1 = MCI0
-	- 0x2 = PEX1_X1
-	- 0x3 = PEX2_X1
-	- 0x4 = PEX0_X4
-	- 0x5 = NAND flash
-	- 0x6 = RUNIT (NOR/SPI/BootRoom)
-	- 0x7 = MCI1
-
-Example:
-	struct addr_map_win iob_memory_map[] = {
-		{0x00000000f7000000,	0x0000000001000000,	PEX1_TID}, /* PEX1_X1 window */
-		{0x00000000f8000000,	0x0000000001000000,	PEX2_TID}, /* PEX2_X1 window */
-		{0x00000000f6000000,	0x0000000001000000,	PEX0_TID}, /* PEX0_X4 window */
-		{0x00000000f9000000,	0x0000000001000000,	NAND_TID}  /* NAND window */
-	};
diff --git a/docs/plat/marvell/porting.rst b/docs/plat/marvell/porting.rst
new file mode 100644
index 0000000..8fc1c1f
--- /dev/null
+++ b/docs/plat/marvell/porting.rst
@@ -0,0 +1,163 @@
+TF-A Porting Guide for Marvell Platforms
+========================================
+
+This section describes how to port TF-A to a customer board, assuming that the
+SoC being used is already supported in TF-A.
+
+
+Source Code Structure
+---------------------
+
+- The customer platform specific code shall reside under ``plat/marvell/<soc family>/<soc>_cust``
+  (e.g. 'plat/marvell/a8k/a7040_cust').
+- The platform name for build purposes is called ``<soc>_cust`` (e.g. ``a7040_cust``).
+- The build system will reuse all files from within the soc directory, and take only the porting
+  files from the customer platform directory.
+
+Files that require porting are located at ``plat/marvell/<soc family>/<soc>_cust`` directory.
+
+
+Armada-70x0/Armada-80x0 Porting
+-------------------------------
+
+SoC Physical Address Map (marvell_plat_config.c)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This file describes the SoC physical memory mapping to be used for the CCU,
+IOWIN, AXI-MBUS and IOB address decode units (Refer to the functional spec for
+more details).
+
+In most cases, using the default address decode windows should work OK.
+
+In cases where a special physical address map is needed (e.g. Special size for
+PCIe MEM windows, large memory mapped SPI flash...), then porting of the SoC
+memory map is required.
+
+.. note::
+   For a detailed information on how CCU, IOWIN, AXI-MBUS & IOB work, please
+   refer to the SoC functional spec, and under
+   ``docs/marvell/misc/mvebu-[ccu/iob/amb/io-win].txt`` files.
+
+boot loader recovery (marvell_plat_config.c)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Background:
+
+  Boot rom can skip the current image and choose to boot from next position if a
+  specific value (``0xDEADB002``) is returned by the ble main function. This
+  feature is used for boot loader recovery by booting from a valid flash-image
+  saved in next position on flash (e.g. address 2M in SPI flash).
+
+  Supported options to implement the skip request are:
+    - GPIO
+    - I2C
+    - User defined
+
+- Porting:
+
+  Under marvell_plat_config.c, implement struct skip_image that includes
+  specific board parameters.
+
+  .. warning::
+     To disable this feature make sure the struct skip_image is not implemented.
+
+- Example:
+
+In A7040-DB specific implementation
+(``plat/marvell/a8k/a70x0/board/marvell_plat_config.c``), the image skip is
+implemented using GPIO: mpp 33 (SW5).
+
+Before resetting the board make sure there is a valid image on the next flash
+address:
+
+ -tftp [valid address] flash-image.bin
+ -sf update [valid address] 0x2000000 [size]
+
+Press reset and keep pressing the button connected to the chosen GPIO pin. A
+skip image request message is printed on the screen and boot rom boots from the
+saved image at the next position.
+
+DDR Porting (dram_port.c)
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This file defines the dram topology and parameters of the target board.
+
+The DDR code is part of the BLE component, which is an extension of ARM Trusted
+Firmware (TF-A).
+
+The DDR driver called mv_ddr is released separately apart from TF-A sources.
+
+The BLE and consequently, the DDR init code is executed at the early stage of
+the boot process.
+
+Each supported platform of the TF-A has its own DDR porting file called
+dram_port.c located at ``atf/plat/marvell/a8k/<platform>/board`` directory.
+
+Please refer to '<path_to_mv_ddr_sources>/doc/porting_guide.txt' for detailed
+porting description.
+
+The build target directory is "build/<platform>/release/ble".
+
+Comphy Porting (phy-porting-layer.h or phy-default-porting-layer.h)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Background:
+    Some of the comphy's parameters value depend on the HW connection between
+    the SoC and the PHY. Every board type has specific HW characteristics like
+    wire length. Due to those differences some comphy parameters vary between
+    board types. Therefore each board type can have its own list of values for
+    all relevant comphy parameters. The PHY porting layer specifies which
+    parameters need to be suited and the board designer should provide relevant
+    values.
+
+    .. seealso::
+        For XFI/SFI comphy type there is procedure "rx_training" which eases
+        process of suiting some of the parameters. Please see :ref:`uboot_cmd`
+        section: rx_training.
+
+    The PHY porting layer simplifies updating static values per board type,
+    which are now grouped in one place.
+
+    .. note::
+        The parameters for the same type of comphy may vary even for the same
+        board type, it is because the lanes from comphy-x to some PHY may have
+        different HW characteristic than lanes from comphy-y to the same
+        (multiplexed) or other PHY.
+
+- Porting:
+    The porting layer for PHY was introduced in TF-A. There is one file
+    ``drivers/marvell/comphy/phy-default-porting-layer.h`` which contains the
+    defaults. Those default parameters are used only if there is no appropriate
+    phy-porting-layer.h file under: ``plat/marvell/<soc
+    family>/<platform>/board/phy-porting-layer.h``. If the phy-porting-layer.h
+    exists, the phy-default-porting-layer.h is not going to be included.
+
+    .. warning::
+        Not all comphy types are already reworked to support the PHY porting
+        layer, currently the porting layer is supported for XFI/SFI and SATA
+        comphy types.
+
+    The easiest way to prepare the PHY porting layer for custom board is to copy
+    existing example to a new platform:
+
+    - cp ``plat/marvell/a8k/a80x0/board/phy-porting-layer.h`` "plat/marvell/<soc family>/<platform>/board/phy-porting-layer.h"
+    - adjust relevant parameters or
+    - if different comphy index is used for specific feature, move it to proper table entry and then adjust.
+
+    .. note::
+        The final table size with comphy parameters can be different, depending
+        on the CP module count for given SoC type.
+
+- Example:
+    Example porting layer for armada-8040-db is under:
+    ``plat/marvell/a8k/a80x0/board/phy-porting-layer.h``
+
+    .. note::
+        If there is no PHY porting layer for new platform (missing
+        phy-porting-layer.h), the default values are used
+        (drivers/marvell/comphy/phy-default-porting-layer.h) and the user is
+        warned:
+
+    .. warning::
+        "Using default comphy parameters - it may be required to suit them for
+        your board".
diff --git a/docs/plat/marvell/porting.txt b/docs/plat/marvell/porting.txt
deleted file mode 100644
index f9a39a0..0000000
--- a/docs/plat/marvell/porting.txt
+++ /dev/null
@@ -1,118 +0,0 @@
-.. _porting:
-
-TF-A Porting Guide
-=================
-
-This section describes how to port TF-A to a customer board, assuming that the SoC being used is already supported
-in TF-A.
-
-
-Source Code Structure
----------------------
-- The customer platform specific code shall reside under "plat/marvell/<soc family>/<soc>_cust"
-	(e.g. 'plat/marvell/a8k/a7040_cust').
-- The platform name for build purposes is called "<soc>_cust" (e.g. a7040_cust).
-- The build system will reuse all files from within the soc directory, and take only the porting
-  files from the customer platform directory.
-
-Files that require porting are located at "plat/marvell/<soc family>/<soc>_cust" directory.
-
-
-Armada-70x0/Armada-80x0 Porting
--------------------------------
-
-  - SoC Physical Address Map (marvell_plat_config.c):
-	- This file describes the SoC physical memory mapping to be used for the CCU, IOWIN, AXI-MBUS and IOB
-	  address decode units (Refer to the functional spec for more details).
-	- In most cases, using the default address decode windows should work OK.
-	- In cases where a special physical address map is needed (e.g. Special size for PCIe MEM windows,
-	  large memory mapped SPI flash...), then porting of the SoC memory map is required.
-	- Note: For a detailed information on how CCU, IOWIN, AXI-MBUS & IOB work, please refer to the SoC functional spec,
-	  and under "docs/marvell/misc/mvebu-[ccu/iob/amb/io-win].txt" files.
-
-  - boot loader recovery (marvell_plat_config.c):
-	- Background:
-		boot rom can skip the current image and choose to boot from next position if a specific value
-		(0xDEADB002) is returned by the ble main function. This feature is used for boot loader recovery
-		by booting from a valid flash-image saved in next position on flash (e.g. address 2M in SPI flash).
-
-		Supported options to implement the skip request are:
-			- GPIO
-			- I2C
-			- User defined
-
-	- Porting:
-		Under marvell_plat_config.c, implement struct skip_image that includes specific board parameters.
-		.. warning:: to disable this feature make sure the struct skip_image is not implemented.
-
-	- Example:
-		In A7040-DB specific implementation (plat/marvell/a8k/a70x0/board/marvell_plat_config.c),
-		the image skip is implemented using GPIO: mpp 33 (SW5).
-
-		Before resetting the board make sure there is a valid image on the next flash address:
-			-tftp [valid address] flash-image.bin
-			-sf update [valid address] 0x2000000 [size]
-
-		Press reset and keep pressing the button connected to the chosen GPIO pin. A skip image request
-		message is printed on the screen and boot rom boots from the saved image at the next position.
-
-  - DDR Porting (dram_port.c):
-	- This file defines the dram topology and parameters of the target board.
-	- The DDR code is part of the BLE component, which is an extension of ARM Trusted Firmware (TF-A).
-	- The DDR driver called mv_ddr is released separately apart from TF-A sources.
-	- The BLE and consequently, the DDR init code is executed at the early stage of the boot process.
-	- Each supported platform of the TF-A has its own DDR porting file called dram_port.c located at
-	  ``atf/plat/marvell/a8k/<platform>/board`` directory.
-	- Please refer to '<path_to_mv_ddr_sources>/doc/porting_guide.txt' for detailed porting description.
-	- The build target directory is "build/<platform>/release/ble".
-
-  - Comphy Porting (phy-porting-layer.h or phy-default-porting-layer.h)
-	- Background:
-		Some of the comphy's parameters value depend on the HW connection between the SoC and the PHY. Every
-		board type has specific HW characteristics like wire length. Due to those differences some comphy
-		parameters vary between board types. Therefore each board type can have its own list of values for
-		all relevant comphy parameters. The PHY porting layer specifies which parameters need to be suited and
-		the board designer should provide relevant values.
-
-		.. seealso::
-			For XFI/SFI comphy type there is procedure "rx_training" which eases process of suiting some of
-			the parameters. Please see :ref:`uboot_cmd` section: rx_training.
-
-		The PHY porting layer simplifies updating static values per board type, which are now grouped in one place.
-
-		.. note::
-			The parameters for the same type of comphy may vary even for the same board type, it is because
-			the lanes from comphy-x to some PHY may have different HW characteristic than lanes from
-			comphy-y to the same (multiplexed) or other PHY.
-
-	- Porting:
-		The porting layer for PHY was introduced in TF-A. There is one file
-		``drivers/marvell/comphy/phy-default-porting-layer.h`` which contains the defaults. Those default
-		parameters are used only if there is no appropriate phy-porting-layer.h file under:
-		``plat/marvell/<soc family>/<platform>/board/phy-porting-layer.h``. If the phy-porting-layer.h exists,
-		the phy-default-porting-layer.h is not going to be included.
-
-		.. warning::
-			Not all comphy types are already reworked to support the PHY porting layer, currently the porting
-			layer is supported for XFI/SFI and SATA comphy types.
-
-		The easiest way to prepare the PHY porting layer for custom board is to copy existing example to a new
-		platform:
-
-		- cp ``plat/marvell/a8k/a80x0/board/phy-porting-layer.h`` "plat/marvell/<soc family>/<platform>/board/phy-porting-layer.h"
-		- adjust relevant parameters or
-		- if different comphy index is used for specific feature, move it to proper table entry and then adjust.
-
-		.. note::
-			The final table size with comphy parameters can be different, depending on the CP module count for
-			given SoC type.
-
-	- Example:
-		Example porting layer for armada-8040-db is under: ``plat/marvell/a8k/a80x0/board/phy-porting-layer.h``
-
-		.. note::
-			If there is no PHY porting layer for new platform (missing phy-porting-layer.h), the default
-			values are used (drivers/marvell/comphy/phy-default-porting-layer.h) and the user is warned:
-
-		.. warning::
-			"Using default comphy parameters - it may be required to suit them for your board".
diff --git a/docs/plat/rpi4.rst b/docs/plat/rpi4.rst
new file mode 100644
index 0000000..0f529c1
--- /dev/null
+++ b/docs/plat/rpi4.rst
@@ -0,0 +1,85 @@
+Raspberry Pi 4
+==============
+
+The `Raspberry Pi 4`_ is an inexpensive single-board computer that contains four
+Arm Cortex-A72 cores. Also in contrast to previous Raspberry Pi versions this
+model has a GICv2 interrupt controller.
+
+This port is a minimal port to support loading non-secure EL2 payloads such
+as a 64-bit Linux kernel. Other payloads such as U-Boot or EDK-II should work
+as well, but have not been tested at this point.
+
+**IMPORTANT NOTE**: This port isn't secure. All of the memory used is DRAM,
+which is available from both the Non-secure and Secure worlds. The SoC does
+not seem to feature a secure memory controller of any kind, so portions of
+DRAM can't be protected properly from the Non-secure world.
+
+Build Instructions
+------------------
+
+There are no real configuration options at this point, so there is only
+one universal binary (bl31.bin), which can be built with:
+
+.. code:: shell
+
+    CROSS_COMPILE=aarch64-linux-gnu- make PLAT=rpi4 DEBUG=1
+
+Copy the generated build/rpi4/debug/bl31.bin to the SD card, either
+renaming it to ``armstub8.bin`` or adding an entry starting with ``armstub=``,
+then followed by the respective file name to ``config.txt``.
+You should have AArch64 code in the file loaded as the "kernel", as BL31
+will drop into AArch64/EL2 to the respective load address.
+arm64 Linux kernels are known to work this way.
+
+Other options that should be set in ``config.txt`` to properly boot 64-bit
+kernels are:
+
+::
+
+    enable_uart=1
+    arm_64bit=1
+    enable_gic=1
+
+The BL31 code will patch the provided device tree blob in memory to advertise
+PSCI support, also will add a reserved-memory node to the DT to tell the
+non-secure payload to not touch the resident TF-A code.
+
+If you connect a serial cable between the Mini UART and your computer, and
+connect to it (for example, with ``screen /dev/ttyUSB0 115200``) you should
+see some text from BL31, followed by the output of the EL2 payload.
+The command line provided is read from the ``cmdline.txt`` file on the SD card.
+
+TF-A port design
+----------------
+
+In contrast to the existing Raspberry Pi 3 port this one here is a BL31-only
+port, also it deviates quite a lot from the RPi3 port in many other ways.
+There is not so much difference between the two models, so eventually those
+two could be (more) unified in the future.
+
+As with the previous models, the GPU and its firmware are the first entity to
+run after the SoC gets its power. The on-chip Boot ROM loads the next stage
+(bootcode.bin) from flash (EEPROM), which is again GPU code.
+This part knows how to access the MMC controller and how to parse a FAT
+filesystem, so it will load further compononents and configuration files
+from the first FAT partition on the SD card.
+
+To accommodate this existing way of configuring and setting up the board,
+we use as much of this workflow as possible.
+If bootcode.bin finds a file called ``armstub8.bin`` on the SD card or it gets
+pointed to such code by finding a ``armstub=`` key in ``config.txt``, it will
+load this file to the beginning of DRAM (address 0) and execute it in
+AArch64 EL3.
+But before doing that, it will also load a "kernel" and the device tree into
+memory. The load addresses have a default, but can also be changed by
+setting them in ``config.txt``. If the GPU firmware finds a magic value in the
+armstub image file, it will put those two load addresses in memory locations
+near the beginning of memory, where TF-A code picks them up.
+
+To keep things simple, we will just use the kernel load address as the BL33
+entry point, also put the DTB address in the x0 register, as requested by
+the arm64 Linux kernel boot protocol. This does not necessarily mean that
+the EL2 payload needs to be a Linux kernel, a bootloader or any other kernel
+would work as well, as long as it can cope with having the DT address in
+register x0. If the payload has other means of finding the device tree, it
+could ignore this address as well.
diff --git a/drivers/arm/gic/v3/gicv3_main.c b/drivers/arm/gic/v3/gicv3_main.c
index 94a20ba..fb49a57 100644
--- a/drivers/arm/gic/v3/gicv3_main.c
+++ b/drivers/arm/gic/v3/gicv3_main.c
@@ -16,7 +16,6 @@
 #include "gicv3_private.h"
 
 const gicv3_driver_data_t *gicv3_driver_data;
-static unsigned int gicv2_compat;
 
 /*
  * Spinlock to guard registers needing read-modify-write. APIs protected by this
@@ -60,51 +59,61 @@
 void __init gicv3_driver_init(const gicv3_driver_data_t *plat_driver_data)
 {
 	unsigned int gic_version;
+	unsigned int gicv2_compat;
 
 	assert(plat_driver_data != NULL);
 	assert(plat_driver_data->gicd_base != 0U);
-	assert(plat_driver_data->gicr_base != 0U);
 	assert(plat_driver_data->rdistif_num != 0U);
 	assert(plat_driver_data->rdistif_base_addrs != NULL);
 
 	assert(IS_IN_EL3());
 
-	assert(plat_driver_data->interrupt_props_num > 0 ?
-	       plat_driver_data->interrupt_props != NULL : 1);
+	assert((plat_driver_data->interrupt_props_num != 0U) ?
+	       (plat_driver_data->interrupt_props != NULL) : 1);
 
 	/* Check for system register support */
-#ifdef __aarch64__
+#ifndef __aarch64__
+	assert((read_id_pfr1() &
+			(ID_PFR1_GIC_MASK << ID_PFR1_GIC_SHIFT)) != 0U);
+#else
 	assert((read_id_aa64pfr0_el1() &
 			(ID_AA64PFR0_GIC_MASK << ID_AA64PFR0_GIC_SHIFT)) != 0U);
-#else
-	assert((read_id_pfr1() & (ID_PFR1_GIC_MASK << ID_PFR1_GIC_SHIFT)) != 0U);
-#endif /* __aarch64__ */
+#endif /* !__aarch64__ */
 
 	/* The GIC version should be 3.0 */
 	gic_version = gicd_read_pidr2(plat_driver_data->gicd_base);
-	gic_version >>=	PIDR2_ARCH_REV_SHIFT;
+	gic_version >>= PIDR2_ARCH_REV_SHIFT;
 	gic_version &= PIDR2_ARCH_REV_MASK;
 	assert(gic_version == ARCH_REV_GICV3);
 
 	/*
-	 * Find out whether the GIC supports the GICv2 compatibility mode. The
-	 * ARE_S bit resets to 0 if supported
+	 * Find out whether the GIC supports the GICv2 compatibility mode.
+	 * The ARE_S bit resets to 0 if supported
 	 */
 	gicv2_compat = gicd_read_ctlr(plat_driver_data->gicd_base);
 	gicv2_compat >>= CTLR_ARE_S_SHIFT;
-	gicv2_compat = !(gicv2_compat & CTLR_ARE_S_MASK);
+	gicv2_compat = gicv2_compat & CTLR_ARE_S_MASK;
 
-	/*
-	 * Find the base address of each implemented Redistributor interface.
-	 * The number of interfaces should be equal to the number of CPUs in the
-	 * system. The memory for saving these addresses has to be allocated by
-	 * the platform port
-	 */
-	gicv3_rdistif_base_addrs_probe(plat_driver_data->rdistif_base_addrs,
-					   plat_driver_data->rdistif_num,
-					   plat_driver_data->gicr_base,
-					   plat_driver_data->mpidr_to_core_pos);
-
+	if (plat_driver_data->gicr_base != 0U) {
+		/*
+		 * Find the base address of each implemented Redistributor interface.
+		 * The number of interfaces should be equal to the number of CPUs in the
+		 * system. The memory for saving these addresses has to be allocated by
+		 * the platform port
+		 */
+		gicv3_rdistif_base_addrs_probe(plat_driver_data->rdistif_base_addrs,
+						   plat_driver_data->rdistif_num,
+						   plat_driver_data->gicr_base,
+						   plat_driver_data->mpidr_to_core_pos);
+#if !HW_ASSISTED_COHERENCY
+		/*
+		 * Flush the rdistif_base_addrs[] contents linked to the GICv3 driver.
+		 */
+		flush_dcache_range((uintptr_t)(plat_driver_data->rdistif_base_addrs),
+			plat_driver_data->rdistif_num *
+			sizeof(*(plat_driver_data->rdistif_base_addrs)));
+#endif
+	}
 	gicv3_driver_data = plat_driver_data;
 
 	/*
@@ -112,19 +121,19 @@
 	 * enabled. When the secondary CPU boots up, it initializes the
 	 * GICC/GICR interface with the caches disabled. Hence flush the
 	 * driver data to ensure coherency. This is not required if the
-	 * platform has HW_ASSISTED_COHERENCY or WARMBOOT_ENABLE_DCACHE_EARLY
-	 * enabled.
+	 * platform has HW_ASSISTED_COHERENCY enabled.
 	 */
-#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
-	flush_dcache_range((uintptr_t) &gicv3_driver_data,
-			sizeof(gicv3_driver_data));
-	flush_dcache_range((uintptr_t) gicv3_driver_data,
-			sizeof(*gicv3_driver_data));
+#if !HW_ASSISTED_COHERENCY
+	flush_dcache_range((uintptr_t)&gicv3_driver_data,
+		sizeof(gicv3_driver_data));
+	flush_dcache_range((uintptr_t)gicv3_driver_data,
+		sizeof(*gicv3_driver_data));
 #endif
 
-	INFO("GICv3 %s legacy support detected."
-			" ARM GICV3 driver initialized in EL3\n",
-			gicv2_compat ? "with" : "without");
+	INFO("GICv3 with%s legacy support detected."
+			" ARM GICv3 driver initialized in EL3\n",
+			(gicv2_compat == 0U) ? "" : "out");
+
 }
 
 /*******************************************************************************
@@ -192,6 +201,7 @@
 	gicv3_rdistif_on(proc_num);
 
 	gicr_base = gicv3_driver_data->rdistif_base_addrs[proc_num];
+	assert(gicr_base != 0U);
 
 	/* Set the default attribute of all SGIs and PPIs */
 	gicv3_ppi_sgi_config_defaults(gicr_base);
@@ -313,6 +323,7 @@
 
 	/* Mark the connected core as asleep */
 	gicr_base = gicv3_driver_data->rdistif_base_addrs[proc_num];
+	assert(gicr_base != 0U);
 	gicv3_rdistif_mark_core_asleep(gicr_base);
 }
 
@@ -629,7 +640,9 @@
 	num_ints &= TYPER_IT_LINES_NO_MASK;
 	num_ints = (num_ints + 1U) << 5;
 
-	assert(num_ints <= (MAX_SPI_ID + 1U));
+	/* Filter out special INTIDs 1020-1023 */
+	if (num_ints > (MAX_SPI_ID + 1U))
+		num_ints = MAX_SPI_ID + 1U;
 
 	/* Wait for pending write to complete */
 	gicd_wait_for_pending_write(gicd_base);
@@ -637,31 +650,31 @@
 	/* Save the GICD_CTLR */
 	dist_ctx->gicd_ctlr = gicd_read_ctlr(gicd_base);
 
-	/* Save GICD_IGROUPR for INTIDs 32 - 1020 */
+	/* Save GICD_IGROUPR for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, igroupr, IGROUPR);
 
-	/* Save GICD_ISENABLER for INT_IDs 32 - 1020 */
+	/* Save GICD_ISENABLER for INT_IDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, isenabler, ISENABLER);
 
-	/* Save GICD_ISPENDR for INTIDs 32 - 1020 */
+	/* Save GICD_ISPENDR for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, ispendr, ISPENDR);
 
-	/* Save GICD_ISACTIVER for INTIDs 32 - 1020 */
+	/* Save GICD_ISACTIVER for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, isactiver, ISACTIVER);
 
-	/* Save GICD_IPRIORITYR for INTIDs 32 - 1020 */
+	/* Save GICD_IPRIORITYR for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, ipriorityr, IPRIORITYR);
 
-	/* Save GICD_ICFGR for INTIDs 32 - 1020 */
+	/* Save GICD_ICFGR for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, icfgr, ICFGR);
 
-	/* Save GICD_IGRPMODR for INTIDs 32 - 1020 */
+	/* Save GICD_IGRPMODR for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, igrpmodr, IGRPMODR);
 
-	/* Save GICD_NSACR for INTIDs 32 - 1020 */
+	/* Save GICD_NSACR for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, nsacr, NSACR);
 
-	/* Save GICD_IROUTER for INTIDs 32 - 1024 */
+	/* Save GICD_IROUTER for INTIDs 32 - 1019 */
 	SAVE_GICD_REGS(gicd_base, dist_ctx, num_ints, irouter, IROUTER);
 
 	/*
@@ -707,24 +720,26 @@
 	num_ints &= TYPER_IT_LINES_NO_MASK;
 	num_ints = (num_ints + 1U) << 5;
 
-	assert(num_ints <= (MAX_SPI_ID + 1U));
+	/* Filter out special INTIDs 1020-1023 */
+	if (num_ints > (MAX_SPI_ID + 1U))
+		num_ints = MAX_SPI_ID + 1U;
 
-	/* Restore GICD_IGROUPR for INTIDs 32 - 1020 */
+	/* Restore GICD_IGROUPR for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, igroupr, IGROUPR);
 
-	/* Restore GICD_IPRIORITYR for INTIDs 32 - 1020 */
+	/* Restore GICD_IPRIORITYR for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, ipriorityr, IPRIORITYR);
 
-	/* Restore GICD_ICFGR for INTIDs 32 - 1020 */
+	/* Restore GICD_ICFGR for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, icfgr, ICFGR);
 
-	/* Restore GICD_IGRPMODR for INTIDs 32 - 1020 */
+	/* Restore GICD_IGRPMODR for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, igrpmodr, IGRPMODR);
 
-	/* Restore GICD_NSACR for INTIDs 32 - 1020 */
+	/* Restore GICD_NSACR for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, nsacr, NSACR);
 
-	/* Restore GICD_IROUTER for INTIDs 32 - 1020 */
+	/* Restore GICD_IROUTER for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, irouter, IROUTER);
 
 	/*
@@ -732,13 +747,13 @@
 	 * configured.
 	 */
 
-	/* Restore GICD_ISENABLER for INT_IDs 32 - 1020 */
+	/* Restore GICD_ISENABLER for INT_IDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, isenabler, ISENABLER);
 
-	/* Restore GICD_ISPENDR for INTIDs 32 - 1020 */
+	/* Restore GICD_ISPENDR for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, ispendr, ISPENDR);
 
-	/* Restore GICD_ISACTIVER for INTIDs 32 - 1020 */
+	/* Restore GICD_ISACTIVER for INTIDs 32 - 1019 */
 	RESTORE_GICD_REGS(gicd_base, dist_ctx, num_ints, isactiver, ISACTIVER);
 
 	/* Restore the GICD_CTLR */
@@ -1081,3 +1096,71 @@
 
 	return old_mask;
 }
+
+/*******************************************************************************
+ * This function delegates the responsibility of discovering the corresponding
+ * Redistributor frames to each CPU itself. It is a modified version of
+ * gicv3_rdistif_base_addrs_probe() and is executed by each CPU in the platform
+ * unlike the previous way in which only the Primary CPU did the discovery of
+ * all the Redistributor frames for every CPU. It also handles the scenario in
+ * which the frames of various CPUs are not contiguous in physical memory.
+ ******************************************************************************/
+int gicv3_rdistif_probe(const uintptr_t gicr_frame)
+{
+	u_register_t mpidr;
+	unsigned int proc_num, proc_self;
+	uint64_t typer_val;
+	uintptr_t rdistif_base;
+	bool gicr_frame_found = false;
+
+	assert(gicv3_driver_data->gicr_base == 0U);
+
+	/* Ensure this function is called with Data Cache enabled */
+#ifndef __aarch64__
+	assert((read_sctlr() & SCTLR_C_BIT) != 0U);
+#else
+	assert((read_sctlr_el3() & SCTLR_C_BIT) != 0U);
+#endif /* !__aarch64__ */
+
+	proc_self = gicv3_driver_data->mpidr_to_core_pos(read_mpidr_el1());
+	rdistif_base = gicr_frame;
+	do {
+		typer_val = gicr_read_typer(rdistif_base);
+		if (gicv3_driver_data->mpidr_to_core_pos != NULL) {
+			mpidr = mpidr_from_gicr_typer(typer_val);
+			proc_num = gicv3_driver_data->mpidr_to_core_pos(mpidr);
+		} else {
+			proc_num = (unsigned int)(typer_val >> TYPER_PROC_NUM_SHIFT) &
+					TYPER_PROC_NUM_MASK;
+		}
+		if (proc_num == proc_self) {
+			/* The base address doesn't need to be initialized on
+			 * every warm boot.
+			 */
+			if (gicv3_driver_data->rdistif_base_addrs[proc_num] != 0U)
+				return 0;
+			gicv3_driver_data->rdistif_base_addrs[proc_num] =
+			rdistif_base;
+			gicr_frame_found = true;
+			break;
+		}
+		rdistif_base += (uintptr_t)(ULL(1) << GICR_PCPUBASE_SHIFT);
+	} while ((typer_val & TYPER_LAST_BIT) == 0U);
+
+	if (!gicr_frame_found)
+		return -1;
+
+	/*
+	 * Flush the driver data to ensure coherency. This is
+	 * not required if platform has HW_ASSISTED_COHERENCY
+	 * enabled.
+	 */
+#if !HW_ASSISTED_COHERENCY
+	/*
+	 * Flush the rdistif_base_addrs[] contents linked to the GICv3 driver.
+	 */
+	flush_dcache_range((uintptr_t)&(gicv3_driver_data->rdistif_base_addrs[proc_num]),
+		sizeof(*(gicv3_driver_data->rdistif_base_addrs)));
+#endif
+	return 0; /* Found matching GICR frame */
+}
diff --git a/drivers/partition/gpt.c b/drivers/partition/gpt.c
index 4577f06..1b804de 100644
--- a/drivers/partition/gpt.c
+++ b/drivers/partition/gpt.c
@@ -52,9 +52,10 @@
 	if (result != 0) {
 		return result;
 	}
-	entry->start = (uint64_t)gpt_entry->first_lba * PARTITION_BLOCK_SIZE;
+	entry->start = (uint64_t)gpt_entry->first_lba *
+		       PLAT_PARTITION_BLOCK_SIZE;
 	entry->length = (uint64_t)(gpt_entry->last_lba -
 				   gpt_entry->first_lba + 1) *
-			PARTITION_BLOCK_SIZE;
+			PLAT_PARTITION_BLOCK_SIZE;
 	return 0;
 }
diff --git a/drivers/partition/partition.c b/drivers/partition/partition.c
index 7fdbf53..68133ea 100644
--- a/drivers/partition/partition.c
+++ b/drivers/partition/partition.c
@@ -15,7 +15,7 @@
 #include <drivers/partition/mbr.h>
 #include <plat/common/platform.h>
 
-static uint8_t mbr_sector[PARTITION_BLOCK_SIZE];
+static uint8_t mbr_sector[PLAT_PARTITION_BLOCK_SIZE];
 static partition_entry_list_t list;
 
 #if LOG_LEVEL >= LOG_LEVEL_VERBOSE
@@ -57,15 +57,15 @@
 		return result;
 	}
 	result = io_read(image_handle, (uintptr_t)&mbr_sector,
-			 PARTITION_BLOCK_SIZE, &bytes_read);
+			 PLAT_PARTITION_BLOCK_SIZE, &bytes_read);
 	if (result != 0) {
 		WARN("Failed to read data (%i)\n", result);
 		return result;
 	}
 
 	/* Check MBR boot signature. */
-	if ((mbr_sector[PARTITION_BLOCK_SIZE - 2] != MBR_SIGNATURE_FIRST) ||
-	    (mbr_sector[PARTITION_BLOCK_SIZE - 1] != MBR_SIGNATURE_SECOND)) {
+	if ((mbr_sector[LEGACY_PARTITION_BLOCK_SIZE - 2] != MBR_SIGNATURE_FIRST) ||
+	    (mbr_sector[LEGACY_PARTITION_BLOCK_SIZE - 1] != MBR_SIGNATURE_SECOND)) {
 		return -ENOENT;
 	}
 	offset = (uintptr_t)&mbr_sector + MBR_PRIMARY_ENTRY_OFFSET;
@@ -120,15 +120,15 @@
 		return result;
 	}
 	result = io_read(image_handle, (uintptr_t)&mbr_sector,
-			 PARTITION_BLOCK_SIZE, &bytes_read);
+			 PLAT_PARTITION_BLOCK_SIZE, &bytes_read);
 	if (result != 0) {
 		WARN("Failed to read data (%i)\n", result);
 		return result;
 	}
 
 	/* Check MBR boot signature. */
-	if ((mbr_sector[PARTITION_BLOCK_SIZE - 2] != MBR_SIGNATURE_FIRST) ||
-	    (mbr_sector[PARTITION_BLOCK_SIZE - 1] != MBR_SIGNATURE_SECOND)) {
+	if ((mbr_sector[LEGACY_PARTITION_BLOCK_SIZE - 2] != MBR_SIGNATURE_FIRST) ||
+	    (mbr_sector[LEGACY_PARTITION_BLOCK_SIZE - 1] != MBR_SIGNATURE_SECOND)) {
 		return -ENOENT;
 	}
 	offset = (uintptr_t)&mbr_sector +
diff --git a/include/drivers/arm/gicv3.h b/include/drivers/arm/gicv3.h
index 9c72d4d..c4f42d0 100644
--- a/include/drivers/arm/gicv3.h
+++ b/include/drivers/arm/gicv3.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -366,6 +366,7 @@
  * GICv3 EL3 driver API
  ******************************************************************************/
 void gicv3_driver_init(const gicv3_driver_data_t *plat_driver_data);
+int gicv3_rdistif_probe(const uintptr_t gicr_frame);
 void gicv3_distif_init(void);
 void gicv3_rdistif_init(unsigned int proc_num);
 void gicv3_rdistif_on(unsigned int proc_num);
diff --git a/include/drivers/partition/gpt.h b/include/drivers/partition/gpt.h
index 3ae160f..d923e95 100644
--- a/include/drivers/partition/gpt.h
+++ b/include/drivers/partition/gpt.h
@@ -10,9 +10,9 @@
 #include <drivers/partition/partition.h>
 
 #define PARTITION_TYPE_GPT		0xee
-#define GPT_HEADER_OFFSET		PARTITION_BLOCK_SIZE
+#define GPT_HEADER_OFFSET		PLAT_PARTITION_BLOCK_SIZE
 #define GPT_ENTRY_OFFSET		(GPT_HEADER_OFFSET +		\
-					 PARTITION_BLOCK_SIZE)
+					 PLAT_PARTITION_BLOCK_SIZE)
 #define GUID_LEN			16
 
 #define GPT_SIGNATURE			"EFI PART"
diff --git a/include/drivers/partition/partition.h b/include/drivers/partition/partition.h
index d94c782..5f64833 100644
--- a/include/drivers/partition/partition.h
+++ b/include/drivers/partition/partition.h
@@ -17,7 +17,15 @@
 
 CASSERT(PLAT_PARTITION_MAX_ENTRIES <= 128, assert_plat_partition_max_entries);
 
-#define PARTITION_BLOCK_SIZE		512
+#if !PLAT_PARTITION_BLOCK_SIZE
+# define PLAT_PARTITION_BLOCK_SIZE	512
+#endif /* PLAT_PARTITION_BLOCK_SIZE */
+
+CASSERT((PLAT_PARTITION_BLOCK_SIZE == 512) ||
+	(PLAT_PARTITION_BLOCK_SIZE == 4096),
+	assert_plat_partition_block_size);
+
+#define LEGACY_PARTITION_BLOCK_SIZE	512
 
 #define EFI_NAMELEN			36
 
diff --git a/include/lib/psci/psci.h b/include/lib/psci/psci.h
index 04e5e3d..7f7b7e3 100644
--- a/include/lib/psci/psci.h
+++ b/include/lib/psci/psci.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -301,6 +301,8 @@
 				const psci_power_state_t *target_state);
 	void (*pwr_domain_suspend)(const psci_power_state_t *target_state);
 	void (*pwr_domain_on_finish)(const psci_power_state_t *target_state);
+	void (*pwr_domain_on_finish_late)(
+				const psci_power_state_t *target_state);
 	void (*pwr_domain_suspend_finish)(
 				const psci_power_state_t *target_state);
 	void __dead2 (*pwr_domain_pwr_down_wfi)(
diff --git a/include/plat/arm/css/common/css_pm.h b/include/plat/arm/css/common/css_pm.h
index b82ff47..93f8616 100644
--- a/include/plat/arm/css/common/css_pm.h
+++ b/include/plat/arm/css/common/css_pm.h
@@ -27,6 +27,7 @@
 
 int css_pwr_domain_on(u_register_t mpidr);
 void css_pwr_domain_on_finish(const psci_power_state_t *target_state);
+void css_pwr_domain_on_finish_late(const psci_power_state_t *target_state);
 void css_pwr_domain_off(const psci_power_state_t *target_state);
 void css_pwr_domain_suspend(const psci_power_state_t *target_state);
 void css_pwr_domain_suspend_finish(
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index a05ee5a..d65e02d 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -142,11 +142,11 @@
 	 * world, and only for the secure world when CTX_INCLUDE_MTE_REGS is
 	 * set.
 	 */
-	unsigned int mte = get_armv8_5_mte_support();
 #if CTX_INCLUDE_MTE_REGS
-	assert(mte == MTE_IMPLEMENTED_ELX);
+	assert(get_armv8_5_mte_support() == MTE_IMPLEMENTED_ELX);
 	scr_el3 |= SCR_ATA_BIT;
 #else
+	unsigned int mte = get_armv8_5_mte_support();
 	if (mte == MTE_IMPLEMENTED_EL0) {
 		/*
 		 * Can enable MTE across both worlds as no MTE registers are
diff --git a/lib/psci/psci_on.c b/lib/psci/psci_on.c
index aa6b324..470b4f3 100644
--- a/lib/psci/psci_on.c
+++ b/lib/psci/psci_on.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -182,6 +182,14 @@
 #endif
 
 	/*
+	 * Plat. management: Perform any platform specific actions which
+	 * can only be done with the cpu and the cluster guaranteed to
+	 * be coherent.
+	 */
+	if (psci_plat_pm_ops->pwr_domain_on_finish_late != NULL)
+		psci_plat_pm_ops->pwr_domain_on_finish_late(state_info);
+
+	/*
 	 * All the platform specific actions for turning this cpu
 	 * on have completed. Perform enough arch.initialization
 	 * to run in the non-secure address space.
diff --git a/plat/arm/board/fvp/fvp_pm.c b/plat/arm/board/fvp/fvp_pm.c
index 42dec8d..0a62543 100644
--- a/plat/arm/board/fvp/fvp_pm.c
+++ b/plat/arm/board/fvp/fvp_pm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -247,10 +247,19 @@
 {
 	fvp_power_domain_on_finish_common(target_state);
 
-	/* Enable the gic cpu interface */
+}
+
+/*******************************************************************************
+ * FVP handler called when a power domain has just been powered on and the cpu
+ * and its cluster are fully participating in coherent transaction on the
+ * interconnect. Data cache must be enabled for CPU at this point.
+ ******************************************************************************/
+static void fvp_pwr_domain_on_finish_late(const psci_power_state_t *target_state)
+{
+	/* Program GIC per-cpu distributor or re-distributor interface */
 	plat_arm_gic_pcpu_init();
 
-	/* Program the gic per-cpu distributor or re-distributor interface */
+	/* Enable GIC CPU interface */
 	plat_arm_gic_cpuif_enable();
 }
 
@@ -272,7 +281,7 @@
 
 	fvp_power_domain_on_finish_common(target_state);
 
-	/* Enable the gic cpu interface */
+	/* Enable GIC CPU interface */
 	plat_arm_gic_cpuif_enable();
 }
 
@@ -397,6 +406,7 @@
 	.pwr_domain_off = fvp_pwr_domain_off,
 	.pwr_domain_suspend = fvp_pwr_domain_suspend,
 	.pwr_domain_on_finish = fvp_pwr_domain_on_finish,
+	.pwr_domain_on_finish_late = fvp_pwr_domain_on_finish_late,
 	.pwr_domain_suspend_finish = fvp_pwr_domain_suspend_finish,
 	.system_off = fvp_system_off,
 	.system_reset = fvp_system_reset,
diff --git a/plat/arm/board/fvp/fvp_trusted_boot.c b/plat/arm/board/fvp/fvp_trusted_boot.c
index 0d160cb..dc50764 100644
--- a/plat/arm/board/fvp/fvp_trusted_boot.c
+++ b/plat/arm/board/fvp/fvp_trusted_boot.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -8,39 +8,41 @@
 #include <stdint.h>
 #include <string.h>
 
+#include <lib/mmio.h>
+
 #include <plat/common/platform.h>
 #include <platform_def.h>
 #include <tools_share/tbbr_oid.h>
 
 /*
- * Store a new non-volatile counter value. On some FVP versions, the
- * non-volatile counters are RO. On these versions we expect the values in the
- * certificates to always match the RO values so that this function is never
- * called.
+ * Store a new non-volatile counter value.
+ *
+ * On some FVP versions, the non-volatile counters are read-only so this
+ * function will always fail.
  *
  * Return: 0 = success, Otherwise = error
  */
 int plat_set_nv_ctr(void *cookie, unsigned int nv_ctr)
 {
 	const char *oid;
-	uint32_t *nv_ctr_addr;
+	uintptr_t nv_ctr_addr;
 
 	assert(cookie != NULL);
 
 	oid = (const char *)cookie;
 	if (strcmp(oid, TRUSTED_FW_NVCOUNTER_OID) == 0) {
-		nv_ctr_addr = (uint32_t *)TFW_NVCTR_BASE;
+		nv_ctr_addr = TFW_NVCTR_BASE;
 	} else if (strcmp(oid, NON_TRUSTED_FW_NVCOUNTER_OID) == 0) {
-		nv_ctr_addr = (uint32_t *)NTFW_CTR_BASE;
+		nv_ctr_addr = NTFW_CTR_BASE;
 	} else {
 		return 1;
 	}
 
-	*(unsigned int *)nv_ctr_addr = nv_ctr;
+	mmio_write_32(nv_ctr_addr, nv_ctr);
 
-	/* Verify that the current value is the one we just wrote. */
-	if (nv_ctr != (unsigned int)(*nv_ctr_addr))
-		return 1;
-
-	return 0;
+	/*
+	 * If the FVP models a locked counter then its value cannot be updated
+	 * and the above write operation has been silently ignored.
+	 */
+	return (mmio_read_32(nv_ctr_addr) == nv_ctr) ? 0 : 1;
 }
diff --git a/plat/arm/common/arm_gicv3.c b/plat/arm/common/arm_gicv3.c
index 7f4957f..fef5376 100644
--- a/plat/arm/common/arm_gicv3.c
+++ b/plat/arm/common/arm_gicv3.c
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
+#include <assert.h>
 #include <platform_def.h>
 
 #include <common/interrupt_props.h>
@@ -67,7 +68,7 @@
 
 static const gicv3_driver_data_t arm_gic_data __unused = {
 	.gicd_base = PLAT_ARM_GICD_BASE,
-	.gicr_base = PLAT_ARM_GICR_BASE,
+	.gicr_base = 0U,
 	.interrupt_props = arm_interrupt_props,
 	.interrupt_props_num = ARRAY_SIZE(arm_interrupt_props),
 	.rdistif_num = PLATFORM_CORE_COUNT,
@@ -86,6 +87,11 @@
 #if (!defined(__aarch64__) && defined(IMAGE_BL32)) || \
 	(defined(__aarch64__) && defined(IMAGE_BL31))
 	gicv3_driver_init(&arm_gic_data);
+
+	if (gicv3_rdistif_probe(PLAT_ARM_GICR_BASE) == -1) {
+		ERROR("No GICR base frame found for Primary CPU\n");
+		panic();
+	}
 #endif
 }
 
@@ -116,10 +122,20 @@
 }
 
 /******************************************************************************
- * ARM common helper to initialize the per-cpu redistributor interface in GICv3
+ * ARM common helper function to iterate over all GICR frames and discover the
+ * corresponding per-cpu redistributor frame as well as initialize the
+ * corresponding interface in GICv3. At the moment, Arm platforms do not have
+ * non-contiguous GICR frames.
  *****************************************************************************/
 void plat_arm_gic_pcpu_init(void)
 {
+	int result;
+
+	result = gicv3_rdistif_probe(PLAT_ARM_GICR_BASE);
+	if (result == -1) {
+		ERROR("No GICR base frame found for CPU 0x%lx\n", read_mpidr());
+		panic();
+	}
 	gicv3_rdistif_init(plat_my_core_pos());
 }
 
diff --git a/plat/arm/css/common/css_pm.c b/plat/arm/css/common/css_pm.c
index f6fc6aa..01c674f 100644
--- a/plat/arm/css/common/css_pm.c
+++ b/plat/arm/css/common/css_pm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -76,9 +76,6 @@
 {
 	assert(CSS_CORE_PWR_STATE(target_state) == ARM_LOCAL_STATE_OFF);
 
-	/* Enable the gic cpu interface */
-	plat_arm_gic_cpuif_enable();
-
 	/*
 	 * Perform the common cluster specific operations i.e enable coherency
 	 * if this cluster was off.
@@ -100,10 +97,21 @@
 	/* Assert that the system power domain need not be initialized */
 	assert(css_system_pwr_state(target_state) == ARM_LOCAL_STATE_RUN);
 
+	css_pwr_domain_on_finisher_common(target_state);
+}
+
+/*******************************************************************************
+ * Handler called when a power domain has just been powered on and the cpu
+ * and its cluster are fully participating in coherent transaction on the
+ * interconnect. Data cache must be enabled for CPU at this point.
+ ******************************************************************************/
+void css_pwr_domain_on_finish_late(const psci_power_state_t *target_state)
+{
 	/* Program the gic per-cpu distributor or re-distributor interface */
 	plat_arm_gic_pcpu_init();
 
-	css_pwr_domain_on_finisher_common(target_state);
+	/* Enable the gic cpu interface */
+	plat_arm_gic_cpuif_enable();
 }
 
 /*******************************************************************************
@@ -185,6 +193,9 @@
 		arm_system_pwr_domain_resume();
 
 	css_pwr_domain_on_finisher_common(target_state);
+
+	/* Enable the gic cpu interface */
+	plat_arm_gic_cpuif_enable();
 }
 
 /*******************************************************************************
@@ -306,6 +317,7 @@
 plat_psci_ops_t plat_arm_psci_pm_ops = {
 	.pwr_domain_on		= css_pwr_domain_on,
 	.pwr_domain_on_finish	= css_pwr_domain_on_finish,
+	.pwr_domain_on_finish_late = css_pwr_domain_on_finish_late,
 	.pwr_domain_off		= css_pwr_domain_off,
 	.cpu_standby		= css_cpu_standby,
 	.pwr_domain_suspend	= css_pwr_domain_suspend,
diff --git a/plat/hisilicon/hikey/hikey_bl2_setup.c b/plat/hisilicon/hikey/hikey_bl2_setup.c
index 2f96efc..96136ec 100644
--- a/plat/hisilicon/hikey/hikey_bl2_setup.c
+++ b/plat/hisilicon/hikey/hikey_bl2_setup.c
@@ -114,6 +114,11 @@
 }
 #endif /* __aarch64__ */
 
+int bl2_plat_handle_pre_image_load(unsigned int image_id)
+{
+	return hikey_set_fip_addr(image_id, "fastboot");
+}
+
 int hikey_bl2_handle_post_image_load(unsigned int image_id)
 {
 	int err = 0;
diff --git a/plat/hisilicon/hikey/hikey_io_storage.c b/plat/hisilicon/hikey/hikey_io_storage.c
index 11dd973..fd610d8 100644
--- a/plat/hisilicon/hikey/hikey_io_storage.c
+++ b/plat/hisilicon/hikey/hikey_io_storage.c
@@ -18,6 +18,7 @@
 #include <drivers/io/io_memmap.h>
 #include <drivers/io/io_storage.h>
 #include <drivers/mmc.h>
+#include <drivers/partition/partition.h>
 #include <lib/mmio.h>
 #include <lib/semihosting.h>
 #include <tools_share/firmware_image_package.h>
@@ -43,9 +44,12 @@
 static int check_emmc(const uintptr_t spec);
 static int check_fip(const uintptr_t spec);
 
-static const io_block_spec_t emmc_fip_spec = {
-	.offset		= HIKEY_FIP_BASE,
-	.length		= HIKEY_FIP_MAX_SIZE,
+static io_block_spec_t emmc_fip_spec;
+
+static const io_block_spec_t emmc_gpt_spec = {
+	.offset		= 0,
+	.length		= PLAT_PARTITION_BLOCK_SIZE *
+			  (PLAT_PARTITION_MAX_ENTRIES / 4 + 2),
 };
 
 static const io_block_dev_spec_t emmc_dev_spec = {
@@ -213,6 +217,11 @@
 		check_fip
 	},
 #endif /* TRUSTED_BOARD_BOOT */
+	[GPT_IMAGE_ID] = {
+		&emmc_dev_handle,
+		(uintptr_t)&emmc_gpt_spec,
+		check_emmc
+	},
 };
 
 static int check_emmc(const uintptr_t spec)
@@ -267,6 +276,23 @@
 	(void)result;
 }
 
+int hikey_set_fip_addr(unsigned int image_id, const char *name)
+{
+	const partition_entry_t *entry;
+
+	if (emmc_fip_spec.length == 0) {
+		partition_init(GPT_IMAGE_ID);
+		entry = get_partition_entry(name);
+		if (entry == NULL) {
+			ERROR("Could NOT find the %s partition!\n", name);
+			return -ENOENT;
+		}
+		emmc_fip_spec.offset = entry->start;
+		emmc_fip_spec.length = entry->length;
+	}
+	return 0;
+}
+
 /* Return an IO device handle and specification which can be used to access
  * an image. Use this to enforce platform load policy
  */
diff --git a/plat/hisilicon/hikey/hikey_private.h b/plat/hisilicon/hikey/hikey_private.h
index d82a079..b75bc72 100644
--- a/plat/hisilicon/hikey/hikey_private.h
+++ b/plat/hisilicon/hikey/hikey_private.h
@@ -72,4 +72,6 @@
 
 void init_acpu_dvfs(void);
 
+int hikey_set_fip_addr(unsigned int image_id, const char *name);
+
 #endif /* HIKEY_PRIVATE_H */
diff --git a/plat/hisilicon/hikey/include/hikey_def.h b/plat/hisilicon/hikey/include/hikey_def.h
index 4fb3e56..590700d 100644
--- a/plat/hisilicon/hikey/include/hikey_def.h
+++ b/plat/hisilicon/hikey/include/hikey_def.h
@@ -84,8 +84,6 @@
 #define HIKEY_BL1_MMC_DATA_SIZE		0x0000B000
 
 #define EMMC_BASE			0
-#define HIKEY_FIP_BASE			(EMMC_BASE + (4 << 20))
-#define HIKEY_FIP_MAX_SIZE		(8 << 20)
 #define HIKEY_EMMC_RPMB_BASE		(EMMC_BASE + 0)
 #define HIKEY_EMMC_RPMB_MAX_SIZE	(128 << 10)
 #define HIKEY_EMMC_USERDATA_BASE	(EMMC_BASE + 0)
diff --git a/plat/hisilicon/hikey/platform.mk b/plat/hisilicon/hikey/platform.mk
index 7fd897c..fbf7432 100644
--- a/plat/hisilicon/hikey/platform.mk
+++ b/plat/hisilicon/hikey/platform.mk
@@ -76,6 +76,8 @@
 				drivers/io/io_fip.c			\
 				drivers/io/io_storage.c			\
 				drivers/mmc/mmc.c			\
+				drivers/partition/gpt.c			\
+				drivers/partition/partition.c		\
 				drivers/synopsys/emmc/dw_mmc.c		\
 				lib/cpus/aarch64/cortex_a53.S		\
 				plat/hisilicon/hikey/aarch64/hikey_helpers.S \
diff --git a/plat/hisilicon/hikey960/hikey960_bl2_setup.c b/plat/hisilicon/hikey960/hikey960_bl2_setup.c
index fc9ddab..35d7692 100644
--- a/plat/hisilicon/hikey960/hikey960_bl2_setup.c
+++ b/plat/hisilicon/hikey960/hikey960_bl2_setup.c
@@ -18,6 +18,7 @@
 #include <drivers/delay_timer.h>
 #include <drivers/dw_ufs.h>
 #include <drivers/generic_delay_timer.h>
+#include <drivers/partition/partition.h>
 #include <drivers/ufs.h>
 #include <lib/mmio.h>
 #ifdef SPD_opteed
@@ -263,6 +264,11 @@
  * This function can be used by the platforms to update/use image
  * information for given `image_id`.
  ******************************************************************************/
+int bl2_plat_handle_pre_image_load(unsigned int image_id)
+{
+	return hikey960_set_fip_addr(image_id, "fip");
+}
+
 int bl2_plat_handle_post_image_load(unsigned int image_id)
 {
 	return hikey960_bl2_handle_post_image_load(image_id);
diff --git a/plat/hisilicon/hikey960/hikey960_def.h b/plat/hisilicon/hikey960/hikey960_def.h
index 4ea3acd..9651d78 100644
--- a/plat/hisilicon/hikey960/hikey960_def.h
+++ b/plat/hisilicon/hikey960/hikey960_def.h
@@ -44,9 +44,6 @@
 #define PL011_UART_CLK_IN_HZ		19200000
 
 #define UFS_BASE			0
-/* FIP partition */
-#define HIKEY960_FIP_BASE		(UFS_BASE + 0x1400000)
-#define HIKEY960_FIP_MAX_SIZE		(12 << 20)
 
 #define HIKEY960_UFS_DESC_BASE		0x20000000
 #define HIKEY960_UFS_DESC_SIZE		0x00200000	/* 2MB */
diff --git a/plat/hisilicon/hikey960/hikey960_io_storage.c b/plat/hisilicon/hikey960/hikey960_io_storage.c
index a4e8389..e1c5845 100644
--- a/plat/hisilicon/hikey960/hikey960_io_storage.c
+++ b/plat/hisilicon/hikey960/hikey960_io_storage.c
@@ -18,6 +18,7 @@
 #include <drivers/io/io_fip.h>
 #include <drivers/io/io_memmap.h>
 #include <drivers/io/io_storage.h>
+#include <drivers/partition/partition.h>
 #include <lib/mmio.h>
 #include <lib/semihosting.h>
 #include <tools_share/firmware_image_package.h>
@@ -36,9 +37,12 @@
 size_t ufs_read_lun3_blks(int lba, uintptr_t buf, size_t size);
 size_t ufs_write_lun3_blks(int lba, const uintptr_t buf, size_t size);
 
-static const io_block_spec_t ufs_fip_spec = {
-	.offset		= HIKEY960_FIP_BASE,
-	.length		= HIKEY960_FIP_MAX_SIZE,
+static io_block_spec_t ufs_fip_spec;
+
+static const io_block_spec_t ufs_gpt_spec = {
+	.offset		= 0,
+	.length		= PLAT_PARTITION_BLOCK_SIZE *
+			  (PLAT_PARTITION_MAX_ENTRIES / 4 + 2),
 };
 
 static const io_block_dev_spec_t ufs_dev_spec = {
@@ -199,6 +203,11 @@
 		check_fip
 	},
 #endif /* TRUSTED_BOARD_BOOT */
+	[GPT_IMAGE_ID] = {
+		&ufs_dev_handle,
+		(uintptr_t)&ufs_gpt_spec,
+		check_ufs
+	},
 };
 
 static int check_ufs(const uintptr_t spec)
@@ -253,6 +262,23 @@
 	(void)result;
 }
 
+int hikey960_set_fip_addr(unsigned int image_id, const char *name)
+{
+	const partition_entry_t *entry;
+
+	if (ufs_fip_spec.length == 0) {
+		partition_init(GPT_IMAGE_ID);
+		entry = get_partition_entry(name);
+		if (entry == NULL) {
+			ERROR("Could NOT find the %s partition!\n", name);
+			return -ENOENT;
+		}
+		ufs_fip_spec.offset = entry->start;
+		ufs_fip_spec.length = entry->length;
+	}
+	return 0;
+}
+
 /* Return an IO device handle and specification which can be used to access
  * an image. Use this to enforce platform load policy
  */
diff --git a/plat/hisilicon/hikey960/hikey960_private.h b/plat/hisilicon/hikey960/hikey960_private.h
index 9a18dd6..54bf501 100644
--- a/plat/hisilicon/hikey960/hikey960_private.h
+++ b/plat/hisilicon/hikey960/hikey960_private.h
@@ -26,6 +26,7 @@
 			unsigned long coh_limit);
 void hikey960_io_setup(void);
 int hikey960_read_boardid(unsigned int *id);
+int hikey960_set_fip_addr(unsigned int image_id, const char *name);
 void hikey960_clk_init(void);
 void hikey960_pmu_init(void);
 void hikey960_regulator_enable(void);
diff --git a/plat/hisilicon/hikey960/platform.mk b/plat/hisilicon/hikey960/platform.mk
index 4f2c3c6..6cb53c7 100644
--- a/plat/hisilicon/hikey960/platform.mk
+++ b/plat/hisilicon/hikey960/platform.mk
@@ -22,11 +22,13 @@
 PLAT_PL061_MAX_GPIOS		:=	176
 PROGRAMMABLE_RESET_ADDRESS	:=	1
 ENABLE_SVE_FOR_NS		:=	0
+PLAT_PARTITION_BLOCK_SIZE	:=	4096
 
 # Process flags
 $(eval $(call add_define,HIKEY960_TSP_RAM_LOCATION_ID))
 $(eval $(call add_define,CRASH_CONSOLE_BASE))
 $(eval $(call add_define,PLAT_PL061_MAX_GPIOS))
+$(eval $(call add_define,PLAT_PARTITION_BLOCK_SIZE))
 
 # Add the build options to pack Trusted OS Extra1 and Trusted OS Extra2 images
 # in the FIP if the platform requires.
@@ -75,6 +77,8 @@
 				drivers/io/io_block.c			\
 				drivers/io/io_fip.c			\
 				drivers/io/io_storage.c			\
+				drivers/partition/gpt.c			\
+				drivers/partition/partition.c		\
 				drivers/synopsys/ufs/dw_ufs.c		\
 				drivers/ufs/ufs.c			\
 				lib/cpus/aarch64/cortex_a53.S		\
diff --git a/plat/rockchip/px30/include/platform_def.h b/plat/rockchip/px30/include/platform_def.h
index c101cdc..9dccab8 100644
--- a/plat/rockchip/px30/include/platform_def.h
+++ b/plat/rockchip/px30/include/platform_def.h
@@ -69,7 +69,7 @@
 /*******************************************************************************
  * Platform memory map related constants
  ******************************************************************************/
-/* TF txet, ro, rw, Size: 512KB */
+/* TF text, ro, rw, Size: 512KB */
 #define TZRAM_BASE		(0x0)
 #define TZRAM_SIZE		(0x80000)
 
@@ -79,7 +79,7 @@
 /*
  * Put BL3-1 at the top of the Trusted RAM
  */
-#define BL31_BASE		(TZRAM_BASE + 0x10000)
+#define BL31_BASE		(TZRAM_BASE + 0x40000)
 #define BL31_LIMIT		(TZRAM_BASE + TZRAM_SIZE)
 
 /*******************************************************************************
diff --git a/plat/rockchip/rk3288/include/shared/bl32_param.h b/plat/rockchip/rk3288/include/shared/bl32_param.h
index 743dad4..ffdb2f3 100644
--- a/plat/rockchip/rk3288/include/shared/bl32_param.h
+++ b/plat/rockchip/rk3288/include/shared/bl32_param.h
@@ -10,9 +10,9 @@
 /*******************************************************************************
  * Platform memory map related constants
  ******************************************************************************/
-/* TF txet, ro, rw, Size: 2MB */
+/* TF text, ro, rw, Size: 1MB */
 #define TZRAM_BASE		(0x0)
-#define TZRAM_SIZE		(0x200000)
+#define TZRAM_SIZE		(0x100000)
 
 /*******************************************************************************
  * BL32 specific defines.
@@ -20,7 +20,7 @@
 /*
  * Put BL32 at the top of the Trusted RAM
  */
-#define BL32_BASE			(TZRAM_BASE + 0x100000)
+#define BL32_BASE			(TZRAM_BASE + 0x40000)
 #define BL32_LIMIT			(TZRAM_BASE + TZRAM_SIZE)
 
 #endif /* BL32_PARAM_H */
diff --git a/plat/rockchip/rk3328/include/platform_def.h b/plat/rockchip/rk3328/include/platform_def.h
index 3104d9f..baac12d 100644
--- a/plat/rockchip/rk3328/include/platform_def.h
+++ b/plat/rockchip/rk3328/include/platform_def.h
@@ -66,7 +66,7 @@
 /*******************************************************************************
  * Platform memory map related constants
  ******************************************************************************/
-/* TF txet, ro, rw, Size: 512KB */
+/* TF text, ro, rw, Size: 512KB */
 #define TZRAM_BASE		(0x0)
 #define TZRAM_SIZE		(0x80000)
 
@@ -76,7 +76,7 @@
 /*
  * Put BL3-1 at the top of the Trusted RAM
  */
-#define BL31_BASE		(TZRAM_BASE + 0x10000)
+#define BL31_BASE		(TZRAM_BASE + 0x40000)
 #define BL31_LIMIT		(TZRAM_BASE + TZRAM_SIZE)
 
 /*******************************************************************************
diff --git a/plat/rockchip/rk3368/include/platform_def.h b/plat/rockchip/rk3368/include/platform_def.h
index 7b3cc6e..9334a83 100644
--- a/plat/rockchip/rk3368/include/platform_def.h
+++ b/plat/rockchip/rk3368/include/platform_def.h
@@ -67,7 +67,7 @@
 /*******************************************************************************
  * Platform memory map related constants
  ******************************************************************************/
-/* TF txet, ro, rw, Size: 512KB */
+/* TF text, ro, rw, Size: 512KB */
 #define TZRAM_BASE		(0x0)
 #define TZRAM_SIZE		(0x80000)
 
@@ -77,7 +77,7 @@
 /*
  * Put BL3-1 at the top of the Trusted RAM
  */
-#define BL31_BASE		(TZRAM_BASE + 0x10000)
+#define BL31_BASE		(TZRAM_BASE + 0x40000)
 #define BL31_LIMIT	(TZRAM_BASE + TZRAM_SIZE)
 
 /*******************************************************************************
diff --git a/plat/rockchip/rk3399/include/shared/bl31_param.h b/plat/rockchip/rk3399/include/shared/bl31_param.h
index e7f2226..6e7e8ba 100644
--- a/plat/rockchip/rk3399/include/shared/bl31_param.h
+++ b/plat/rockchip/rk3399/include/shared/bl31_param.h
@@ -20,7 +20,7 @@
 /*
  * Put BL31 at the top of the Trusted RAM
  */
-#define BL31_BASE		(TZRAM_BASE + 0x1000)
+#define BL31_BASE		(TZRAM_BASE + 0x40000)
 #define BL31_LIMIT		(TZRAM_BASE + TZRAM_SIZE)
 
 #endif /* BL31_PARAM_H */
diff --git a/plat/rpi/common/include/rpi_shared.h b/plat/rpi/common/include/rpi_shared.h
index 6863438..de83571 100644
--- a/plat/rpi/common/include/rpi_shared.h
+++ b/plat/rpi/common/include/rpi_shared.h
@@ -14,7 +14,7 @@
  ******************************************************************************/
 
 /* Utility functions */
-void rpi3_console_init(void);
+void rpi3_console_init(unsigned int base_clk_rate);
 void rpi3_setup_page_tables(uintptr_t total_base, size_t total_size,
 			    uintptr_t code_start, uintptr_t code_limit,
 			    uintptr_t rodata_start, uintptr_t rodata_limit
diff --git a/plat/rpi/common/rpi3_common.c b/plat/rpi/common/rpi3_common.c
index ab63d98..ff33694 100644
--- a/plat/rpi/common/rpi3_common.c
+++ b/plat/rpi/common/rpi3_common.c
@@ -104,14 +104,14 @@
  ******************************************************************************/
 static console_16550_t rpi3_console;
 
-void rpi3_console_init(void)
+void rpi3_console_init(unsigned int base_clk_rate)
 {
 	int console_scope = CONSOLE_FLAG_BOOT;
 #if RPI3_RUNTIME_UART != -1
 	console_scope |= CONSOLE_FLAG_RUNTIME;
 #endif
 	int rc = console_16550_register(PLAT_RPI3_UART_BASE,
-					PLAT_RPI3_UART_CLK_IN_HZ,
+					base_clk_rate,
 					PLAT_RPI3_UART_BAUDRATE,
 					&rpi3_console);
 	if (rc == 0) {
@@ -176,18 +176,6 @@
 }
 
 /*******************************************************************************
- * Return entrypoint of BL33.
- ******************************************************************************/
-uintptr_t plat_get_ns_image_entrypoint(void)
-{
-#ifdef PRELOADED_BL33_BASE
-	return PRELOADED_BL33_BASE;
-#else
-	return PLAT_RPI3_NS_IMAGE_OFFSET;
-#endif
-}
-
-/*******************************************************************************
  * Gets SPSR for BL32 entry
  ******************************************************************************/
 uint32_t rpi3_get_spsr_for_bl32_entry(void)
diff --git a/plat/rpi/rpi3/rpi3_bl1_setup.c b/plat/rpi/rpi3/rpi3_bl1_setup.c
index 3ac30e0..dcce76e 100644
--- a/plat/rpi/rpi3/rpi3_bl1_setup.c
+++ b/plat/rpi/rpi3/rpi3_bl1_setup.c
@@ -35,7 +35,7 @@
 		      0x80000000);
 
 	/* Initialize the console to provide early debug support */
-	rpi3_console_init();
+	rpi3_console_init(PLAT_RPI3_UART_CLK_IN_HZ);
 
 	/* Allow BL1 to see the whole Trusted RAM */
 	bl1_tzram_layout.total_base = BL_RAM_BASE;
diff --git a/plat/rpi/rpi3/rpi3_bl2_setup.c b/plat/rpi/rpi3/rpi3_bl2_setup.c
index 991c0fc..44827c6 100644
--- a/plat/rpi/rpi3/rpi3_bl2_setup.c
+++ b/plat/rpi/rpi3/rpi3_bl2_setup.c
@@ -62,7 +62,7 @@
 	meminfo_t *mem_layout = (meminfo_t *) arg1;
 
 	/* Initialize the console to provide early debug support */
-	rpi3_console_init();
+	rpi3_console_init(PLAT_RPI3_UART_CLK_IN_HZ);
 
 	/* Enable arch timer */
 	generic_delay_timer_init();
diff --git a/plat/rpi/rpi3/rpi3_bl31_setup.c b/plat/rpi/rpi3/rpi3_bl31_setup.c
index a9efc52..24a5613 100644
--- a/plat/rpi/rpi3/rpi3_bl31_setup.c
+++ b/plat/rpi/rpi3/rpi3_bl31_setup.c
@@ -48,6 +48,18 @@
 }
 
 /*******************************************************************************
+ * Return entrypoint of BL33.
+ ******************************************************************************/
+uintptr_t plat_get_ns_image_entrypoint(void)
+{
+#ifdef PRELOADED_BL33_BASE
+	return PRELOADED_BL33_BASE;
+#else
+	return PLAT_RPI3_NS_IMAGE_OFFSET;
+#endif
+}
+
+/*******************************************************************************
  * Perform any BL31 early platform setup. Here is an opportunity to copy
  * parameters passed by the calling EL (S-EL1 in BL2 & EL3 in BL1) before
  * they are lost (potentially). This needs to be done before the MMU is
@@ -60,7 +72,7 @@
 
 {
 	/* Initialize the console to provide early debug support */
-	rpi3_console_init();
+	rpi3_console_init(PLAT_RPI3_UART_CLK_IN_HZ);
 
 	/*
 	 * In debug builds, a special value is passed in 'arg1' to verify
diff --git a/plat/rpi/rpi4/aarch64/armstub8_header.S b/plat/rpi/rpi4/aarch64/armstub8_header.S
new file mode 100644
index 0000000..246358d
--- /dev/null
+++ b/plat/rpi/rpi4/aarch64/armstub8_header.S
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*
+ * armstub8.bin header to let the GPU firmware recognise this code.
+ * It will then write the load address of the kernel image and the DT
+ * after the header magic in RAM, so we can read those addresses at runtime.
+ */
+
+.text
+	b	armstub8_end
+
+.global stub_magic
+.global dtb_ptr32
+.global kernel_entry32
+
+.org 0xf0
+armstub8:
+stub_magic:
+	.word 0x5afe570b
+stub_version:
+	.word 0
+dtb_ptr32:
+	.word 0x0
+kernel_entry32:
+	.word 0x0
+
+/*
+ * Technically an offset of 0x100 would suffice, but the follow-up code
+ * (bl31_entrypoint.S at BL31_BASE) needs to be page aligned, so pad here
+ * till the end of the first 4K page.
+ */
+.org 0x1000
+armstub8_end:
diff --git a/plat/rpi/rpi4/aarch64/plat_helpers.S b/plat/rpi/rpi4/aarch64/plat_helpers.S
new file mode 100644
index 0000000..46073b7
--- /dev/null
+++ b/plat/rpi/rpi4/aarch64/plat_helpers.S
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2015-2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <assert_macros.S>
+#include <platform_def.h>
+#include <cortex_a72.h>
+
+#include "../include/rpi_hw.h"
+
+	.globl	plat_crash_console_flush
+	.globl	plat_crash_console_init
+	.globl	plat_crash_console_putc
+	.globl	platform_mem_init
+	.globl	plat_get_my_entrypoint
+	.globl	plat_is_my_cpu_primary
+	.globl	plat_my_core_pos
+	.globl	plat_reset_handler
+	.globl	plat_rpi3_calc_core_pos
+	.globl	plat_secondary_cold_boot_setup
+
+	/* -----------------------------------------------------
+	 *  unsigned int plat_my_core_pos(void)
+	 *
+	 *  This function uses the plat_rpi3_calc_core_pos()
+	 *  definition to get the index of the calling CPU.
+	 * -----------------------------------------------------
+	 */
+func plat_my_core_pos
+	mrs	x0, mpidr_el1
+	b	plat_rpi3_calc_core_pos
+endfunc plat_my_core_pos
+
+	/* -----------------------------------------------------
+	 *  unsigned int plat_rpi3_calc_core_pos(u_register_t mpidr);
+	 *
+	 *  CorePos = (ClusterId * 4) + CoreId
+	 * -----------------------------------------------------
+	 */
+func plat_rpi3_calc_core_pos
+	and	x1, x0, #MPIDR_CPU_MASK
+	and	x0, x0, #MPIDR_CLUSTER_MASK
+	add	x0, x1, x0, LSR #6
+	ret
+endfunc plat_rpi3_calc_core_pos
+
+	/* -----------------------------------------------------
+	 * unsigned int plat_is_my_cpu_primary (void);
+	 *
+	 * Find out whether the current cpu is the primary
+	 * cpu.
+	 * -----------------------------------------------------
+	 */
+func plat_is_my_cpu_primary
+	mrs	x0, mpidr_el1
+	and	x0, x0, #(MPIDR_CLUSTER_MASK | MPIDR_CPU_MASK)
+	cmp	x0, #RPI4_PRIMARY_CPU
+	cset	w0, eq
+	ret
+endfunc plat_is_my_cpu_primary
+
+	/* -----------------------------------------------------
+	 * void plat_secondary_cold_boot_setup (void);
+	 *
+	 * This function performs any platform specific actions
+	 * needed for a secondary cpu after a cold reset e.g
+	 * mark the cpu's presence, mechanism to place it in a
+	 * holding pen etc.
+	 * -----------------------------------------------------
+	 */
+func plat_secondary_cold_boot_setup
+	/* Calculate address of our hold entry */
+	bl	plat_my_core_pos
+	lsl	x0, x0, #3
+	mov_imm	x2, PLAT_RPI3_TM_HOLD_BASE
+	add	x0, x0, x2
+
+	/*
+	 * This code runs way before requesting the warmboot of this core,
+	 * so it is possible to clear the mailbox before getting a request
+	 * to boot.
+	 */
+	mov	x1, PLAT_RPI3_TM_HOLD_STATE_WAIT
+	str	x1,[x0]
+
+	/* Wait until we have a go */
+poll_mailbox:
+	wfe
+	ldr	x1, [x0]
+	cmp	x1, PLAT_RPI3_TM_HOLD_STATE_GO
+	bne	poll_mailbox
+
+	/* Jump to the provided entrypoint */
+	mov_imm	x0, PLAT_RPI3_TM_ENTRYPOINT
+	ldr	x1, [x0]
+	br	x1
+endfunc plat_secondary_cold_boot_setup
+
+	/* ---------------------------------------------------------------------
+	 * uintptr_t plat_get_my_entrypoint (void);
+	 *
+	 * Main job of this routine is to distinguish between a cold and a warm
+	 * boot.
+	 *
+	 * This functions returns:
+	 *  - 0 for a cold boot.
+	 *  - Any other value for a warm boot.
+	 * ---------------------------------------------------------------------
+	 */
+func plat_get_my_entrypoint
+	/* TODO: support warm boot */
+	mov	x0, #0
+	ret
+endfunc plat_get_my_entrypoint
+
+	/* ---------------------------------------------
+	 * void platform_mem_init (void);
+	 *
+	 * No need to carry out any memory initialization.
+	 * ---------------------------------------------
+	 */
+func platform_mem_init
+	ret
+endfunc platform_mem_init
+
+	/* ---------------------------------------------
+	 * int plat_crash_console_init(void)
+	 * Function to initialize the crash console
+	 * without a C Runtime to print crash report.
+	 * Clobber list : x0 - x3
+	 * ---------------------------------------------
+	 */
+func plat_crash_console_init
+	mov_imm	x0, PLAT_RPI3_UART_BASE
+	mov_imm	x1, PLAT_RPI4_VPU_CLK_RATE
+	mov_imm	x2, PLAT_RPI3_UART_BAUDRATE
+	b	console_16550_core_init
+endfunc plat_crash_console_init
+
+	/* ---------------------------------------------
+	 * int plat_crash_console_putc(int c)
+	 * Function to print a character on the crash
+	 * console without a C Runtime.
+	 * Clobber list : x1, x2
+	 * ---------------------------------------------
+	 */
+func plat_crash_console_putc
+	mov_imm	x1, PLAT_RPI3_UART_BASE
+	b	console_16550_core_putc
+endfunc plat_crash_console_putc
+
+	/* ---------------------------------------------
+	 * int plat_crash_console_flush()
+	 * Function to force a write of all buffered
+	 * data that hasn't been output.
+	 * Out : return -1 on error else return 0.
+	 * Clobber list : x0, x1
+	 * ---------------------------------------------
+	 */
+func plat_crash_console_flush
+	mov_imm	x0, PLAT_RPI3_UART_BASE
+	b	console_16550_core_flush
+endfunc plat_crash_console_flush
+
+	/* ---------------------------------------------
+	 * void plat_reset_handler(void);
+	 * ---------------------------------------------
+	 */
+func plat_reset_handler
+	/* ------------------------------------------------
+	 * Set L2 read/write cache latency:
+	 * - L2 Data RAM latency: 3 cycles (0b010)
+	 * - L2 Data RAM setup: 1 cycle (bit 5)
+	 * ------------------------------------------------
+	 */
+	mrs	x0, CORTEX_A72_L2CTLR_EL1
+	mov	x1, #0x22
+	orr	x0, x0, x1
+	msr	CORTEX_A72_L2CTLR_EL1, x0
+	isb
+
+	ret
+endfunc plat_reset_handler
diff --git a/plat/rpi/rpi4/include/plat.ld.S b/plat/rpi/rpi4/include/plat.ld.S
new file mode 100644
index 0000000..9262fad
--- /dev/null
+++ b/plat/rpi/rpi4/include/plat.ld.S
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Stub linker script to provide the armstub8.bin header before the actual
+ * code. If the GPU firmware finds a magic value at offset 240 in
+ * armstub8.bin, it will put the DTB and kernel load address in subsequent
+ * words. We can then read those values to find the proper NS entry point
+ * and find our DTB more flexibly.
+ */
+
+MEMORY {
+    PRERAM (rwx): ORIGIN = 0, LENGTH = 4096
+}
+
+SECTIONS
+{
+    .armstub8 . : {
+        *armstub8_header.o(.text*)
+        KEEP(*(.armstub8))
+    } >PRERAM
+}
diff --git a/plat/rpi/rpi4/include/plat_macros.S b/plat/rpi/rpi4/include/plat_macros.S
new file mode 100644
index 0000000..6007d03
--- /dev/null
+++ b/plat/rpi/rpi4/include/plat_macros.S
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2016-2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef PLAT_MACROS_S
+#define PLAT_MACROS_S
+
+	/* ---------------------------------------------
+	 * The below required platform porting macro
+	 * prints out relevant platform registers
+	 * whenever an unhandled exception is taken in
+	 * BL31.
+	 * Clobbers: x0 - x10, x16, x17, sp
+	 * ---------------------------------------------
+	 */
+	.macro plat_crash_print_regs
+	.endm
+
+#endif /* PLAT_MACROS_S */
diff --git a/plat/rpi/rpi4/include/platform_def.h b/plat/rpi/rpi4/include/platform_def.h
new file mode 100644
index 0000000..a9ecdba
--- /dev/null
+++ b/plat/rpi/rpi4/include/platform_def.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015-2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef PLATFORM_DEF_H
+#define PLATFORM_DEF_H
+
+#include <arch.h>
+#include <common/tbbr/tbbr_img_def.h>
+#include <lib/utils_def.h>
+#include <plat/common/common_def.h>
+
+#include "rpi_hw.h"
+
+/* Special value used to verify platform parameters from BL2 to BL31 */
+#define RPI3_BL31_PLAT_PARAM_VAL	ULL(0x0F1E2D3C4B5A6978)
+
+#define PLATFORM_STACK_SIZE		ULL(0x1000)
+
+#define PLATFORM_MAX_CPUS_PER_CLUSTER	U(4)
+#define PLATFORM_CLUSTER_COUNT		U(1)
+#define PLATFORM_CLUSTER0_CORE_COUNT	PLATFORM_MAX_CPUS_PER_CLUSTER
+#define PLATFORM_CORE_COUNT		PLATFORM_CLUSTER0_CORE_COUNT
+
+#define RPI4_PRIMARY_CPU		U(0)
+
+#define PLAT_MAX_PWR_LVL		MPIDR_AFFLVL1
+#define PLAT_NUM_PWR_DOMAINS		(PLATFORM_CLUSTER_COUNT + \
+					 PLATFORM_CORE_COUNT)
+
+#define PLAT_MAX_RET_STATE		U(1)
+#define PLAT_MAX_OFF_STATE		U(2)
+
+/* Local power state for power domains in Run state. */
+#define PLAT_LOCAL_STATE_RUN		U(0)
+/* Local power state for retention. Valid only for CPU power domains */
+#define PLAT_LOCAL_STATE_RET		U(1)
+/*
+ * Local power state for OFF/power-down. Valid for CPU and cluster power
+ * domains.
+ */
+#define PLAT_LOCAL_STATE_OFF		U(2)
+
+/*
+ * Macros used to parse state information from State-ID if it is using the
+ * recommended encoding for State-ID.
+ */
+#define PLAT_LOCAL_PSTATE_WIDTH		U(4)
+#define PLAT_LOCAL_PSTATE_MASK		((U(1) << PLAT_LOCAL_PSTATE_WIDTH) - 1)
+
+/*
+ * Some data must be aligned on the biggest cache line size in the platform.
+ * This is known only to the platform as it might have a combination of
+ * integrated and external caches.
+ */
+#define CACHE_WRITEBACK_SHIFT		U(6)
+#define CACHE_WRITEBACK_GRANULE		(U(1) << CACHE_WRITEBACK_SHIFT)
+
+/*
+ * I/O registers.
+ */
+#define DEVICE0_BASE			RPI_IO_BASE
+#define DEVICE0_SIZE			RPI_IO_SIZE
+
+/*
+ * Mailbox to control the secondary cores. All secondary cores are held in a
+ * wait loop in cold boot. To release them perform the following steps (plus
+ * any additional barriers that may be needed):
+ *
+ *     uint64_t *entrypoint = (uint64_t *)PLAT_RPI3_TM_ENTRYPOINT;
+ *     *entrypoint = ADDRESS_TO_JUMP_TO;
+ *
+ *     uint64_t *mbox_entry = (uint64_t *)PLAT_RPI3_TM_HOLD_BASE;
+ *     mbox_entry[cpu_id] = PLAT_RPI3_TM_HOLD_STATE_GO;
+ *
+ *     sev();
+ */
+/* The secure entry point to be used on warm reset by all CPUs. */
+#define PLAT_RPI3_TM_ENTRYPOINT		0x100
+#define PLAT_RPI3_TM_ENTRYPOINT_SIZE	ULL(8)
+
+/* Hold entries for each CPU. */
+#define PLAT_RPI3_TM_HOLD_BASE		(PLAT_RPI3_TM_ENTRYPOINT + \
+					 PLAT_RPI3_TM_ENTRYPOINT_SIZE)
+#define PLAT_RPI3_TM_HOLD_ENTRY_SIZE	ULL(8)
+#define PLAT_RPI3_TM_HOLD_SIZE		(PLAT_RPI3_TM_HOLD_ENTRY_SIZE * \
+					 PLATFORM_CORE_COUNT)
+
+#define PLAT_RPI3_TRUSTED_MAILBOX_SIZE	(PLAT_RPI3_TM_ENTRYPOINT_SIZE + \
+					 PLAT_RPI3_TM_HOLD_SIZE)
+
+#define PLAT_RPI3_TM_HOLD_STATE_WAIT	ULL(0)
+#define PLAT_RPI3_TM_HOLD_STATE_GO	ULL(1)
+
+/*
+ * BL31 specific defines.
+ *
+ * Put BL31 at the top of the Trusted SRAM. BL31_BASE is calculated using the
+ * current BL31 debug size plus a little space for growth.
+ */
+#define PLAT_MAX_BL31_SIZE		ULL(0x80000)
+
+#define BL31_BASE			ULL(0x1000)
+#define BL31_LIMIT			ULL(0x80000)
+#define BL31_PROGBITS_LIMIT		ULL(0x80000)
+
+#define SEC_SRAM_ID			0
+#define SEC_DRAM_ID			1
+
+/*
+ * Other memory-related defines.
+ */
+#define PLAT_PHY_ADDR_SPACE_SIZE	(ULL(1) << 32)
+#define PLAT_VIRT_ADDR_SPACE_SIZE	(ULL(1) << 32)
+
+#define MAX_MMAP_REGIONS		8
+#define MAX_XLAT_TABLES			4
+
+#define MAX_IO_DEVICES			U(3)
+#define MAX_IO_HANDLES			U(4)
+
+#define MAX_IO_BLOCK_DEVICES		U(1)
+
+/*
+ * Serial-related constants.
+ */
+#define PLAT_RPI3_UART_BASE		RPI3_MINI_UART_BASE
+#define PLAT_RPI3_UART_BAUDRATE		ULL(115200)
+
+/*
+ * System counter
+ */
+#define SYS_COUNTER_FREQ_IN_TICKS	ULL(54000000)
+
+#endif /* PLATFORM_DEF_H */
diff --git a/plat/rpi/rpi4/include/rpi_hw.h b/plat/rpi/rpi4/include/rpi_hw.h
new file mode 100644
index 0000000..ed367ee
--- /dev/null
+++ b/plat/rpi/rpi4/include/rpi_hw.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016-2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef RPI_HW_H
+#define RPI_HW_H
+
+#include <lib/utils_def.h>
+
+/*
+ * Peripherals
+ */
+
+#define RPI_IO_BASE			ULL(0xFE000000)
+#define RPI_IO_SIZE			ULL(0x02000000)
+
+/*
+ * ARM <-> VideoCore mailboxes
+ */
+#define RPI3_MBOX_OFFSET		ULL(0x0000B880)
+#define RPI3_MBOX_BASE			(RPI_IO_BASE + RPI3_MBOX_OFFSET)
+/* VideoCore -> ARM */
+#define RPI3_MBOX0_READ_OFFSET		ULL(0x00000000)
+#define RPI3_MBOX0_PEEK_OFFSET		ULL(0x00000010)
+#define RPI3_MBOX0_SENDER_OFFSET	ULL(0x00000014)
+#define RPI3_MBOX0_STATUS_OFFSET	ULL(0x00000018)
+#define RPI3_MBOX0_CONFIG_OFFSET	ULL(0x0000001C)
+/* ARM -> VideoCore */
+#define RPI3_MBOX1_WRITE_OFFSET		ULL(0x00000020)
+#define RPI3_MBOX1_PEEK_OFFSET		ULL(0x00000030)
+#define RPI3_MBOX1_SENDER_OFFSET	ULL(0x00000034)
+#define RPI3_MBOX1_STATUS_OFFSET	ULL(0x00000038)
+#define RPI3_MBOX1_CONFIG_OFFSET	ULL(0x0000003C)
+/* Mailbox status constants */
+#define RPI3_MBOX_STATUS_FULL_MASK	U(0x80000000) /* Set if full */
+#define RPI3_MBOX_STATUS_EMPTY_MASK	U(0x40000000) /* Set if empty */
+
+/*
+ * Power management, reset controller, watchdog.
+ */
+#define RPI3_IO_PM_OFFSET		ULL(0x00100000)
+#define RPI3_PM_BASE			(RPI_IO_BASE + RPI3_IO_PM_OFFSET)
+/* Registers on top of RPI3_PM_BASE. */
+#define RPI3_PM_RSTC_OFFSET		ULL(0x0000001C)
+#define RPI3_PM_RSTS_OFFSET		ULL(0x00000020)
+#define RPI3_PM_WDOG_OFFSET		ULL(0x00000024)
+/* Watchdog constants */
+#define RPI3_PM_PASSWORD		U(0x5A000000)
+#define RPI3_PM_RSTC_WRCFG_MASK		U(0x00000030)
+#define RPI3_PM_RSTC_WRCFG_FULL_RESET	U(0x00000020)
+/*
+ * The RSTS register is used by the VideoCore firmware when booting the
+ * Raspberry Pi to know which partition to boot from. The partition value is
+ * formed by bits 0, 2, 4, 6, 8 and 10. Partition 63 is used by said firmware
+ * to indicate halt.
+ */
+#define RPI3_PM_RSTS_WRCFG_HALT		U(0x00000555)
+
+/*
+ * Clock controller
+ */
+#define RPI4_IO_CLOCK_OFFSET		ULL(0x00101000)
+#define RPI4_CLOCK_BASE			(RPI_IO_BASE + RPI4_IO_CLOCK_OFFSET)
+#define RPI4_VPU_CLOCK_DIVIDER		ULL(0x0000000c)
+
+/*
+ * Hardware random number generator.
+ */
+#define RPI3_IO_RNG_OFFSET		ULL(0x00104000)
+#define RPI3_RNG_BASE			(RPI_IO_BASE + RPI3_IO_RNG_OFFSET)
+#define RPI3_RNG_CTRL_OFFSET		ULL(0x00000000)
+#define RPI3_RNG_STATUS_OFFSET		ULL(0x00000004)
+#define RPI3_RNG_DATA_OFFSET		ULL(0x00000008)
+#define RPI3_RNG_INT_MASK_OFFSET	ULL(0x00000010)
+/* Enable/disable RNG */
+#define RPI3_RNG_CTRL_ENABLE		U(0x1)
+#define RPI3_RNG_CTRL_DISABLE		U(0x0)
+/* Number of currently available words */
+#define RPI3_RNG_STATUS_NUM_WORDS_SHIFT	U(24)
+#define RPI3_RNG_STATUS_NUM_WORDS_MASK	U(0xFF)
+/* Value to mask interrupts caused by the RNG */
+#define RPI3_RNG_INT_MASK_DISABLE	U(0x1)
+
+/*
+ * Serial port (called 'Mini UART' in the Broadcom documentation).
+ */
+#define RPI3_IO_MINI_UART_OFFSET	ULL(0x00215040)
+#define RPI3_MINI_UART_BASE		(RPI_IO_BASE + RPI3_IO_MINI_UART_OFFSET)
+#define PLAT_RPI4_VPU_CLK_RATE		ULL(1000000000)
+
+/*
+ * GPIO controller
+ */
+#define RPI3_IO_GPIO_OFFSET		ULL(0x00200000)
+#define RPI3_GPIO_BASE			(RPI_IO_BASE + RPI3_IO_GPIO_OFFSET)
+
+/*
+ * SDHost controller
+ */
+#define RPI3_IO_SDHOST_OFFSET           ULL(0x00202000)
+#define RPI3_SDHOST_BASE                (RPI_IO_BASE + RPI3_IO_SDHOST_OFFSET)
+
+/*
+ * GIC interrupt controller
+ */
+#define RPI_HAVE_GIC
+#define RPI4_GIC_GICD_BASE		ULL(0xff841000)
+#define RPI4_GIC_GICC_BASE		ULL(0xff842000)
+
+#define	RPI4_LOCAL_CONTROL_BASE_ADDRESS		ULL(0xff800000)
+#define	RPI4_LOCAL_CONTROL_PRESCALER		ULL(0xff800008)
+
+#endif /* RPI_HW_H */
diff --git a/plat/rpi/rpi4/platform.mk b/plat/rpi/rpi4/platform.mk
new file mode 100644
index 0000000..2038021
--- /dev/null
+++ b/plat/rpi/rpi4/platform.mk
@@ -0,0 +1,103 @@
+#
+# Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+include lib/libfdt/libfdt.mk
+include lib/xlat_tables_v2/xlat_tables.mk
+
+PLAT_INCLUDES		:=	-Iplat/rpi/common/include		\
+				-Iplat/rpi/rpi4/include
+
+PLAT_BL_COMMON_SOURCES	:=	drivers/ti/uart/aarch64/16550_console.S	\
+				plat/rpi/common/rpi3_common.c		\
+				${XLAT_TABLES_LIB_SRCS}
+
+BL31_SOURCES		+=	lib/cpus/aarch64/cortex_a72.S		\
+				plat/rpi/rpi4/aarch64/plat_helpers.S	\
+				plat/rpi/rpi4/aarch64/armstub8_header.S	\
+				drivers/arm/gic/common/gic_common.c     \
+				drivers/arm/gic/v2/gicv2_helpers.c      \
+				drivers/arm/gic/v2/gicv2_main.c         \
+				plat/common/plat_gicv2.c                \
+				plat/rpi/rpi4/rpi4_bl31_setup.c		\
+				plat/rpi/common/rpi3_pm.c		\
+				plat/common/plat_psci_common.c		\
+				plat/rpi/common/rpi3_topology.c		\
+				common/fdt_fixup.c			\
+				${LIBFDT_SRCS}
+
+# For now we only support BL31, using the kernel loaded by the GPU firmware.
+RESET_TO_BL31		:=	1
+
+# All CPUs enter armstub8.bin.
+COLD_BOOT_SINGLE_CPU	:=	0
+
+# Tune compiler for Cortex-A72
+ifeq ($(notdir $(CC)),armclang)
+    TF_CFLAGS_aarch64	+=	-mcpu=cortex-a72
+else ifneq ($(findstring clang,$(notdir $(CC))),)
+    TF_CFLAGS_aarch64	+=	-mcpu=cortex-a72
+else
+    TF_CFLAGS_aarch64	+=	-mtune=cortex-a72
+endif
+
+# Add support for platform supplied linker script for BL31 build
+$(eval $(call add_define,PLAT_EXTRA_LD_SCRIPT))
+
+# Enable all errata workarounds for Cortex-A72
+ERRATA_A72_859971		:= 1
+
+WORKAROUND_CVE_2017_5715	:= 1
+
+# Add new default target when compiling this platform
+all: bl31
+
+# Build config flags
+# ------------------
+
+# Disable stack protector by default
+ENABLE_STACK_PROTECTOR	 	:= 0
+
+# Have different sections for code and rodata
+SEPARATE_CODE_AND_RODATA	:= 1
+
+# Use Coherent memory
+USE_COHERENT_MEM		:= 1
+
+# Platform build flags
+# --------------------
+
+# There is not much else than a Linux kernel to load at the moment.
+RPI3_DIRECT_LINUX_BOOT		:= 1
+
+# BL33 images are in AArch64 by default
+RPI3_BL33_IN_AARCH32		:= 0
+
+# UART to use at runtime. -1 means the runtime UART is disabled.
+# Any other value means the default UART will be used.
+RPI3_RUNTIME_UART		:= 0
+
+# Use normal memory mapping for ROM, FIP, SRAM and DRAM
+RPI3_USE_UEFI_MAP		:= 0
+
+# Process platform flags
+# ----------------------
+
+$(eval $(call add_define,RPI3_BL33_IN_AARCH32))
+$(eval $(call add_define,RPI3_DIRECT_LINUX_BOOT))
+ifdef RPI3_PRELOADED_DTB_BASE
+$(eval $(call add_define,RPI3_PRELOADED_DTB_BASE))
+endif
+$(eval $(call add_define,RPI3_RUNTIME_UART))
+$(eval $(call add_define,RPI3_USE_UEFI_MAP))
+
+ifeq (${ARCH},aarch32)
+  $(error Error: AArch32 not supported on rpi4)
+endif
+
+ifneq ($(ENABLE_STACK_PROTECTOR), 0)
+PLAT_BL_COMMON_SOURCES	+=	drivers/rpi3/rng/rpi3_rng.c		\
+				plat/rpi/common/rpi3_stack_protector.c
+endif
diff --git a/plat/rpi/rpi4/rpi4_bl31_setup.c b/plat/rpi/rpi4/rpi4_bl31_setup.c
new file mode 100644
index 0000000..53ab0c2
--- /dev/null
+++ b/plat/rpi/rpi4/rpi4_bl31_setup.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2015-2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+
+#include <libfdt.h>
+
+#include <platform_def.h>
+#include <arch_helpers.h>
+#include <common/bl_common.h>
+#include <lib/mmio.h>
+#include <lib/xlat_tables/xlat_mmu_helpers.h>
+#include <lib/xlat_tables/xlat_tables_defs.h>
+#include <lib/xlat_tables/xlat_tables_v2.h>
+#include <plat/common/platform.h>
+#include <common/fdt_fixup.h>
+#include <libfdt.h>
+
+#include <drivers/arm/gicv2.h>
+
+#include <rpi_shared.h>
+
+/*
+ * Fields at the beginning of armstub8.bin.
+ * While building the BL31 image, we put the stub magic into the binary.
+ * The GPU firmware detects this at boot time, clears that field as a
+ * confirmation and puts the kernel and DT address in the following words.
+ */
+extern uint32_t stub_magic;
+extern uint32_t dtb_ptr32;
+extern uint32_t kernel_entry32;
+
+static const gicv2_driver_data_t rpi4_gic_data = {
+	.gicd_base = RPI4_GIC_GICD_BASE,
+	.gicc_base = RPI4_GIC_GICC_BASE,
+};
+
+/*
+ * To be filled by the code below. At the moment BL32 is not supported.
+ * In the future these might be passed down from BL2.
+ */
+static entry_point_info_t bl32_image_ep_info;
+static entry_point_info_t bl33_image_ep_info;
+
+/*******************************************************************************
+ * Return a pointer to the 'entry_point_info' structure of the next image for
+ * the security state specified. BL33 corresponds to the non-secure image type
+ * while BL32 corresponds to the secure image type. A NULL pointer is returned
+ * if the image does not exist.
+ ******************************************************************************/
+entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type)
+{
+	entry_point_info_t *next_image_info;
+
+	assert(sec_state_is_valid(type) != 0);
+
+	next_image_info = (type == NON_SECURE)
+			? &bl33_image_ep_info : &bl32_image_ep_info;
+
+	/* None of the images can have 0x0 as the entrypoint. */
+	if (next_image_info->pc) {
+		return next_image_info;
+	} else {
+		return NULL;
+	}
+}
+
+uintptr_t plat_get_ns_image_entrypoint(void)
+{
+#ifdef PRELOADED_BL33_BASE
+	return PRELOADED_BL33_BASE;
+#else
+	/* Cleared by the GPU if kernel address is valid. */
+	if (stub_magic == 0)
+		return kernel_entry32;
+
+	WARN("Stub magic failure, using default kernel address 0x80000\n");
+	return 0x80000;
+#endif
+}
+
+static uintptr_t rpi4_get_dtb_address(void)
+{
+#ifdef RPI3_PRELOADED_DTB_BASE
+	return RPI3_PRELOADED_DTB_BASE;
+#else
+	/* Cleared by the GPU if DTB address is valid. */
+	if (stub_magic == 0)
+		return dtb_ptr32;
+
+	WARN("Stub magic failure, DTB address unknown\n");
+	return 0;
+#endif
+}
+
+static void ldelay(register_t delay)
+{
+	__asm__ volatile (
+		"1:\tcbz %0, 2f\n\t"
+		"sub %0, %0, #1\n\t"
+		"b 1b\n"
+		"2:"
+		: "=&r" (delay) : "0" (delay)
+	);
+}
+
+/*******************************************************************************
+ * Perform any BL31 early platform setup. Here is an opportunity to copy
+ * parameters passed by the calling EL (S-EL1 in BL2 & EL3 in BL1) before
+ * they are lost (potentially). This needs to be done before the MMU is
+ * initialized so that the memory layout can be used while creating page
+ * tables. BL2 has flushed this information to memory, so we are guaranteed
+ * to pick up good data.
+ ******************************************************************************/
+void bl31_early_platform_setup2(u_register_t arg0, u_register_t arg1,
+				u_register_t arg2, u_register_t arg3)
+
+{
+	uint32_t div_reg;
+
+	/*
+	 * LOCAL_CONTROL:
+	 * Bit 9 clear: Increment by 1 (vs. 2).
+	 * Bit 8 clear: Timer source is 19.2MHz crystal (vs. APB).
+	 */
+	mmio_write_32(RPI4_LOCAL_CONTROL_BASE_ADDRESS, 0);
+
+	/* LOCAL_PRESCALER; divide-by (0x80000000 / register_val) == 1 */
+	mmio_write_32(RPI4_LOCAL_CONTROL_PRESCALER, 0x80000000);
+
+	/* Early GPU firmware revisions need a little break here. */
+	ldelay(100000);
+
+	/*
+	 * Initialize the console to provide early debug support.
+	 * Different GPU firmware revisions set up the VPU divider differently,
+	 * so read the actual divider register to learn the UART base clock
+	 * rate. The divider is encoded as a 12.12 fixed point number, but we
+	 * just care about the integer part of it.
+	 */
+	div_reg = mmio_read_32(RPI4_CLOCK_BASE + RPI4_VPU_CLOCK_DIVIDER);
+	div_reg = (div_reg >> 12) & 0xfff;
+	if (div_reg == 0)
+		div_reg = 1;
+	rpi3_console_init(PLAT_RPI4_VPU_CLK_RATE / div_reg);
+
+	bl33_image_ep_info.pc = plat_get_ns_image_entrypoint();
+	bl33_image_ep_info.spsr = rpi3_get_spsr_for_bl33_entry();
+	SET_SECURITY_STATE(bl33_image_ep_info.h.attr, NON_SECURE);
+
+#if RPI3_DIRECT_LINUX_BOOT
+# if RPI3_BL33_IN_AARCH32
+	/*
+	 * According to the file ``Documentation/arm/Booting`` of the Linux
+	 * kernel tree, Linux expects:
+	 * r0 = 0
+	 * r1 = machine type number, optional in DT-only platforms (~0 if so)
+	 * r2 = Physical address of the device tree blob
+	 */
+	VERBOSE("rpi4: Preparing to boot 32-bit Linux kernel\n");
+	bl33_image_ep_info.args.arg0 = 0U;
+	bl33_image_ep_info.args.arg1 = ~0U;
+	bl33_image_ep_info.args.arg2 = rpi4_get_dtb_address();
+# else
+	/*
+	 * According to the file ``Documentation/arm64/booting.txt`` of the
+	 * Linux kernel tree, Linux expects the physical address of the device
+	 * tree blob (DTB) in x0, while x1-x3 are reserved for future use and
+	 * must be 0.
+	 */
+	VERBOSE("rpi4: Preparing to boot 64-bit Linux kernel\n");
+	bl33_image_ep_info.args.arg0 = rpi4_get_dtb_address();
+	bl33_image_ep_info.args.arg1 = 0ULL;
+	bl33_image_ep_info.args.arg2 = 0ULL;
+	bl33_image_ep_info.args.arg3 = 0ULL;
+# endif /* RPI3_BL33_IN_AARCH32 */
+#endif /* RPI3_DIRECT_LINUX_BOOT */
+}
+
+void bl31_plat_arch_setup(void)
+{
+	/*
+	 * Is the dtb_ptr32 pointer valid? If yes, map the DTB region.
+	 * We map the 2MB region the DTB start address lives in, plus
+	 * the next 2MB, to have enough room for expansion.
+	 */
+	if (stub_magic == 0) {
+		unsigned long long dtb_region = dtb_ptr32;
+
+		dtb_region &= ~0x1fffff;	/* Align to 2 MB. */
+		mmap_add_region(dtb_region, dtb_region, 4U << 20,
+				MT_MEMORY | MT_RW | MT_NS);
+	}
+	/*
+	 * Add the first page of memory, which holds the stub magic,
+	 * the kernel and the DT address.
+	 * This also holds the secondary CPU's entrypoints and mailboxes.
+	 */
+	mmap_add_region(0, 0, 4096, MT_NON_CACHEABLE | MT_RW | MT_SECURE);
+
+	rpi3_setup_page_tables(BL31_BASE, BL31_END - BL31_BASE,
+			       BL_CODE_BASE, BL_CODE_END,
+			       BL_RO_DATA_BASE, BL_RO_DATA_END
+#if USE_COHERENT_MEM
+			       , BL_COHERENT_RAM_BASE, BL_COHERENT_RAM_END
+#endif
+			      );
+
+	enable_mmu_el3(0);
+}
+
+static uint32_t dtb_size(const void *dtb)
+{
+	const uint32_t *dtb_header = dtb;
+
+	return fdt32_to_cpu(dtb_header[1]);
+}
+
+static void rpi4_prepare_dtb(void)
+{
+	void *dtb = (void *)rpi4_get_dtb_address();
+	uint32_t gic_int_prop[3];
+	int ret, offs;
+
+	/* Return if no device tree is detected */
+	if (fdt_check_header(dtb) != 0)
+		return;
+
+	ret = fdt_open_into(dtb, dtb, 0x100000);
+	if (ret < 0) {
+		ERROR("Invalid Device Tree at %p: error %d\n", dtb, ret);
+		return;
+	}
+
+	if (dt_add_psci_node(dtb)) {
+		ERROR("Failed to add PSCI Device Tree node\n");
+		return;
+	}
+
+	if (dt_add_psci_cpu_enable_methods(dtb)) {
+		ERROR("Failed to add PSCI cpu enable methods in Device Tree\n");
+		return;
+	}
+
+	/* Reserve memory used by Trusted Firmware. */
+	if (fdt_add_reserved_memory(dtb, "atf@0", 0, 0x80000))
+		WARN("Failed to add reserved memory nodes to DT.\n");
+
+	offs = fdt_node_offset_by_compatible(dtb, 0, "arm,gic-400");
+	gic_int_prop[0] = cpu_to_fdt32(1);		// PPI
+	gic_int_prop[1] = cpu_to_fdt32(9);		// PPI #9
+	gic_int_prop[2] = cpu_to_fdt32(0x0f04);		// all cores, level high
+	fdt_setprop(dtb, offs, "interrupts", gic_int_prop, 12);
+
+	offs = fdt_path_offset(dtb, "/chosen");
+	fdt_setprop_string(dtb, offs, "stdout-path", "serial0");
+
+	ret = fdt_pack(dtb);
+	if (ret < 0)
+		ERROR("Failed to pack Device Tree at %p: error %d\n", dtb, ret);
+
+	clean_dcache_range((uintptr_t)dtb, dtb_size(dtb));
+	INFO("Changed device tree to advertise PSCI.\n");
+}
+
+void bl31_platform_setup(void)
+{
+	rpi4_prepare_dtb();
+
+	/* Configure the interrupt controller */
+	gicv2_driver_init(&rpi4_gic_data);
+	gicv2_distif_init();
+	gicv2_pcpu_distif_init();
+	gicv2_cpuif_enable();
+}