core: generic_entry_a{32,64}.S: use correct cached_mem_end

Store the correct register in cached_mem_end at boot: the end of the copy
destination (r0/x0) instead of the end of the copy source (r1/x1). This
avoids using stale dcache content.

Fixes: 5dd1570ac5b0 ("core: add embedded data region")
Reviewed-by: Jerome Forissier <jerome@forissier.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
diff --git a/core/arch/arm/kernel/generic_entry_a32.S b/core/arch/arm/kernel/generic_entry_a32.S
index 9bb2f52..d2e9ff0 100644
--- a/core/arch/arm/kernel/generic_entry_a32.S
+++ b/core/arch/arm/kernel/generic_entry_a32.S
@@ -372,7 +372,7 @@
 	/* Copy backwards (as memmove) in case we're overlapping */
 	add	r0, r0, r2		/* __init_start + len */
 	add	r1, r1, r2		/* __data_end + len */
-	str	r1, cached_mem_end
+	str	r0, cached_mem_end
 	ldr	r2, =__init_start
 copy_init:
 	ldmdb	r1!, {r3, r8-r12}
@@ -392,8 +392,7 @@
 	/* Copy backwards (as memmove) in case we're overlapping */
 	add	r0, r0, r2
 	add	r1, r1, r2
-	adr	r3, cached_mem_end
-	str	r1, [r3]
+	str	r0, cached_mem_end
 	ldr	r2, =__end
 
 copy_init:
diff --git a/core/arch/arm/kernel/generic_entry_a64.S b/core/arch/arm/kernel/generic_entry_a64.S
index b3cbb8f..bfb97a5 100644
--- a/core/arch/arm/kernel/generic_entry_a64.S
+++ b/core/arch/arm/kernel/generic_entry_a64.S
@@ -90,7 +90,7 @@
 	add	x0, x0, x2		/* __init_start + len */
 	add	x1, x1, x2		/* __data_end + len */
 	adr	x3, cached_mem_end
-	str	x1, [x3]
+	str	x0, [x3]
 	adr	x2, __init_start
 copy_init:
 	ldp	x3, x4, [x1, #-16]!
@@ -101,8 +101,8 @@
 	/*
 	 * The binary is built as:
 	 * [Core, rodata and data] : In correct location
-	 * [struct boot_embdata + data] : Should be saved before
-	 * initializing pager, first uint32_t tells the length of the data
+	 * [struct boot_embdata + data] : Should be moved to __end, first
+	 * uint32_t tells the length of the struct + data
 	 */
 	adr	x0, __end		/* dst */
 	adr	x1, __data_end		/* src */
@@ -111,7 +111,7 @@
 	add	x0, x0, x2
 	add	x1, x1, x2
 	adr	x3, cached_mem_end
-	str	x1, [x3]
+	str	x0, [x3]
 	adr	x2, __end
 
 copy_init:
@@ -151,6 +151,12 @@
 	/* Enable aborts now that we can receive exceptions */
 	msr	daifclr, #DAIFBIT_ABT
 
+	/*
+	 * Invalidate dcache for all memory used during initialization to
+	 * avoid nasty surprises when the cache is turned on. We must not
+	 * invalidate memory not used by OP-TEE since we may invalidate
+	 * entries used by, for instance, ARM Trusted Firmware.
+	 */
 	adr_l	x0, __text_start
 	ldr	x1, cached_mem_end
 	sub	x1, x1, x0