core: arm64: fix speculative execution past ERET vulnerability

Even though ERET always causes a jump to another address, AArch64 CPUs
speculatively execute the instructions that follow it as if the ERET
were not a jump instruction.
The speculative execution does not cross privilege levels (i.e. it does
not follow the jump target as one would expect), but it continues at the
kernel privilege level as if the ERET did not change the control flow -
thus speculatively executing whatever is accidentally placed after the
ERET instruction. The results of this speculative execution are always
architecturally discarded, but they can leak data through
microarchitectural side channels. The speculative execution is very
reliable (it appears to be unconditional) and it manages to complete
even relatively expensive operations (e.g. multiple dependent fetches
from uncached memory).
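
As an illustration, here is a sketch of the kind of straight-line
gadget that could be speculated into (hypothetical code, not taken from
this tree): a secret-dependent load placed after ERET leaves a cache
footprint that a later cache-timing probe can recover.

	eret			/* architectural control flow leaves here */
	/* speculatively executed despite the ERET above */
	ldr	x0, [x1]	/* x1: hypothetical pointer to a secret */
	and	x0, x0, #1	/* extract one secret bit */
	lsl	x0, x0, #6	/* scale by cache line size (64 bytes) */
	ldr	x2, [x3, x0]	/* x3: hypothetical probe array,
				   secret-dependent load */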

The same vulnerability was fixed in Linux [1], FreeBSD [2] and OpenBSD
[3]. The misbehavior is demonstrated in [4] and [5]. Following those
fixes, this change adds a speculation barrier (DSB NSH followed by ISB)
right after each ERET in the core.

Link: [1] https://github.com/torvalds/linux/commit/679db70801da9fda91d26caf13bf5b5ccc74e8e8
Link: [2] https://github.com/freebsd/freebsd/commit/29fb48ace4186a41c409fde52bcf4216e9e50b61
Link: [3] https://github.com/openbsd/src/commit/3a08873ece1cb28ace89fd65e8f3c1375cc98de2
Link: [4] https://github.com/google/safeside/blob/master/demos/eret_hvc_smc_wrapper.cc
Link: [5] https://github.com/google/safeside/blob/master/kernel_modules/kmod_eret_hvc_smc/eret_hvc_smc_module.c

Signed-off-by: Anthony Steinhauser <asteinhauser@google.com>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
diff --git a/core/arch/arm/kernel/thread_a64.S b/core/arch/arm/kernel/thread_a64.S
index d0edc1c..9c7cf12 100644
--- a/core/arch/arm/kernel/thread_a64.S
+++ b/core/arch/arm/kernel/thread_a64.S
@@ -23,6 +23,13 @@
 		madd	x\res, x\tmp0, x\tmp1, x\res
 	.endm
 
+	.macro return_from_exception
+		eret
+		/* Guard against speculation past ERET */
+		dsb nsh
+		isb
+	.endm
+
 	.macro b_if_spsr_is_el0 reg, label
 		tbnz	\reg, #(SPSR_MODE_RW_32 << SPSR_MODE_RW_SHIFT), \label
 		tst	\reg, #(SPSR_64_MODE_EL_MASK << SPSR_64_MODE_EL_SHIFT)
@@ -41,7 +48,7 @@
 
 	load_xregs x0, THREAD_CTX_REGS_X1, 1, 3
 	ldr	x0, [x0, THREAD_CTX_REGS_X0]
-	eret
+	return_from_exception
 
 1:	load_xregs x0, THREAD_CTX_REGS_X1, 1, 3
 	ldr	x0, [x0, THREAD_CTX_REGS_X0]
@@ -495,7 +502,7 @@
 	load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1
 #endif /*CFG_CORE_UNMAP_CORE_AT_EL0*/
 
-	eret
+	return_from_exception
 
 	/*
 	 * void icache_inv_user_range(void *addr, size_t size);
@@ -659,7 +666,7 @@
 	ldp	x0, x1, [x30, THREAD_SVC_REG_X0]
 	ldr	x30, [x30, #THREAD_SVC_REG_X30]
 
-	eret
+	return_from_exception
 
 1:	ldp	x0, x1, [x30, THREAD_SVC_REG_X0]
 	ldr	x30, [x30, #THREAD_SVC_REG_X30]
@@ -748,7 +755,7 @@
 	load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 3
 
 	/* Return from exception */
-	eret
+	return_from_exception
 END_FUNC el1_sync_abort
 
 	/* sp_el0 in x3 */
@@ -826,7 +833,7 @@
 	load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1
 
 	/* Return from exception */
-	eret
+	return_from_exception
 1:	b	eret_to_el0
 END_FUNC el0_sync_abort
 
@@ -979,7 +986,7 @@
 	load_xregs sp, THREAD_CORE_LOCAL_X0, 0, 1
 
 	/* Return from exception */
-	eret
+	return_from_exception
 1:	b	eret_to_el0
 .endm