Refactor Statistical Profiling Extensions implementation
Factor out SPE operations in a separate file. Use the publish
subscribe framework to drain the SPE buffers before entering secure
world. Additionally, enable SPE before entering normal world.
A side effect of this change is that the profiling buffers are now
only drained when a transition from normal world to secure world
happens. Previously they were drained also on return from secure
world, which is unnecessary as SPE is not supported in S-EL1.
Change-Id: I17582c689b4b525770dbb6db098b3a0b5777b70a
Signed-off-by: Dimitris Papastamos <dimitris.papastamos@arm.com>
diff --git a/Makefile b/Makefile
index 9ab37ac..9894c5c 100644
--- a/Makefile
+++ b/Makefile
@@ -189,6 +189,7 @@
-Iinclude/lib/cpus/${ARCH} \
-Iinclude/lib/el3_runtime \
-Iinclude/lib/el3_runtime/${ARCH} \
+ -Iinclude/lib/extensions \
-Iinclude/lib/pmf \
-Iinclude/lib/psci \
-Iinclude/lib/xlat_tables \
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index 336c295..fccdc8a 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -46,6 +46,10 @@
services/std_svc/sdei/sdei_state.c
endif
+ifeq (${ENABLE_SPE_FOR_LOWER_ELS},1)
+BL31_SOURCES += lib/extensions/spe/spe.c
+endif
+
BL31_LINKERFILE := bl31/bl31.ld.S
# Flag used to indicate if Crash reporting via console should be included
diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
index 34fdaee..ed35df8 100644
--- a/include/common/aarch64/el3_common_macros.S
+++ b/include/common/aarch64/el3_common_macros.S
@@ -95,10 +95,6 @@
* MDCR_EL3.SPD32: Set to 0b10 to disable AArch32 Secure self-hosted
* privileged debug from S-EL1.
*
- * MDCR_EL3.NSPB (ARM v8.2): SPE enabled in non-secure state and
- * disabled in secure state. Accesses to SPE registers at SEL1 generate
- * trap exceptions to EL3.
- *
* MDCR_EL3.TDOSA: Set to zero so that EL2 and EL2 System register
* access to the powerdown debug registers do not trap to EL3.
*
@@ -112,19 +108,6 @@
*/
mov_imm x0, ((MDCR_EL3_RESET_VAL | MDCR_SDD_BIT | MDCR_SPD32(MDCR_SPD32_DISABLE)) \
& ~(MDCR_TDOSA_BIT | MDCR_TDA_BIT | MDCR_TPM_BIT))
-
-#if ENABLE_SPE_FOR_LOWER_ELS
- /* Detect if SPE is implemented */
- mrs x1, id_aa64dfr0_el1
- ubfx x1, x1, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH
- cmp x1, #0x1
- b.ne 1f
-
- /* Enable SPE for use by normal world */
- orr x0, x0, #MDCR_NSPB(MDCR_NSPB_EL1)
-1:
-#endif
-
msr mdcr_el3, x0
/* ---------------------------------------------------------------------
diff --git a/include/lib/aarch64/arch.h b/include/lib/aarch64/arch.h
index cb7dab7..777a01a 100644
--- a/include/lib/aarch64/arch.h
+++ b/include/lib/aarch64/arch.h
@@ -604,4 +604,9 @@
#define PAR_ADDR_SHIFT 12
#define PAR_ADDR_MASK (BIT(40) - 1) /* 40-bits-wide page address */
+/*******************************************************************************
+ * Definitions for system register interface to SPE
+ ******************************************************************************/
+#define PMBLIMITR_EL1 S3_0_C9_C10_0
+
#endif /* __ARCH_H__ */
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index 782343d..46d9a1c 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -197,6 +197,7 @@
DEFINE_SYSOP_TYPE_FUNC(dmb, st)
DEFINE_SYSOP_TYPE_FUNC(dmb, ld)
DEFINE_SYSOP_TYPE_FUNC(dsb, ish)
+DEFINE_SYSOP_TYPE_FUNC(dsb, nsh)
DEFINE_SYSOP_TYPE_FUNC(dsb, ishst)
DEFINE_SYSOP_TYPE_FUNC(dmb, ish)
DEFINE_SYSOP_TYPE_FUNC(dmb, ishst)
@@ -301,6 +302,7 @@
DEFINE_SYSREG_READ_FUNC(ctr_el0)
DEFINE_SYSREG_RW_FUNCS(mdcr_el2)
+DEFINE_SYSREG_RW_FUNCS(mdcr_el3)
DEFINE_SYSREG_RW_FUNCS(hstr_el2)
DEFINE_SYSREG_RW_FUNCS(cnthp_ctl_el2)
DEFINE_SYSREG_RW_FUNCS(pmcr_el0)
@@ -320,6 +322,7 @@
DEFINE_RENAME_SYSREG_WRITE_FUNC(icc_eoir1_el1, ICC_EOIR1_EL1)
DEFINE_RENAME_SYSREG_WRITE_FUNC(icc_sgi0r_el1, ICC_SGI0R_EL1)
+DEFINE_RENAME_SYSREG_RW_FUNCS(pmblimitr_el1, PMBLIMITR_EL1)
#define IS_IN_EL(x) \
(GET_EL(read_CurrentEl()) == MODE_EL##x)
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index cf06a64..5889904 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -313,7 +313,6 @@
* Function prototypes
******************************************************************************/
void el1_sysregs_context_save(el1_sys_regs_t *regs);
-void el1_sysregs_context_save_post_ops(void);
void el1_sysregs_context_restore(el1_sys_regs_t *regs);
#if CTX_INCLUDE_FPREGS
void fpregs_context_save(fp_regs_t *regs);
diff --git a/include/lib/extensions/spe.h b/include/lib/extensions/spe.h
new file mode 100644
index 0000000..8a74127
--- /dev/null
+++ b/include/lib/extensions/spe.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __SPE_H__
+#define __SPE_H__
+
+void spe_enable(int el2_unused);
+void spe_disable(void);
+
+#endif /* __SPE_H__ */
diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h
index a28a903..abd7395 100644
--- a/include/plat/arm/common/plat_arm.h
+++ b/include/plat/arm/common/plat_arm.h
@@ -227,7 +227,4 @@
uint32_t cookie_lo,
void *handle);
-/* Disable Statistical Profiling Extensions helper */
-void arm_disable_spe(void);
-
#endif /* __PLAT_ARM_H__ */
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index 143da95..620ec16 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -9,7 +9,6 @@
#include <context.h>
.global el1_sysregs_context_save
- .global el1_sysregs_context_save_post_ops
.global el1_sysregs_context_restore
#if CTX_INCLUDE_FPREGS
.global fpregs_context_save
@@ -112,36 +111,6 @@
/* -----------------------------------------------------
* The following function strictly follows the AArch64
* PCS to use x9-x17 (temporary caller-saved registers)
- * to do post operations after saving the EL1 system
- * register context.
- * -----------------------------------------------------
- */
-func el1_sysregs_context_save_post_ops
-#if ENABLE_SPE_FOR_LOWER_ELS
- /* Detect if SPE is implemented */
- mrs x9, id_aa64dfr0_el1
- ubfx x9, x9, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH
- cmp x9, #0x1
- b.ne 1f
-
- /*
- * Before switching from normal world to secure world
- * the profiling buffers need to be drained out to memory. This is
- * required to avoid an invalid memory access when TTBR is switched
- * for entry to SEL1.
- */
- .arch armv8.2-a+profile
- psb csync
- dsb nsh
- .arch armv8-a
-1:
-#endif
- ret
-endfunc el1_sysregs_context_save_post_ops
-
-/* -----------------------------------------------------
- * The following function strictly follows the AArch64
- * PCS to use x9-x17 (temporary caller-saved registers)
* to restore EL1 system register context. It assumes
* that 'x0' is pointing to a 'el1_sys_regs' structure
* from where the register context will be restored
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 479acc9..8f1523f 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -15,6 +15,7 @@
#include <platform_def.h>
#include <pubsub_events.h>
#include <smcc_helpers.h>
+#include <spe.h>
#include <string.h>
#include <utils.h>
@@ -216,6 +217,9 @@
static void enable_extensions_nonsecure(int el2_unused)
{
#if IMAGE_BL31
+#if ENABLE_SPE_FOR_LOWER_ELS
+ spe_enable(el2_unused);
+#endif
#endif
}
@@ -354,13 +358,6 @@
* relying on hw. Some fields are architecturally
* UNKNOWN on reset.
*
- * MDCR_EL2.TPMS (ARM v8.2): Do not trap statistical
- * profiling controls to EL2.
- *
- * MDCR_EL2.E2PB (ARM v8.2): SPE enabled in non-secure
- * state. Accesses to profiling buffer controls at
- * non-secure EL1 are not trapped to EL2.
- *
* MDCR_EL2.TDRA: Set to zero so that Non-secure EL0 and
* EL1 System register accesses to the Debug ROM
* registers are not trapped to EL2.
@@ -397,22 +394,6 @@
| MDCR_EL2_HPME_BIT | MDCR_EL2_TPM_BIT
| MDCR_EL2_TPMCR_BIT));
-#if ENABLE_SPE_FOR_LOWER_ELS
- uint64_t id_aa64dfr0_el1;
-
- /* Detect if SPE is implemented */
- id_aa64dfr0_el1 = read_id_aa64dfr0_el1() >>
- ID_AA64DFR0_PMS_SHIFT;
- if ((id_aa64dfr0_el1 & ID_AA64DFR0_PMS_MASK) == 1) {
- /*
- * Make sure traps to EL2 are not generated if
- * EL2 is implemented but not used.
- */
- mdcr_el2 &= ~MDCR_EL2_TPMS;
- mdcr_el2 |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1);
- }
-#endif
-
write_mdcr_el2(mdcr_el2);
/*
@@ -454,7 +435,6 @@
assert(ctx);
el1_sysregs_context_save(get_sysregs_ctx(ctx));
- el1_sysregs_context_save_post_ops();
#if IMAGE_BL31
if (security_state == SECURE)
diff --git a/lib/extensions/spe/spe.c b/lib/extensions/spe/spe.c
new file mode 100644
index 0000000..3b297f2
--- /dev/null
+++ b/lib/extensions/spe/spe.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <pubsub.h>
+
+/*
+ * The assembler does not yet understand the psb csync mnemonic
+ * so use the equivalent hint instruction.
+ */
+#define psb_csync() asm volatile("hint #17")
+
+void spe_enable(int el2_unused)
+{
+ uint64_t features;
+
+ features = read_id_aa64dfr0_el1() >> ID_AA64DFR0_PMS_SHIFT;
+ if ((features & ID_AA64DFR0_PMS_MASK) == 1) {
+ uint64_t v;
+
+ if (el2_unused) {
+ /*
+ * MDCR_EL2.TPMS (ARM v8.2): Do not trap statistical
+ * profiling controls to EL2.
+ *
+ * MDCR_EL2.E2PB (ARM v8.2): SPE enabled in Non-secure
+ * state. Accesses to profiling buffer controls at
+ * Non-secure EL1 are not trapped to EL2.
+ */
+ v = read_mdcr_el2();
+ v &= ~MDCR_EL2_TPMS;
+ v |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1);
+ write_mdcr_el2(v);
+ }
+
+ /*
+ * MDCR_EL2.NSPB (ARM v8.2): SPE enabled in Non-secure state
+ * and disabled in secure state. Accesses to SPE registers at
+ * S-EL1 generate trap exceptions to EL3.
+ */
+ v = read_mdcr_el3();
+ v |= MDCR_NSPB(MDCR_NSPB_EL1);
+ write_mdcr_el3(v);
+ }
+}
+
+void spe_disable(void)
+{
+ uint64_t features;
+
+ features = read_id_aa64dfr0_el1() >> ID_AA64DFR0_PMS_SHIFT;
+ if ((features & ID_AA64DFR0_PMS_MASK) == 1) {
+ uint64_t v;
+
+ /* Drain buffered data */
+ psb_csync();
+ dsbnsh();
+
+ /* Disable profiling buffer */
+ v = read_pmblimitr_el1();
+ v &= ~(1ULL << 0);
+ write_pmblimitr_el1(v);
+ isb();
+ }
+}
+
+static void *spe_drain_buffers_hook(const void *arg)
+{
+ uint64_t features;
+
+ features = read_id_aa64dfr0_el1() >> ID_AA64DFR0_PMS_SHIFT;
+ if ((features & ID_AA64DFR0_PMS_MASK) == 1) {
+ /* Drain buffered data */
+ psb_csync();
+ dsbnsh();
+ }
+
+ return 0;
+}
+
+SUBSCRIBE_TO_EVENT(cm_entering_secure_world, spe_drain_buffers_hook);
diff --git a/plat/arm/board/fvp/fvp_pm.c b/plat/arm/board/fvp/fvp_pm.c
index 0ab5b82..13bd8f2 100644
--- a/plat/arm/board/fvp/fvp_pm.c
+++ b/plat/arm/board/fvp/fvp_pm.c
@@ -14,6 +14,7 @@
#include <plat_arm.h>
#include <platform.h>
#include <psci.h>
+#include <spe.h>
#include <v2m_def.h>
#include "drivers/pwrc/fvp_pwrc.h"
#include "fvp_def.h"
@@ -57,7 +58,7 @@
* On power down we need to disable statistical profiling extensions
* before exiting coherency.
*/
- arm_disable_spe();
+ spe_disable();
#endif
/* Disable coherency if this cluster is to be turned off */
diff --git a/plat/arm/common/aarch64/arm_helpers.S b/plat/arm/common/aarch64/arm_helpers.S
index b53e60d..9d3a108 100644
--- a/plat/arm/common/aarch64/arm_helpers.S
+++ b/plat/arm/common/aarch64/arm_helpers.S
@@ -12,7 +12,6 @@
.globl plat_crash_console_putc
.globl plat_crash_console_flush
.globl platform_mem_init
- .globl arm_disable_spe
/* -----------------------------------------------------
@@ -88,34 +87,6 @@
ret
endfunc platform_mem_init
- /* -----------------------------------------------------
- * void arm_disable_spe (void);
- * -----------------------------------------------------
- */
-#if ENABLE_SPE_FOR_LOWER_ELS
-func arm_disable_spe
- /* Detect if SPE is implemented */
- mrs x0, id_aa64dfr0_el1
- ubfx x0, x0, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH
- cmp x0, #0x1
- b.ne 1f
-
- /* Drain buffered data */
- .arch armv8.2-a+profile
- psb csync
- dsb nsh
-
- /* Disable Profiling Buffer */
- mrs x0, pmblimitr_el1
- bic x0, x0, #1
- msr pmblimitr_el1, x0
- isb
- .arch armv8-a
-1:
- ret
-endfunc arm_disable_spe
-#endif
-
/*
* Need to use coherent stack when ARM Cryptocell is used to autheticate images
* since Cryptocell uses DMA to transfer data and it is not coherent with the