CHROMIUM: MALI: Bifrost r14p0 EAC release
Check out CL:*655397, then:
rsync -av --delete --ignore-times .../mali-ddk-bifrost/kernel/drivers/gpu/arm/ ./
BUG=b:113868550
TEST=emerge-kukui -av chromeos-kernel-4_14
TEST=CROSS_COMPILE=aarch64-cros-linux-gnu- ARCH=arm64 \
O=../v4.14-build/arm64 make allmodconfig
make -j > /dev/null
Change-Id: I927144e6a809dbbd4defc4ea14f37632373caf0b
Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/1206230
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Conflicts:
drivers/gpu/arm/midgard/mali_kbase_sync.h
[rebase419(groeck): Context conflicts]
Signed-off-by: Guenter Roeck <groeck@chromium.org>
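
Note for reviewers: much of this import is mechanical. The r14p0 DDK drops the
kbase_context argument from the register accessors and widens the offset to u32
(see mali_kbase_device_internal.h below). A minimal before/after sketch of a
caller update, using calls that already appear in this diff:

  /* r13p0 (old): trailing kctx pointer, u16 offset */
  u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
  kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);

  /* r14p0 (new): no kctx argument, u32 offset */
  u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
  kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);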
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
index 345d244..3e1c915 100644
--- a/drivers/gpu/arm/midgard/Kbuild
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -21,7 +21,7 @@
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r13p0-01rel0"
+MALI_RELEASE_NAME ?= "r14p0-01rel0"
# Paths required for build
KBASE_PATH = $(src)
@@ -30,32 +30,22 @@
# Set up defaults if not defined by build system
MALI_CUSTOMER_RELEASE ?= 1
+MALI_USE_CSF ?= 0
MALI_UNIT_TEST ?= 0
MALI_KERNEL_TEST_API ?= 0
MALI_MOCK_TEST ?= 0
MALI_COVERAGE ?= 0
CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
-# This workaround is for what seems to be a compiler bug we observed in
-# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling
-# the "_Pragma" syntax, where an error message is returned:
-#
-# "internal compiler error: unspellable token PRAGMA"
-#
-# This regression has thus far only been seen on the GCC 4.7 compiler bundled
-# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds
-# which are not known to be used with AOSP, is hardcoded to disable the
-# workaround, i.e. set the define to 0.
-MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
# Set up our defines, which will be passed to gcc
DEFINES = \
-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+ -DMALI_USE_CSF=$(MALI_USE_CSF) \
-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
-DMALI_COVERAGE=$(MALI_COVERAGE) \
- -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
- -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\"
ifeq ($(KBUILD_EXTMOD),)
# in-tree
@@ -67,6 +57,8 @@
DEFINES += -I$(srctree)/drivers/staging/android
+DEFINES += -DMALI_KBASE_BUILD
+
# Use our defines when compiling
ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
@@ -93,7 +85,6 @@
mali_kbase_hw.c \
mali_kbase_utility.c \
mali_kbase_debug.c \
- mali_kbase_trace_timeline.c \
mali_kbase_gpu_memory_debugfs.c \
mali_kbase_mem_linux.c \
mali_kbase_core_linux.c \
@@ -148,6 +139,10 @@
endif
endif
+ifeq ($(MALI_USE_CSF),1)
+ include $(src)/csf/Kbuild
+endif
+
mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \
mali_kbase_dma_fence.o \
mali_kbase_fence.o
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
index e69efdc..12ca727 100644
--- a/drivers/gpu/arm/midgard/Kconfig
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -60,6 +60,7 @@
config MALI_DEVFREQ
bool "devfreq support for Mali"
depends on MALI_MIDGARD && PM_DEVFREQ
+ default y
help
Support devfreq for Mali.
@@ -109,19 +110,6 @@
If unsure, say N.
-config MALI_PRFCNT_SET_SECONDARY
- bool "Use secondary set of performance counters"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- Select this option to use secondary set of performance counters. Kernel
- features that depend on an access to the primary set of counters may
- become unavailable. Enabling this option will prevent power management
- from working optimally and may cause instrumentation tools to return
- bogus results.
-
- If unsure, say N.
-
config MALI_DEBUG
bool "Debug build"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -166,13 +154,6 @@
help
Enables insertion of errors to test module failure and recovery mechanisms.
-config MALI_TRACE_TIMELINE
- bool "Timeline tracing"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- Enables timeline tracing through the kernel tracepoint system.
-
config MALI_SYSTEM_TRACE
bool "Enable system event tracing support"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -184,16 +165,6 @@
minimal overhead when not in use. Enable only if you know what
you are doing.
-config MALI_JOB_DUMP
- bool "Enable system level support needed for job dumping"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- Choose this option to enable system level support needed for
- job dumping. This is typically used for instrumentation but has
- minimal overhead when not in use. Enable only if you know what
- you are doing.
-
config MALI_2MB_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -217,4 +188,29 @@
If using kernel >= v4.10 then say N, otherwise if devfreq cooling
changes have been backported say Y to avoid compilation errors.
+# Instrumentation options.
+
+config MALI_JOB_DUMP
+ bool "Enable system level support needed for job dumping"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Choose this option to enable system level support needed for
+ job dumping. This is typically used for instrumentation but has
+ minimal overhead when not in use. Enable only if you know what
+ you are doing.
+
+config MALI_PRFCNT_SET_SECONDARY
+ bool "Use secondary set of performance counters"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Select this option to use secondary set of performance counters. Kernel
+ features that depend on an access to the primary set of counters may
+ become unavailable. Enabling this option will prevent power management
+ from working optimally and may cause instrumentation tools to return
+ bogus results.
+
+ If unsure, say N.
+
source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/drivers/gpu/arm/midgard/Mconfig b/drivers/gpu/arm/midgard/Mconfig
index 9ad765a..583dec3 100644
--- a/drivers/gpu/arm/midgard/Mconfig
+++ b/drivers/gpu/arm/midgard/Mconfig
@@ -24,7 +24,7 @@
config MALI_GATOR_SUPPORT
bool "Streamline support via Gator"
- depends on MALI_MIDGARD
+ depends on MALI_MIDGARD && !BACKEND_USER
default y if INSTRUMENTATION_STREAMLINE_OLD
default n
help
@@ -84,6 +84,9 @@
include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must
exist.
+ When PLATFORM_CUSTOM is set, this needs to be set manually to
+ pick up the desired platform files.
+
config MALI_MOCK_TEST
bool
depends on MALI_MIDGARD && !RELEASE
@@ -112,19 +115,6 @@
If unsure, say N.
-config MALI_PRFCNT_SET_SECONDARY
- bool "Use secondary set of performance counters"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- Select this option to use secondary set of performance counters. Kernel
- features that depend on an access to the primary set of counters may
- become unavailable. Enabling this option will prevent power management
- from working optimally and may cause instrumentation tools to return
- bogus results.
-
- If unsure, say N.
-
config MALI_DEBUG
bool "Debug build"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -164,13 +154,6 @@
help
Injected errors are random, rather than user-driven.
-config MALI_TRACE_TIMELINE
- bool "Timeline tracing"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- Enables timeline tracing through the kernel tracepoint system.
-
config MALI_SYSTEM_TRACE
bool "Enable system event tracing support"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -206,4 +189,9 @@
not merged in mainline kernel yet. So this define helps to guard those
parts of the code.
+# Instrumentation options.
+
+# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
+# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig.
+
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
index bdf4c5a..dcd8ca4 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/Kbuild
+++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -30,14 +30,12 @@
backend/gpu/mali_kbase_jm_as.c \
backend/gpu/mali_kbase_jm_hw.c \
backend/gpu/mali_kbase_jm_rb.c \
- backend/gpu/mali_kbase_js_affinity.c \
backend/gpu/mali_kbase_js_backend.c \
backend/gpu/mali_kbase_mmu_hw_direct.c \
backend/gpu/mali_kbase_pm_backend.c \
backend/gpu/mali_kbase_pm_driver.c \
backend/gpu/mali_kbase_pm_metrics.c \
backend/gpu/mali_kbase_pm_ca.c \
- backend/gpu/mali_kbase_pm_ca_fixed.c \
backend/gpu/mali_kbase_pm_always_on.c \
backend/gpu/mali_kbase_pm_coarse_demand.c \
backend/gpu/mali_kbase_pm_demand.c \
@@ -46,15 +44,13 @@
ifeq ($(MALI_CUSTOMER_RELEASE),0)
BACKEND += \
- backend/gpu/mali_kbase_pm_ca_random.c \
backend/gpu/mali_kbase_pm_demand_always_powered.c \
backend/gpu/mali_kbase_pm_fast_start.c
endif
ifeq ($(CONFIG_MALI_DEVFREQ),y)
BACKEND += \
- backend/gpu/mali_kbase_devfreq.c \
- backend/gpu/mali_kbase_pm_ca_devfreq.c
+ backend/gpu/mali_kbase_devfreq.c
endif
ifeq ($(CONFIG_MALI_NO_MALI),y)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
index 49567f7..7378bfd 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,6 +29,6 @@
kbdev->current_gpu_coherency_mode = mode;
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
- kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+ kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
index c9c463e..450f6e7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -152,7 +152,7 @@
while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) {
kctx->reg_dump[offset+1] =
kbase_reg_read(kctx->kbdev,
- kctx->reg_dump[offset], NULL);
+ kctx->reg_dump[offset]);
offset += 2;
}
return true;
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
index 432c2aa..683a24c 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -148,9 +148,7 @@
}
#endif
- if (kbdev->pm.backend.ca_current_policy->id ==
- KBASE_PM_CA_POLICY_ID_DEVFREQ)
- kbase_devfreq_set_core_mask(kbdev, core_mask);
+ kbase_devfreq_set_core_mask(kbdev, core_mask);
*target_freq = nominal_freq;
kbdev->current_voltage = voltage;
@@ -259,6 +257,7 @@
struct device_node *node;
int i = 0;
int count;
+ u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present;
if (!opp_node)
return 0;
@@ -283,8 +282,14 @@
if (of_property_read_u64(node, "opp-hz-real", &real_freq))
real_freq = opp_freq;
if (of_property_read_u64(node, "opp-core-mask", &core_mask))
- core_mask =
- kbdev->gpu_props.props.raw_props.shader_present;
+ core_mask = shader_present;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) &&
+ core_mask != shader_present) {
+ dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
+ opp_freq);
+ continue;
+ }
+
core_count_p = of_get_property(node, "opp-core-count", NULL);
if (core_count_p) {
u64 remaining_core_mask =
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
index a0dfd81..ebc3022 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -154,11 +154,9 @@
#endif /* CONFIG_DEBUG_FS */
-void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
- struct kbase_context *kctx)
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
{
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
writel(value, kbdev->reg + offset);
@@ -168,21 +166,15 @@
kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
value, 1);
#endif /* CONFIG_DEBUG_FS */
- dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
-
- if (kctx && kctx->jctx.tb)
- kbase_device_trace_register_access(kctx, REG_WRITE, offset,
- value);
+ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value);
}
KBASE_EXPORT_TEST_API(kbase_reg_write);
-u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
- struct kbase_context *kctx)
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
u32 val;
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
- KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
val = readl(kbdev->reg + offset);
@@ -192,10 +184,8 @@
kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset,
val, 0);
#endif /* CONFIG_DEBUG_FS */
- dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val);
- if (kctx && kctx->jctx.tb)
- kbase_device_trace_register_access(kctx, REG_READ, offset, val);
return val;
}
@@ -216,11 +206,11 @@
u32 status;
u64 address;
- status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+ status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS));
address = (u64) kbase_reg_read(kbdev,
- GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32;
address |= kbase_reg_read(kbdev,
- GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO));
dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
status & 0xFF,
@@ -246,7 +236,7 @@
kbase_clean_caches_done(kbdev);
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
/* kbase_pm_check_transitions must be called after the IRQ has been
* cleared. This is because it might trigger further power transitions
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
index 729256e..928efe9 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,29 +34,21 @@
* @kbdev: Kbase device pointer
* @offset: Offset of register
* @value: Value to write
- * @kctx: Kbase context pointer. May be NULL
*
- * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
- * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
- * != KBASEP_AS_NR_INVALID).
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
*/
-void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
- struct kbase_context *kctx);
+void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value);
/**
* kbase_reg_read - read from GPU register
* @kbdev: Kbase device pointer
* @offset: Offset of register
- * @kctx: Kbase context pointer. May be NULL
*
- * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
- * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
- * != KBASEP_AS_NR_INVALID).
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false).
*
* Return: Value in desired register
*/
-u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
- struct kbase_context *kctx);
+u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
/**
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
index 8809ab0b..39773e6 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -37,62 +37,61 @@
int i;
/* Fill regdump with the content of the relevant registers */
- regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+ regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
regdump->l2_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_FEATURES), NULL);
+ GPU_CONTROL_REG(L2_FEATURES));
regdump->core_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(CORE_FEATURES), NULL);
+ GPU_CONTROL_REG(CORE_FEATURES));
regdump->tiler_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_FEATURES), NULL);
+ GPU_CONTROL_REG(TILER_FEATURES));
regdump->mem_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(MEM_FEATURES), NULL);
+ GPU_CONTROL_REG(MEM_FEATURES));
regdump->mmu_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(MMU_FEATURES), NULL);
+ GPU_CONTROL_REG(MMU_FEATURES));
regdump->as_present = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(AS_PRESENT), NULL);
+ GPU_CONTROL_REG(AS_PRESENT));
regdump->js_present = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(JS_PRESENT), NULL);
+ GPU_CONTROL_REG(JS_PRESENT));
for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
regdump->js_features[i] = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+ GPU_CONTROL_REG(JS_FEATURES_REG(i)));
for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
regdump->texture_features[i] = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+ GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
regdump->thread_max_threads = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+ GPU_CONTROL_REG(THREAD_MAX_THREADS));
regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
- NULL);
+ GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+ GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
regdump->thread_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+ GPU_CONTROL_REG(THREAD_FEATURES));
regdump->thread_tls_alloc = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(THREAD_TLS_ALLOC), NULL);
+ GPU_CONTROL_REG(THREAD_TLS_ALLOC));
regdump->shader_present_lo = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+ GPU_CONTROL_REG(SHADER_PRESENT_LO));
regdump->shader_present_hi = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+ GPU_CONTROL_REG(SHADER_PRESENT_HI));
regdump->tiler_present_lo = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+ GPU_CONTROL_REG(TILER_PRESENT_LO));
regdump->tiler_present_hi = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+ GPU_CONTROL_REG(TILER_PRESENT_HI));
regdump->l2_present_lo = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+ GPU_CONTROL_REG(L2_PRESENT_LO));
regdump->l2_present_hi = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+ GPU_CONTROL_REG(L2_PRESENT_HI));
regdump->stack_present_lo = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(STACK_PRESENT_LO), NULL);
+ GPU_CONTROL_REG(STACK_PRESENT_LO));
regdump->stack_present_hi = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(STACK_PRESENT_HI), NULL);
+ GPU_CONTROL_REG(STACK_PRESENT_HI));
}
void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
@@ -103,7 +102,7 @@
kbase_pm_register_access_enable(kbdev);
regdump->coherency_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+ GPU_CONTROL_REG(COHERENCY_FEATURES));
/* We're done accessing the GPU registers for now. */
kbase_pm_register_access_disable(kbdev);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
index 77d71f5..6c69132 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,16 +51,16 @@
/* Enable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+ irq_mask | CLEAN_CACHES_COMPLETED);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* clean&invalidate the caches so we're sure the mmu tables for the dump
* buffer is valid */
KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+ GPU_COMMAND_CLEAN_INV_CACHES);
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -74,12 +74,8 @@
int err = -EINVAL;
u32 irq_mask;
int ret;
- u64 shader_cores_needed;
u32 prfcnt_config;
- shader_cores_needed = kbase_pm_get_present_cores(kbdev,
- KBASE_PM_CORE_SHADER);
-
/* alignment failure */
if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
goto out_err;
@@ -90,7 +86,7 @@
/* Request the cores early on synchronously - we'll release them on any
* errors (e.g. instrumentation already active) */
- kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+ kbase_pm_request_cores_sync(kbdev, true, true);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
@@ -102,9 +98,9 @@
/* Enable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
- PRFCNT_SAMPLE_COMPLETED, NULL);
+ PRFCNT_SAMPLE_COMPLETED);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* In use, this context is the owner */
@@ -147,35 +143,34 @@
#endif
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
- prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+ prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
- enable->dump_buffer & 0xFFFFFFFF, kctx);
+ enable->dump_buffer & 0xFFFFFFFF);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
- enable->dump_buffer >> 32, kctx);
+ enable->dump_buffer >> 32);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
- enable->jm_bm, kctx);
+ enable->jm_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
- enable->shader_bm, kctx);
+ enable->shader_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
- enable->mmu_l2_bm, kctx);
+ enable->mmu_l2_bm);
/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
* HW counter dump. */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
- kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
- kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0);
else
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
- enable->tiler_bm, kctx);
+ enable->tiler_bm);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
- prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+ prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL);
/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
*/
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
- enable->tiler_bm, kctx);
+ enable->tiler_bm);
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
@@ -191,7 +186,7 @@
return err;
out_unrequest_cores:
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+ kbase_pm_release_cores(kbdev, true, true);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
out_err:
return err;
@@ -234,20 +229,19 @@
/* Disable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+ irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
/* Disable the counters */
- kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0);
kbdev->hwcnt.kctx = NULL;
kbdev->hwcnt.addr = 0ULL;
kbase_pm_ca_instr_disable(kbdev);
- kbase_pm_unrequest_cores(kbdev, true,
- kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+ kbase_pm_release_cores(kbdev, true, true);
kbase_pm_release_l2_caches(kbdev);
@@ -290,15 +284,15 @@
/* Reconfigure the dump address */
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
- kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+ kbdev->hwcnt.addr & 0xFFFFFFFF);
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
- kbdev->hwcnt.addr >> 32, NULL);
+ kbdev->hwcnt.addr >> 32);
/* Start dumping */
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
kbdev->hwcnt.addr, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+ GPU_COMMAND_PRFCNT_SAMPLE);
dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
@@ -376,13 +370,20 @@
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
- int ret;
- /* Always clean and invalidate the cache after a successful dump
- */
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
- ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
- &kbdev->hwcnt.backend.cache_clean_work);
- KBASE_DEBUG_ASSERT(ret);
+ if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+ /* All finished and idle */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ } else {
+ int ret;
+ /* Always clean and invalidate the cache after a successful dump
+ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+ &kbdev->hwcnt.backend.cache_clean_work);
+ KBASE_DEBUG_ASSERT(ret);
+ }
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -399,10 +400,9 @@
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
/* Disable interrupt */
spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- NULL);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+ irq_mask & ~CLEAN_CACHES_COMPLETED);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
/* Wakeup... */
@@ -460,7 +460,7 @@
/* Clear the counters */
KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_PRFCNT_CLEAR, kctx);
+ GPU_COMMAND_PRFCNT_CLEAR);
err = 0;
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
index 95bebf8..dd0279a 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@
return IRQ_NONE;
}
- val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
#ifdef CONFIG_MALI_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
@@ -96,7 +96,7 @@
atomic_inc(&kbdev->faults_pending);
- val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
#ifdef CONFIG_MALI_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
@@ -134,7 +134,7 @@
return IRQ_NONE;
}
- val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
#ifdef CONFIG_MALI_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
@@ -239,7 +239,7 @@
return IRQ_NONE;
}
- val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
@@ -251,7 +251,7 @@
kbasep_irq_test_data.triggered = 1;
wake_up(&kbasep_irq_test_data.wait);
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
return IRQ_HANDLED;
}
@@ -271,7 +271,7 @@
return IRQ_NONE;
}
- val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
@@ -283,7 +283,7 @@
kbasep_irq_test_data.triggered = 1;
wake_up(&kbasep_irq_test_data.wait);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val);
return IRQ_HANDLED;
}
@@ -327,9 +327,9 @@
}
/* store old mask */
- old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+ old_mask_val = kbase_reg_read(kbdev, mask_offset);
/* mask interrupts */
- kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+ kbase_reg_write(kbdev, mask_offset, 0x0);
if (kbdev->irqs[tag].irq) {
/* release original handler and install test handler */
@@ -343,8 +343,8 @@
kbasep_test_interrupt_timeout;
/* trigger interrupt */
- kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
- kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+ kbase_reg_write(kbdev, mask_offset, 0x1);
+ kbase_reg_write(kbdev, rawstat_offset, 0x1);
hrtimer_start(&kbasep_irq_test_data.timer,
HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
@@ -366,7 +366,7 @@
kbasep_irq_test_data.triggered = 0;
/* mask interrupts */
- kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+ kbase_reg_write(kbdev, mask_offset, 0x0);
/* release test handler */
free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
@@ -382,7 +382,7 @@
}
}
/* restore old mask */
- kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+ kbase_reg_write(kbdev, mask_offset, old_mask_val);
return err;
}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
index 573a1aa..fee19aa 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -37,7 +37,6 @@
#include <mali_kbase_ctx_sched.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
-#include <backend/gpu/mali_kbase_js_affinity.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#define beenthere(kctx, f, a...) \
@@ -52,7 +51,54 @@
static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
struct kbase_context *kctx)
{
- return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+ return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
+}
+
+static u64 kbase_job_write_affinity(struct kbase_device *kbdev,
+ base_jd_core_req core_req,
+ int js)
+{
+ u64 affinity;
+
+ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+ BASE_JD_REQ_T) {
+ /* Tiler-only atom */
+ /* If the hardware supports XAFFINITY then we'll only enable
+ * the tiler (which is the default so this is a no-op),
+ * otherwise enable shader core 0.
+ */
+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+ affinity = 1;
+ else
+ affinity = 0;
+ } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+ struct mali_base_gpu_coherent_group_info *coherency_info =
+ &kbdev->gpu_props.props.coherency_info;
+
+ affinity = kbase_pm_ca_get_core_mask(kbdev) &
+ kbdev->pm.debug_core_mask[js];
+
+ /* JS2 on a dual core group system targets core group 1. All
+ * other cases target core group 0.
+ */
+ if (js == 2 && num_core_groups > 1)
+ affinity &= coherency_info->group[1].core_mask;
+ else
+ affinity &= coherency_info->group[0].core_mask;
+ } else {
+ /* Use all cores */
+ affinity = kbase_pm_ca_get_core_mask(kbdev) &
+ kbdev->pm.debug_core_mask[js];
+ }
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+ affinity & 0xFFFFFFFF);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+ affinity >> 32);
+
+ return affinity;
}
void kbase_job_hw_submit(struct kbase_device *kbdev,
@@ -62,6 +108,7 @@
struct kbase_context *kctx;
u32 cfg;
u64 jc_head = katom->jc;
+ u64 affinity;
KBASE_DEBUG_ASSERT(kbdev);
KBASE_DEBUG_ASSERT(katom);
@@ -70,20 +117,13 @@
/* Command register must be available */
KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
- /* Affinity is not violating */
- kbase_js_debug_log_current_affinities(kbdev);
- KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
- katom->affinity));
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
- jc_head & 0xFFFFFFFF, kctx);
+ jc_head & 0xFFFFFFFF);
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
- jc_head >> 32, kctx);
+ jc_head >> 32);
- kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
- katom->affinity & 0xFFFFFFFF, kctx);
- kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
- katom->affinity >> 32, kctx);
+ affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
/* start MMU, medium priority, cache clean/flush on end, clean/flush on
* start */
@@ -127,11 +167,11 @@
}
}
- kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
- katom->flush_id, kctx);
+ katom->flush_id);
/* Write an approximate start timestamp.
* It's approximate because there might be a job in the HEAD register.
@@ -139,11 +179,11 @@
katom->start_timestamp = ktime_get();
/* GO ! */
- dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
- katom, kctx, js, jc_head, katom->affinity);
+ dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx",
+ katom, kctx, js, jc_head);
KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
- (u32) katom->affinity);
+ (u32)affinity);
#if defined(CONFIG_MALI_GATOR_SUPPORT)
kbase_trace_mali_job_slots_event(
@@ -151,7 +191,7 @@
kctx, kbase_jd_atom_id(kctx, katom));
#endif
KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
- katom->affinity, cfg);
+ affinity, cfg);
KBASE_TLSTREAM_TL_RET_CTX_LPU(
kctx,
&kbdev->gpu_props.props.raw_props.js_features[
@@ -174,10 +214,8 @@
kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
}
#endif
- kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
-
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
- JS_COMMAND_START, katom->kctx);
+ JS_COMMAND_START);
}
/**
@@ -269,10 +307,9 @@
/* read out the job slot status code if the job
* slot reported failure */
completion_code = kbase_reg_read(kbdev,
- JOB_SLOT_REG(i, JS_STATUS), NULL);
+ JOB_SLOT_REG(i, JS_STATUS));
- switch (completion_code) {
- case BASE_JD_EVENT_STOPPED:
+ if (completion_code == BASE_JD_EVENT_STOPPED) {
#if defined(CONFIG_MALI_GATOR_SUPPORT)
kbase_trace_mali_job_slots_event(
GATOR_MAKE_EVENT(
@@ -287,37 +324,27 @@
* JS<n>_TAIL so that the job chain can
* be resumed */
job_tail = (u64)kbase_reg_read(kbdev,
- JOB_SLOT_REG(i, JS_TAIL_LO),
- NULL) |
+ JOB_SLOT_REG(i, JS_TAIL_LO)) |
((u64)kbase_reg_read(kbdev,
- JOB_SLOT_REG(i, JS_TAIL_HI),
- NULL) << 32);
- break;
- case BASE_JD_EVENT_NOT_STARTED:
+ JOB_SLOT_REG(i, JS_TAIL_HI))
+ << 32);
+ } else if (completion_code ==
+ BASE_JD_EVENT_NOT_STARTED) {
/* PRLAM-10673 can cause a TERMINATED
* job to come back as NOT_STARTED, but
* the error interrupt helps us detect
* it */
completion_code =
BASE_JD_EVENT_TERMINATED;
- /* fall through */
- default:
- dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
- i, completion_code,
- kbase_exception_name
- (kbdev,
- completion_code));
}
kbase_gpu_irq_evict(kbdev, i, completion_code);
}
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
- done & ((1 << i) | (1 << (i + 16))),
- NULL);
+ done & ((1 << i) | (1 << (i + 16))));
active = kbase_reg_read(kbdev,
- JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
- NULL);
+ JOB_CONTROL_REG(JOB_IRQ_JS_STATE));
if (((active >> i) & 1) == 0 &&
(((done >> (i + 16)) & 1) == 0)) {
@@ -362,7 +389,7 @@
* execution.
*/
u32 rawstat = kbase_reg_read(kbdev,
- JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
if ((rawstat >> (i + 16)) & 1) {
/* There is a failed job that we've
@@ -412,7 +439,7 @@
}
spurious:
done = kbase_reg_read(kbdev,
- JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
/* Workaround for missing interrupt caused by
@@ -420,7 +447,7 @@
if (((active >> i) & 1) && (0 ==
kbase_reg_read(kbdev,
JOB_SLOT_REG(i,
- JS_STATUS), NULL))) {
+ JS_STATUS)))) {
/* Force job slot to be processed again
*/
done |= (1u << i);
@@ -484,7 +511,6 @@
base_jd_core_req core_reqs,
struct kbase_jd_atom *target_katom)
{
- struct kbase_context *kctx = target_katom->kctx;
#if KBASE_TRACE_ENABLE
u32 status_reg_before;
u64 job_in_head_before;
@@ -494,12 +520,11 @@
/* Check the head pointer */
job_in_head_before = ((u64) kbase_reg_read(kbdev,
- JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+ JOB_SLOT_REG(js, JS_HEAD_LO)))
| (((u64) kbase_reg_read(kbdev,
- JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+ JOB_SLOT_REG(js, JS_HEAD_HI)))
<< 32);
- status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
- NULL);
+ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
#endif
if (action == JS_COMMAND_SOFT_STOP) {
@@ -603,11 +628,10 @@
}
}
- kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
#if KBASE_TRACE_ENABLE
- status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
- NULL);
+ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
struct kbase_jd_atom *head;
struct kbase_context *head_kctx;
@@ -812,7 +836,7 @@
mutex_lock(&kbdev->pm.lock);
if (kbdev->pm.backend.gpu_powered)
flush_id = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+ GPU_CONTROL_REG(LATEST_FLUSH));
mutex_unlock(&kbdev->pm.lock);
}
@@ -1071,34 +1095,32 @@
dev_err(kbdev->dev, "Register state:");
dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)));
dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
- kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
- kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL));
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE)));
for (i = 0; i < 3; i++) {
dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x",
- i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
- NULL),
- i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
- NULL));
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)),
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO)));
}
dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
- kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
- kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
- kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)));
dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x",
- kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL),
- kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL));
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)));
}
static void kbasep_reset_timeout_worker(struct work_struct *data)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
index d71a9ed..831491e 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -166,4 +166,24 @@
*/
void kbase_gpu_cacheclean(struct kbase_device *kbdev);
+static inline bool kbase_atom_needs_tiler(struct kbase_device *kbdev,
+ base_jd_core_req core_req)
+{
+ return core_req & BASE_JD_REQ_T;
+}
+
+static inline bool kbase_atom_needs_shaders(struct kbase_device *kbdev,
+ base_jd_core_req core_req)
+{
+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
+ return true;
+ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+ BASE_JD_REQ_T) {
+ /* Tiler only atom */
+ return false;
+ }
+
+ return true;
+}
+
#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
index 5cf1fe3..79777b7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -34,7 +34,6 @@
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
-#include <backend/gpu/mali_kbase_js_affinity.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
/* Return whether the specified ringbuffer is empty. HW access lock must be
@@ -104,8 +103,6 @@
katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
- kbase_js_debug_log_current_affinities(kbdev);
-
return katom;
}
@@ -122,12 +119,6 @@
return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
}
-struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
- int js)
-{
- return kbase_gpu_inspect(kbdev, js, 0);
-}
-
struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
int js)
{
@@ -312,221 +303,58 @@
int js,
struct kbase_jd_atom *katom)
{
- /* The most recently checked affinity. Having this at this scope allows
- * us to guarantee that we've checked the affinity in this function
- * call.
+ base_jd_core_req core_req = katom->core_req;
+
+ /* NOTE: The following uses a number of FALLTHROUGHs to optimize the
+ * calls to this function. Ending of the function is indicated by BREAK
+ * OUT.
*/
- u64 recently_chosen_affinity = 0;
- bool chosen_affinity = false;
- bool retry;
+ switch (katom->coreref_state) {
+ /* State when job is first attempted to be run */
+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+ /* Request the cores */
+ kbase_pm_request_cores(kbdev,
+ kbase_atom_needs_tiler(kbdev, core_req),
+ kbase_atom_needs_shaders(kbdev, core_req));
- do {
- retry = false;
+ /* Proceed to next state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
- /* NOTE: The following uses a number of FALLTHROUGHs to optimize
- * the calls to this function. Ending of the function is
- * indicated by BREAK OUT */
- switch (katom->coreref_state) {
- /* State when job is first attempted to be run */
- case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
- KBASE_DEBUG_ASSERT(katom->affinity == 0);
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
- /* Compute affinity */
- if (false == kbase_js_choose_affinity(
- &recently_chosen_affinity, kbdev, katom,
- js)) {
- /* No cores are currently available */
+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+ {
+ bool cores_ready;
+
+ cores_ready = kbase_pm_cores_requested(kbdev,
+ kbase_atom_needs_tiler(kbdev, core_req),
+ kbase_atom_needs_shaders(kbdev, core_req));
+
+ if (!cores_ready) {
+ /* Stay in this state and return, to retry at
+ * this state later.
+ */
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_INUSE_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) 0);
/* *** BREAK OUT: No state transition *** */
break;
}
-
- chosen_affinity = true;
-
- /* Request the cores */
- kbase_pm_request_cores(kbdev,
- katom->core_req & BASE_JD_REQ_T,
- recently_chosen_affinity);
-
- katom->affinity = recently_chosen_affinity;
-
/* Proceed to next state */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
-
- /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
- case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
- {
- enum kbase_pm_cores_ready cores_ready;
-
- KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
- (katom->core_req & BASE_JD_REQ_T));
-
- cores_ready = kbase_pm_register_inuse_cores(
- kbdev,
- katom->core_req & BASE_JD_REQ_T,
- katom->affinity);
- if (cores_ready == KBASE_NEW_AFFINITY) {
- /* Affinity no longer valid - return to
- * previous state */
- kbasep_js_job_check_deref_cores(kbdev,
- katom);
- KBASE_TRACE_ADD_SLOT_INFO(kbdev,
- JS_CORE_REF_REGISTER_INUSE_FAILED,
- katom->kctx, katom,
- katom->jc, js,
- (u32) katom->affinity);
- /* *** BREAK OUT: Return to previous
- * state, retry *** */
- retry = true;
- break;
- }
- if (cores_ready == KBASE_CORES_NOT_READY) {
- /* Stay in this state and return, to
- * retry at this state later */
- KBASE_TRACE_ADD_SLOT_INFO(kbdev,
- JS_CORE_REF_REGISTER_INUSE_FAILED,
- katom->kctx, katom,
- katom->jc, js,
- (u32) katom->affinity);
- /* *** BREAK OUT: No state transition
- * *** */
- break;
- }
- /* Proceed to next state */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
- }
-
- /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
- case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
- KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
- (katom->core_req & BASE_JD_REQ_T));
-
- /* Optimize out choosing the affinity twice in the same
- * function call */
- if (chosen_affinity == false) {
- /* See if the affinity changed since a previous
- * call. */
- if (false == kbase_js_choose_affinity(
- &recently_chosen_affinity,
- kbdev, katom, js)) {
- /* No cores are currently available */
- kbasep_js_job_check_deref_cores(kbdev,
- katom);
- KBASE_TRACE_ADD_SLOT_INFO(kbdev,
- JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
- katom->kctx, katom,
- katom->jc, js,
- (u32) recently_chosen_affinity);
- /* *** BREAK OUT: Transition to lower
- * state *** */
- break;
- }
- chosen_affinity = true;
- }
-
- /* Now see if this requires a different set of cores */
- if (recently_chosen_affinity != katom->affinity) {
- enum kbase_pm_cores_ready cores_ready;
-
- kbase_pm_request_cores(kbdev,
- katom->core_req & BASE_JD_REQ_T,
- recently_chosen_affinity);
-
- /* Register new cores whilst we still hold the
- * old ones, to minimize power transitions */
- cores_ready =
- kbase_pm_register_inuse_cores(kbdev,
- katom->core_req & BASE_JD_REQ_T,
- recently_chosen_affinity);
- kbasep_js_job_check_deref_cores(kbdev, katom);
-
- /* Fixup the state that was reduced by
- * deref_cores: */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
- katom->affinity = recently_chosen_affinity;
- if (cores_ready == KBASE_NEW_AFFINITY) {
- /* Affinity no longer valid - return to
- * previous state */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
-
- kbasep_js_job_check_deref_cores(kbdev,
- katom);
-
- KBASE_TRACE_ADD_SLOT_INFO(kbdev,
- JS_CORE_REF_REGISTER_INUSE_FAILED,
- katom->kctx, katom,
- katom->jc, js,
- (u32) katom->affinity);
- /* *** BREAK OUT: Return to previous
- * state, retry *** */
- retry = true;
- break;
- }
- /* Now might be waiting for powerup again, with
- * a new affinity */
- if (cores_ready == KBASE_CORES_NOT_READY) {
- /* Return to previous state */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
- KBASE_TRACE_ADD_SLOT_INFO(kbdev,
- JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
- katom->kctx, katom,
- katom->jc, js,
- (u32) katom->affinity);
- /* *** BREAK OUT: Transition to lower
- * state *** */
- break;
- }
- }
- /* Proceed to next state */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
-
- /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
- case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
- KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
- (katom->core_req & BASE_JD_REQ_T));
- KBASE_DEBUG_ASSERT(katom->affinity ==
- recently_chosen_affinity);
-
- /* Note: this is where the caller must've taken the
- * hwaccess_lock */
-
- /* Check for affinity violations - if there are any,
- * then we just ask the caller to requeue and try again
- * later */
- if (kbase_js_affinity_would_violate(kbdev, js,
- katom->affinity) != false) {
- /* Return to previous state */
- katom->coreref_state =
- KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
- /* *** BREAK OUT: Transition to lower state ***
- */
- KBASE_TRACE_ADD_SLOT_INFO(kbdev,
- JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
- katom->kctx, katom, katom->jc, js,
- (u32) katom->affinity);
- break;
- }
-
- /* No affinity violations would result, so the cores are
- * ready */
katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
/* *** BREAK OUT: Cores Ready *** */
break;
-
- default:
- KBASE_DEBUG_ASSERT_MSG(false,
- "Unhandled kbase_atom_coreref_state %d",
- katom->coreref_state);
- break;
}
- } while (retry != false);
+
+ default:
+ KBASE_DEBUG_ASSERT_MSG(false,
+ "Unhandled kbase_atom_coreref_state %d",
+ katom->coreref_state);
+ break;
+ }
return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
}
@@ -534,6 +362,8 @@
static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
struct kbase_jd_atom *katom)
{
+ base_jd_core_req core_req = katom->core_req;
+
KBASE_DEBUG_ASSERT(kbdev != NULL);
KBASE_DEBUG_ASSERT(katom != NULL);
@@ -541,31 +371,18 @@
case KBASE_ATOM_COREREF_STATE_READY:
/* State where atom was submitted to the HW - just proceed to
* power-down */
- KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
- (katom->core_req & BASE_JD_REQ_T));
/* *** FALLTHROUGH *** */
- case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
- /* State where cores were registered */
- KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
- (katom->core_req & BASE_JD_REQ_T));
- kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
- katom->affinity);
-
- break;
-
case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
- /* State where cores were requested, but not registered */
- KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
- (katom->core_req & BASE_JD_REQ_T));
- kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
- katom->affinity);
+ /* State where cores were requested */
+ kbase_pm_release_cores(kbdev,
+ kbase_atom_needs_tiler(kbdev, core_req),
+ kbase_atom_needs_shaders(kbdev, core_req));
break;
case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
/* Initial state - nothing required */
- KBASE_DEBUG_ASSERT(katom->affinity == 0);
break;
default:
@@ -575,12 +392,11 @@
break;
}
- katom->affinity = 0;
katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
}
static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
- base_jd_core_req core_req, u64 affinity,
+ base_jd_core_req core_req,
enum kbase_atom_coreref_state coreref_state)
{
KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -589,31 +405,18 @@
case KBASE_ATOM_COREREF_STATE_READY:
/* State where atom was submitted to the HW - just proceed to
* power-down */
- KBASE_DEBUG_ASSERT(affinity != 0 ||
- (core_req & BASE_JD_REQ_T));
/* *** FALLTHROUGH *** */
- case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
- /* State where cores were registered */
- KBASE_DEBUG_ASSERT(affinity != 0 ||
- (core_req & BASE_JD_REQ_T));
- kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T,
- affinity);
-
- break;
-
case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
- /* State where cores were requested, but not registered */
- KBASE_DEBUG_ASSERT(affinity != 0 ||
- (core_req & BASE_JD_REQ_T));
- kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T,
- affinity);
+ /* State where cores were requested */
+ kbase_pm_release_cores(kbdev,
+ kbase_atom_needs_tiler(kbdev, core_req),
+ kbase_atom_needs_shaders(kbdev, core_req));
break;
case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
/* Initial state - nothing required */
- KBASE_DEBUG_ASSERT(affinity == 0);
break;
default:
@@ -659,8 +462,6 @@
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
- kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
- katom->affinity);
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
@@ -681,9 +482,6 @@
(katom->protected_state.enter ==
KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
kbase_vinstr_resume(kbdev->vinstr_ctx);
-
- /* Go back to configured model for IPA */
- kbase_ipa_model_use_configured_locked(kbdev);
}
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
@@ -808,11 +606,13 @@
err = kbdev->protected_ops->protected_mode_enable(
kbdev->protected_dev);
- if (err)
+ if (err) {
dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n",
err);
- else
+ } else {
kbdev->protected_mode = true;
+ kbase_ipa_protection_mode_switch_event(kbdev);
+ }
}
return err;
@@ -869,11 +669,6 @@
kbase_jm_return_atom_to_js(kbdev, katom[idx]);
}
- /*
- * Go back to configured model for IPA
- */
- kbase_ipa_model_use_configured_locked(kbdev);
-
return -EINVAL;
}
@@ -922,9 +717,6 @@
return -EAGAIN;
}
- /* Use generic model for IPA in protected mode */
- kbase_ipa_model_use_fallback_locked(kbdev);
-
/* Once reaching this point GPU must be
* switched to protected mode or vinstr
* re-enabled. */
@@ -1090,9 +882,6 @@
kbase_vinstr_resume(kbdev->vinstr_ctx);
- /* Use generic model for IPA in protected mode */
- kbase_ipa_model_use_fallback_locked(kbdev);
-
return -EINVAL;
}
@@ -1239,8 +1028,6 @@
if (!cores_ready)
break;
- kbase_js_affinity_retain_slot_cores(kbdev, js,
- katom[idx]->affinity);
katom[idx]->gpu_rb_state =
KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
@@ -1355,26 +1142,30 @@
if (next_katom && katom->kctx == next_katom->kctx &&
next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
- HAS_DEP(next_katom) &&
- (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+ (HAS_DEP(next_katom) || next_katom->sched_priority ==
+ katom->sched_priority) &&
+ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO))
!= 0 ||
- kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI))
!= 0)) {
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
- JS_COMMAND_NOP, NULL);
+ JS_COMMAND_NOP);
next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
if (completion_code == BASE_JD_EVENT_STOPPED) {
- KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(next_katom,
&kbdev->gpu_props.props.raw_props.js_features
- [katom->slot_nr]);
- KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as
- [katom->kctx->as_nr]);
- KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx,
+ [next_katom->slot_nr]);
+ KBASE_TLSTREAM_TL_NRET_ATOM_AS(next_katom, &kbdev->as
+ [next_katom->kctx->as_nr]);
+ KBASE_TLSTREAM_TL_NRET_CTX_LPU(next_katom->kctx,
&kbdev->gpu_props.props.raw_props.js_features
- [katom->slot_nr]);
+ [next_katom->slot_nr]);
}
+ if (next_katom->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
return true;
}
@@ -1412,26 +1203,24 @@
* flushed. To prevent future evictions causing possible memory
* corruption we need to flush the cache manually before any
* affected memory gets reused. */
- katom->need_cache_flush_cores_retained = katom->affinity;
- kbase_pm_request_cores(kbdev, false, katom->affinity);
+ katom->need_cache_flush_cores_retained = true;
+ kbase_pm_request_cores(kbdev,
+ kbase_atom_needs_tiler(kbdev, katom->core_req),
+ kbase_atom_needs_shaders(kbdev,
+ katom->core_req));
} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
if (kbdev->gpu_props.num_core_groups > 1 &&
- !(katom->affinity &
- kbdev->gpu_props.props.coherency_info.group[0].core_mask
- ) &&
- (katom->affinity &
- kbdev->gpu_props.props.coherency_info.group[1].core_mask
- )) {
+ katom->device_nr >= 1) {
dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
- katom->need_cache_flush_cores_retained =
- katom->affinity;
- kbase_pm_request_cores(kbdev, false,
- katom->affinity);
+ katom->need_cache_flush_cores_retained = true;
+ kbase_pm_request_cores(kbdev,
+ kbase_atom_needs_tiler(kbdev, katom->core_req),
+ kbase_atom_needs_shaders(kbdev,
+ katom->core_req));
}
}
katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
- kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
if (completion_code == BASE_JD_EVENT_STOPPED) {
struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
@@ -1446,6 +1235,8 @@
if (next_katom && katom->kctx == next_katom->kctx &&
next_katom->sched_priority ==
katom->sched_priority) {
+ WARN_ON(next_katom->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED);
kbase_gpu_dequeue_atom(kbdev, js, end_timestamp);
kbase_jm_return_atom_to_js(kbdev, next_katom);
}
@@ -1453,6 +1244,13 @@
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
int i;
+ if (!kbase_ctx_flag(katom->kctx, KCTX_DYING))
+ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+ js, completion_code,
+ kbase_exception_name
+ (kbdev,
+ completion_code));
+
#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
KBASE_TRACE_DUMP(kbdev);
#endif
@@ -1526,10 +1324,6 @@
if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
katom->event_code = (base_jd_event_code)completion_code;
- kbase_device_trace_register_access(kctx, REG_WRITE,
- JOB_CONTROL_REG(JOB_IRQ_CLEAR),
- 1 << js);
-
/* Complete the job, and start new ones
*
* Also defer remaining work onto the workqueue:
@@ -1641,7 +1435,6 @@
if (keep_in_jm_rb) {
kbasep_js_job_check_deref_cores(kbdev, katom);
katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
- katom->affinity = 0;
katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
/* As the atom was not removed, increment the
* index so that we read the correct atom in the
@@ -1704,12 +1497,6 @@
return -1;
}
-static void kbase_job_evicted(struct kbase_jd_atom *katom)
-{
- kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
- katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
-}
-
bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
struct kbase_context *kctx,
int js,
@@ -1785,7 +1572,7 @@
/* katom_idx0 and katom_idx1 are on GPU */
if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
- JS_COMMAND_NEXT), NULL) == 0) {
+ JS_COMMAND_NEXT)) == 0) {
/* idx0 has already completed - stop
* idx1 if needed*/
if (katom_idx1_valid) {
@@ -1800,19 +1587,18 @@
kbase_reg_write(kbdev,
JOB_SLOT_REG(js,
JS_COMMAND_NEXT),
- JS_COMMAND_NOP, NULL);
+ JS_COMMAND_NOP);
if (kbase_reg_read(kbdev,
JOB_SLOT_REG(js,
- JS_HEAD_NEXT_LO), NULL)
+ JS_HEAD_NEXT_LO))
!= 0 ||
kbase_reg_read(kbdev,
JOB_SLOT_REG(js,
- JS_HEAD_NEXT_HI), NULL)
+ JS_HEAD_NEXT_HI))
!= 0) {
/* idx1 removed successfully,
* will be handled in IRQ */
- kbase_job_evicted(katom_idx1);
kbase_gpu_remove_atom(kbdev,
katom_idx1,
action, true);
@@ -1866,7 +1652,7 @@
} else {
/* idx1 is on GPU */
if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
- JS_COMMAND_NEXT), NULL) == 0) {
+ JS_COMMAND_NEXT)) == 0) {
/* idx0 has already completed - stop idx1 */
kbase_gpu_stop_atom(kbdev, js, katom_idx1,
action);
@@ -1876,15 +1662,14 @@
* remove */
kbase_reg_write(kbdev, JOB_SLOT_REG(js,
JS_COMMAND_NEXT),
- JS_COMMAND_NOP, NULL);
+ JS_COMMAND_NOP);
if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
- JS_HEAD_NEXT_LO), NULL) != 0 ||
+ JS_HEAD_NEXT_LO)) != 0 ||
kbase_reg_read(kbdev, JOB_SLOT_REG(js,
- JS_HEAD_NEXT_HI), NULL) != 0) {
+ JS_HEAD_NEXT_HI)) != 0) {
/* idx1 removed successfully, will be
* handled in IRQ once idx0 completes */
- kbase_job_evicted(katom_idx1);
kbase_gpu_remove_atom(kbdev, katom_idx1,
action,
false);
@@ -1924,11 +1709,11 @@
/* clean & invalidate the caches */
KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+ GPU_COMMAND_CLEAN_INV_CACHES);
/* wait for cache flush to complete before continuing */
while (--max_loops &&
- (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+ (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
CLEAN_CACHES_COMPLETED) == 0)
;
@@ -1936,7 +1721,7 @@
KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
- CLEAN_CACHES_COMPLETED, NULL);
+ CLEAN_CACHES_COMPLETED);
KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
KBASE_INSTR_STATE_CLEANING,
"Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
@@ -1953,10 +1738,12 @@
kbase_gpu_cacheclean(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_pm_unrequest_cores(kbdev, false,
- katom->need_cache_flush_cores_retained);
+ kbase_pm_release_cores(kbdev,
+ kbase_atom_needs_tiler(kbdev, katom->core_req),
+ kbase_atom_needs_shaders(kbdev,
+ katom->core_req));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- katom->need_cache_flush_cores_retained = 0;
+ katom->need_cache_flush_cores_retained = false;
}
}
@@ -1992,18 +1779,16 @@
* this is not done, then if the atom is re-scheduled (following a soft
* stop) then the core reference would not be retaken. */
katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
- katom->affinity = 0;
}
void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
- base_jd_core_req core_req, u64 affinity,
+ base_jd_core_req core_req,
enum kbase_atom_coreref_state coreref_state)
{
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity,
- coreref_state);
+ kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, coreref_state);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (!kbdev->pm.active_count) {
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
deleted file mode 100644
index c937eca..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/*
- * Base kernel affinity manager APIs
- */
-
-#include <mali_kbase.h>
-#include "mali_kbase_js_affinity.h"
-#include "mali_kbase_hw.h"
-
-#include <backend/gpu/mali_kbase_pm_internal.h>
-
-
-bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
- int js)
-{
- /*
- * Here are the reasons for using job slot 2:
- * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
- * - In absence of the above, then:
- * - Atoms with BASE_JD_REQ_COHERENT_GROUP
- * - But, only when there aren't contexts with
- * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
- * all cores on slot 1 could be blocked by those using a coherent group
- * on slot 2
- * - And, only when you actually have 2 or more coregroups - if you
- * only have 1 coregroup, then having jobs for slot 2 implies they'd
- * also be for slot 1, meaning you'll get interference from them. Jobs
- * able to run on slot 2 could also block jobs that can only run on
- * slot 1 (tiler jobs)
- */
- if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
- return true;
-
- if (js != 2)
- return true;
-
- /* Only deal with js==2 now: */
- if (kbdev->gpu_props.num_core_groups > 1) {
- /* Only use slot 2 in the 2+ coregroup case */
- if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
- KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
- false) {
- /* ...But only when we *don't* have atoms that run on
- * all cores */
-
- /* No specific check for BASE_JD_REQ_COHERENT_GROUP
- * atoms - the policy will sort that out */
- return true;
- }
- }
-
- /* Above checks failed mean we shouldn't use slot 2 */
- return false;
-}
-
-/*
- * As long as it has been decided to have a deeper modification of
- * what job scheduler, power manager and affinity manager will
- * implement, this function is just an intermediate step that
- * assumes:
- * - all working cores will be powered on when this is called.
- * - largest current configuration is 2 core groups.
- * - It has been decided not to have hardcoded values so the low
- *   and high cores in a core split will be evenly distributed.
- * - Odd combinations of core requirements have been filtered out
- * and do not get to this function (e.g. CS+T+NSS is not
- * supported here).
- * - This function is frequently called and can be optimized,
- *   (see notes in loops), but as the functionality will likely
- * be modified, optimization has not been addressed.
-*/
-bool kbase_js_choose_affinity(u64 * const affinity,
- struct kbase_device *kbdev,
- struct kbase_jd_atom *katom, int js)
-{
- base_jd_core_req core_req = katom->core_req;
- unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
- u64 core_availability_mask;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
-
- /*
- * If no cores are currently available (core availability policy is
- * transitioning) then fail.
- */
- if (0 == core_availability_mask) {
- *affinity = 0;
- return false;
- }
-
- KBASE_DEBUG_ASSERT(js >= 0);
-
- if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
- BASE_JD_REQ_T) {
- /* If the hardware supports XAFFINITY then we'll only enable
- * the tiler (which is the default so this is a no-op),
- * otherwise enable shader core 0. */
- if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
- *affinity = 1;
- else
- *affinity = 0;
-
- return true;
- }
-
- if (1 == kbdev->gpu_props.num_cores) {
- /* trivial case only one core, nothing to do */
- *affinity = core_availability_mask &
- kbdev->pm.debug_core_mask[js];
- } else {
- if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
- BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
- if (js == 0 || num_core_groups == 1) {
- /* js[0] and single-core-group systems just get
- * the first core group */
- *affinity =
- kbdev->gpu_props.props.coherency_info.group[0].core_mask
- & core_availability_mask &
- kbdev->pm.debug_core_mask[js];
- } else {
- /* js[1], js[2] use core groups 0, 1 for
- * dual-core-group systems */
- u32 core_group_idx = ((u32) js) - 1;
-
- KBASE_DEBUG_ASSERT(core_group_idx <
- num_core_groups);
- *affinity =
- kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
- & core_availability_mask &
- kbdev->pm.debug_core_mask[js];
-
- /* If the job is specifically targeting core
- * group 1 and the core availability policy is
- * keeping that core group off, then fail */
- if (*affinity == 0 && core_group_idx == 1 &&
- kbdev->pm.backend.cg1_disabled
- == true)
- katom->event_code =
- BASE_JD_EVENT_PM_EVENT;
- }
- } else {
- /* All cores are available when no core split is
- * required */
- *affinity = core_availability_mask &
- kbdev->pm.debug_core_mask[js];
- }
- }
-
- /*
- * If no cores are currently available in the desired core group(s)
- * (core availability policy is transitioning) then fail.
- */
- if (*affinity == 0)
- return false;
-
- /* Enable core 0 if tiler required for hardware without XAFFINITY
- * support (notes above) */
- if (core_req & BASE_JD_REQ_T) {
- if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
- *affinity = *affinity | 1;
- }
-
- return true;
-}
-
-static inline bool kbase_js_affinity_is_violating(
- struct kbase_device *kbdev,
- u64 *affinities)
-{
- /* This implementation checks whether the two slots involved in Generic
- * thread creation have intersecting affinity. This is due to micro-
- * architectural issues where a job in slot A targeting cores used by
- * slot B could prevent the job in slot B from making progress until the
- * job in slot A has completed.
- */
- u64 affinity_set_left;
- u64 affinity_set_right;
- u64 intersection;
-
- KBASE_DEBUG_ASSERT(affinities != NULL);
-
- affinity_set_left = affinities[1];
-
- affinity_set_right = affinities[2];
-
- /* A violation occurs when any bit in the left_set is also in the
- * right_set */
- intersection = affinity_set_left & affinity_set_right;
-
- return (bool) (intersection != (u64) 0u);
-}
-
-bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
- u64 affinity)
-{
- struct kbasep_js_device_data *js_devdata;
- u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
- js_devdata = &kbdev->js_data;
-
- memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
- sizeof(js_devdata->runpool_irq.slot_affinities));
-
- new_affinities[js] |= affinity;
-
- return kbase_js_affinity_is_violating(kbdev, new_affinities);
-}
-
-void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
- u64 affinity)
-{
- struct kbasep_js_device_data *js_devdata;
- u64 cores;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
- js_devdata = &kbdev->js_data;
-
- KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
- == false);
-
- cores = affinity;
- while (cores) {
- int bitnum = fls64(cores) - 1;
- u64 bit = 1ULL << bitnum;
- s8 cnt;
-
- cnt =
- ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
-
- if (cnt == 1)
- js_devdata->runpool_irq.slot_affinities[js] |= bit;
-
- cores &= ~bit;
- }
-}
-
-void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
- u64 affinity)
-{
- struct kbasep_js_device_data *js_devdata;
- u64 cores;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
- js_devdata = &kbdev->js_data;
-
- cores = affinity;
- while (cores) {
- int bitnum = fls64(cores) - 1;
- u64 bit = 1ULL << bitnum;
- s8 cnt;
-
- KBASE_DEBUG_ASSERT(
- js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
-
- cnt =
- --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
-
- if (0 == cnt)
- js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
-
- cores &= ~bit;
- }
-}
-
-#if KBASE_TRACE_ENABLE
-void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
-{
- struct kbasep_js_device_data *js_devdata;
- int slot_nr;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- js_devdata = &kbdev->js_data;
-
- for (slot_nr = 0; slot_nr < 3; ++slot_nr)
- KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
- NULL, 0u, slot_nr,
- (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
-}
-#endif /* KBASE_TRACE_ENABLE */
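
For reference, since the whole file goes away: the per-slot bookkeeping above existed to reject submissions whose core masks intersected across job slots. A self-contained illustration of that test (not driver code):

        #include <stdbool.h>
        #include <stdint.h>

        /* e.g. 0x3 (cores 0,1) vs 0x6 (cores 1,2) -> 0x2, i.e. a violation */
        static bool affinity_masks_would_violate(uint64_t slot_a, uint64_t slot_b)
        {
                return (slot_a & slot_b) != 0;
        }

With affinity now derived per atom from core_req, no per-slot refcounting of cores is needed, which is why both the tracking and its debug logging can be dropped.
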
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
deleted file mode 100644
index dbabd94..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/*
- * Affinity Manager internal APIs.
- */
-
-#ifndef _KBASE_JS_AFFINITY_H_
-#define _KBASE_JS_AFFINITY_H_
-
-/**
- * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to
- * submit a job to a particular job slot in the current status
- *
- * @kbdev: The kbase device structure of the device
- * @js: Job slot number to check for allowance
- *
- * Will check if submitting to the given job slot is allowed in the current
- * status. For example using job slot 2 while in soft-stoppable state and only
- * having 1 coregroup is not allowed by the policy. This function should be
- * called prior to submitting a job to a slot to make sure policy rules are not
- * violated.
- *
- * The following locking conditions are made on the caller
- * - it must hold hwaccess_lock
- */
-bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
-
-/**
- * kbase_js_choose_affinity - Compute affinity for a given job.
- *
- * @affinity: Affinity bitmap computed
- * @kbdev: The kbase device structure of the device
- * @katom: Job chain of which affinity is going to be found
- * @js: Slot the job chain is being submitted
- *
- * Currently assumes an all-on/all-off power management policy.
- * Also assumes there is at least one core with tiler available.
- *
- * Returns true if a valid affinity was chosen, false if
- * no cores were available.
- */
-bool kbase_js_choose_affinity(u64 * const affinity,
- struct kbase_device *kbdev,
- struct kbase_jd_atom *katom,
- int js);
-
-/**
- * kbase_js_affinity_would_violate - Determine whether a proposed affinity on
- * job slot @js would cause a violation of affinity restrictions.
- *
- * @kbdev: Kbase device structure
- * @js: The job slot to test
- * @affinity: The affinity mask to test
- *
- * The following locks must be held by the caller
- * - hwaccess_lock
- *
- * Return: true if the affinity would violate the restrictions
- */
-bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
- u64 affinity);
-
-/**
- * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by
- * a slot
- *
- * @kbdev: Kbase device structure
- * @js: The job slot retaining the cores
- * @affinity: The cores to retain
- *
- * The following locks must be held by the caller
- * - hwaccess_lock
- */
-void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
- u64 affinity);
-
-/**
- * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used
- * by a slot
- *
- * @kbdev: Kbase device structure
- * @js: Job slot
- * @affinity: Bit mask of core to be released
- *
- * Cores must be released as soon as a job is dequeued from a slot's 'submit
- * slots', and before another job is submitted to those slots. Otherwise, the
- * refcount could exceed the maximum number submittable to a slot,
- * %BASE_JM_SUBMIT_SLOTS.
- *
- * The following locks must be held by the caller
- * - hwaccess_lock
- */
-void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
- u64 affinity);
-
-/**
- * kbase_js_debug_log_current_affinities - log the current affinities
- *
- * @kbdev: Kbase device structure
- *
- * Output to the Trace log the current tracked affinities on all slots
- */
-#if KBASE_TRACE_ENABLE
-void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
-#else /* KBASE_TRACE_ENABLE */
-static inline void
-kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
-{
-}
-#endif /* KBASE_TRACE_ENABLE */
-
-#endif /* _KBASE_JS_AFFINITY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
index 9cd2982..3e9af77 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,15 +66,15 @@
}
static int wait_ready(struct kbase_device *kbdev,
- unsigned int as_nr, struct kbase_context *kctx)
+ unsigned int as_nr)
{
unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
- u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+ u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
/* Wait for the MMU status to indicate there is no active command, in
* case one is pending. Do not log remaining register accesses. */
while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
- val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL);
+ val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
if (max_loops == 0) {
dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
@@ -83,27 +83,24 @@
/* If waiting in loop was performed, log last read value. */
if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops)
- kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx);
+ kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
return 0;
}
-static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
- struct kbase_context *kctx)
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
{
int status;
/* write AS_COMMAND when MMU is ready to accept another command */
- status = wait_ready(kbdev, as_nr, kctx);
+ status = wait_ready(kbdev, as_nr);
if (status == 0)
- kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
- kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
return status;
}
-static void validate_protected_page_fault(struct kbase_device *kbdev,
- struct kbase_context *kctx)
+static void validate_protected_page_fault(struct kbase_device *kbdev)
{
/* GPUs which support (native) protected mode shall not report page
* fault addresses unless it has protected debug mode and protected
@@ -115,8 +112,7 @@
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
protected_debug_mode = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(GPU_STATUS),
- kctx) & GPU_DBGEN;
+ GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN;
}
if (!protected_debug_mode) {
@@ -145,9 +141,9 @@
/* remember current mask */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
- new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
/* mask interrupts for now */
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
while (bf_bits | pf_bits) {
@@ -170,24 +166,21 @@
*/
kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
-
/* find faulting address */
as->fault_addr = kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
- AS_FAULTADDRESS_HI),
- kctx);
+ AS_FAULTADDRESS_HI));
as->fault_addr <<= 32;
as->fault_addr |= kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
- AS_FAULTADDRESS_LO),
- kctx);
+ AS_FAULTADDRESS_LO));
/* Mark the fault protected or not */
as->protected_mode = kbdev->protected_mode;
if (kbdev->protected_mode && as->fault_addr) {
/* check if address reporting is allowed */
- validate_protected_page_fault(kbdev, kctx);
+ validate_protected_page_fault(kbdev);
}
/* report the fault to debugfs */
@@ -196,8 +189,7 @@
/* record the fault status */
as->fault_status = kbase_reg_read(kbdev,
MMU_AS_REG(as_no,
- AS_FAULTSTATUS),
- kctx);
+ AS_FAULTSTATUS));
/* find the fault type */
as->fault_type = (bf_bits & (1 << as_no)) ?
@@ -206,12 +198,10 @@
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
as->fault_extra_addr = kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
- kctx);
+ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
as->fault_extra_addr <<= 32;
as->fault_extra_addr |= kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
- kctx);
+ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
}
if (kbase_as_has_bus_fault(as)) {
@@ -240,14 +230,13 @@
/* reenable interrupts */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
- tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
new_mask |= tmp;
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
-void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
- struct kbase_context *kctx)
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
{
struct kbase_mmu_setup *current_setup = &as->current_setup;
u32 transcfg = 0;
@@ -270,35 +259,34 @@
}
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
- transcfg, kctx);
+ transcfg);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
- (current_setup->transcfg >> 32) & 0xFFFFFFFFUL,
- kctx);
+ (current_setup->transcfg >> 32) & 0xFFFFFFFFUL);
} else {
if (kbdev->system_coherency == COHERENCY_ACE)
current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
}
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
- current_setup->transtab & 0xFFFFFFFFUL, kctx);
+ current_setup->transtab & 0xFFFFFFFFUL);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
- (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+ (current_setup->transtab >> 32) & 0xFFFFFFFFUL);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
- current_setup->memattr & 0xFFFFFFFFUL, kctx);
+ current_setup->memattr & 0xFFFFFFFFUL);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
- (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+ (current_setup->memattr >> 32) & 0xFFFFFFFFUL);
KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
current_setup->transtab,
current_setup->memattr,
transcfg);
- write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE);
}
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
- struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+ u64 vpfn, u32 nr, u32 op,
unsigned int handling_irq)
{
int ret;
@@ -307,22 +295,22 @@
if (op == AS_COMMAND_UNLOCK) {
/* Unlock doesn't require a lock first */
- ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
} else {
u64 lock_addr = lock_region(kbdev, vpfn, nr);
/* Lock the region that needs to be updated */
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
- lock_addr & 0xFFFFFFFFUL, kctx);
+ lock_addr & 0xFFFFFFFFUL);
kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
- (lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
- write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+ (lock_addr >> 32) & 0xFFFFFFFFUL);
+ write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
/* Run the MMU operation */
- write_cmd(kbdev, as->number, op, kctx);
+ write_cmd(kbdev, as->number, op);
/* Wait for the flush to complete */
- ret = wait_ready(kbdev, as->number, kctx);
+ ret = wait_ready(kbdev, as->number);
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
/* Issue an UNLOCK command to ensure that valid page
@@ -339,8 +327,8 @@
commands in order to flush the MMU/uTLB,
see PRLAM-8812.
*/
- write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
- write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
}
}
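
A usage sketch of the operation helper after the signature change, i.e. without the old kctx argument; the op and range here are illustrative only, assuming the usual AS_COMMAND_FLUSH_PT flush:

        static int sketch_flush_pgtable_range(struct kbase_device *kbdev,
                                              struct kbase_as *as,
                                              u64 vpfn, u32 nr_pages)
        {
                /* Locks the region, issues the flush, waits for completion,
                 * and applies the BASE_HW_ISSUE_9630/8812 unlock workarounds
                 * internally */
                return kbase_mmu_hw_do_operation(kbdev, as, vpfn, nr_pages,
                                                 AS_COMMAND_FLUSH_PT, 1);
        }
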
@@ -348,7 +336,7 @@
}
void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
- struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+ enum kbase_mmu_fault_type type)
{
unsigned long flags;
u32 pf_bf_mask;
@@ -368,14 +356,14 @@
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
pf_bf_mask |= MMU_BUS_ERROR(as->number);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
unlock:
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
- struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+ enum kbase_mmu_fault_type type)
{
unsigned long flags;
u32 irq_mask;
@@ -391,14 +379,14 @@
if (kbdev->irq_reset_flush)
goto unlock;
- irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
MMU_PAGE_FAULT(as->number);
if (type == KBASE_MMU_FAULT_TYPE_BUS ||
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
irq_mask |= MMU_BUS_ERROR(as->number);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
unlock:
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
index 2ed7dfd..51a10a2 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,9 +29,9 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
-static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+static bool always_on_shaders_needed(struct kbase_device *kbdev)
{
- return kbdev->gpu_props.props.raw_props.shader_present;
+ return true;
}
static bool always_on_get_core_active(struct kbase_device *kbdev)
@@ -59,7 +59,7 @@
"always_on", /* name */
always_on_init, /* init */
always_on_term, /* term */
- always_on_get_core_mask, /* get_core_mask */
+ always_on_shaders_needed, /* shaders_needed */
always_on_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
index d61d0d0..e7927cf 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -1,7 +1,6 @@
-
/*
*
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,13 +36,13 @@
*
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
- * All Shader Cores are powered up, regardless of whether or not they will
- * be needed later.
+ * Shader Cores are powered up, regardless of whether or not they will be
+ * needed later.
*
- * - When KBase indicates that a set of Shader Cores are needed to submit the
- * currently queued Job Chains:
- * All Shader Cores are kept powered, regardless of whether or not they will
- * be needed
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ * queued Job Chains:
+ * Shader Cores are kept powered, regardless of whether or not they will be
+ * needed
*
* - When KBase indicates that the GPU need not be powered:
* The Shader Cores are kept powered, regardless of whether or not they will
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
index 6069c0f..a448a3b 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -179,11 +179,7 @@
kbase_pm_clock_on(kbdev, is_resume);
/* Update core status as required by the policy */
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
kbase_pm_update_cores_state(kbdev);
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
/* NOTE: We don't wait to reach the desired state, since running atoms
* will wait for that state to be reached anyway */
@@ -201,11 +197,7 @@
#if !PLATFORM_POWER_DOWN_ONLY
/* Wait for power transitions to complete. We do this with no locks held
* so that we don't deadlock with any pending workqueues */
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
kbase_pm_check_transitions_sync(kbdev);
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
#endif /* !PLATFORM_POWER_DOWN_ONLY */
mutex_lock(&js_devdata->runpool_mutex);
@@ -233,10 +225,6 @@
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#endif /* !PLATFORM_POWER_DOWN_ONLY */
- /* Consume any change-state events */
- kbase_timeline_pm_check_handle_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-
/* Disable interrupts and turn the clock off */
if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
/*
@@ -425,21 +413,12 @@
bool cores_are_available;
unsigned long flags;
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
- if (cores_are_available) {
- /* Log timelining information that a change in state has
- * completed */
- kbase_timeline_pm_handle_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-
+ if (cores_are_available)
kbase_backend_slot_update(kbdev);
- }
+
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
index 5b369fb..d4e8e42 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,145 +28,65 @@
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-static const struct kbase_pm_ca_policy *const policy_list[] = {
- &kbase_pm_ca_fixed_policy_ops,
-#ifdef CONFIG_MALI_DEVFREQ
- &kbase_pm_ca_devfreq_policy_ops,
-#endif
-#if !MALI_CUSTOMER_RELEASE
- &kbase_pm_ca_random_policy_ops
-#endif
-};
-
-/**
- * POLICY_COUNT - The number of policies available in the system.
- *
- * This is derived from the number of functions listed in policy_list.
- */
-#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
-
int kbase_pm_ca_init(struct kbase_device *kbdev)
{
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
- kbdev->pm.backend.ca_current_policy = policy_list[0];
-
- kbdev->pm.backend.ca_current_policy->init(kbdev);
+ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+#ifdef CONFIG_MALI_DEVFREQ
+ if (kbdev->current_core_mask)
+ pm_backend->ca_cores_enabled = kbdev->current_core_mask;
+ else
+ pm_backend->ca_cores_enabled =
+ kbdev->gpu_props.props.raw_props.shader_present;
+#endif
+ pm_backend->ca_in_transition = false;
return 0;
}
void kbase_pm_ca_term(struct kbase_device *kbdev)
{
- kbdev->pm.backend.ca_current_policy->term(kbdev);
}
-int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+#ifdef CONFIG_MALI_DEVFREQ
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
{
- if (!list)
- return POLICY_COUNT;
-
- *list = policy_list;
-
- return POLICY_COUNT;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
-
-const struct kbase_pm_ca_policy
-*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
-{
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
- return kbdev->pm.backend.ca_current_policy;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
-
-void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
- const struct kbase_pm_ca_policy *new_policy)
-{
- const struct kbase_pm_ca_policy *old_policy;
+ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
unsigned long flags;
- KBASE_DEBUG_ASSERT(kbdev != NULL);
- KBASE_DEBUG_ASSERT(new_policy != NULL);
-
- KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
- new_policy->id);
-
- /* During a policy change we pretend the GPU is active */
- /* A suspend won't happen here, because we're in a syscall from a
- * userspace thread */
- kbase_pm_context_active(kbdev);
-
- mutex_lock(&kbdev->pm.lock);
-
- /* Remove the policy to prevent IRQ handlers from working on it */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- old_policy = kbdev->pm.backend.ca_current_policy;
- kbdev->pm.backend.ca_current_policy = NULL;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- if (old_policy->term)
- old_policy->term(kbdev);
+ pm_backend->ca_cores_enabled = core_mask;
- if (new_policy->init)
- new_policy->init(kbdev);
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->pm.backend.ca_current_policy = new_policy;
-
- /* If any core power state changes were previously attempted, but
- * couldn't be made because the policy was changing (current_policy was
- * NULL), then re-try them here. */
kbase_pm_update_cores_state_nolock(kbdev);
- kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
- kbdev->shader_ready_bitmap,
- kbdev->shader_transitioning_bitmap);
-
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- mutex_unlock(&kbdev->pm.lock);
-
- /* Now the policy change is finished, we release our fake context active
- * reference */
- kbase_pm_context_idle(kbdev);
+ dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
+ pm_backend->ca_cores_enabled);
}
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+#endif
u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
{
+ struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+
lockdep_assert_held(&kbdev->hwaccess_lock);
/* All cores must be enabled when instrumentation is in use */
- if (kbdev->pm.backend.instr_enabled)
+ if (pm_backend->instr_enabled)
return kbdev->gpu_props.props.raw_props.shader_present &
kbdev->pm.debug_core_mask_all;
- if (kbdev->pm.backend.ca_current_policy == NULL)
- return kbdev->gpu_props.props.raw_props.shader_present &
- kbdev->pm.debug_core_mask_all;
-
- return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
- kbdev->pm.debug_core_mask_all;
+#ifdef CONFIG_MALI_DEVFREQ
+ return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
+#else
+ return kbdev->gpu_props.props.raw_props.shader_present &
+ kbdev->pm.debug_core_mask_all;
+#endif
}
KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
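
With the core-availability policy list gone, devfreq now drives availability directly through kbase_devfreq_set_core_mask(). A hedged sketch of how a devfreq target path might feed an OPP's core mask into it; the OPP record shown is an assumption, only the set-core-mask call comes from this patch:

        struct sketch_devfreq_opp {
                unsigned long freq_hz;
                u64 core_mask;
        };

        static void sketch_apply_devfreq_opp(struct kbase_device *kbdev,
                                             const struct sketch_devfreq_opp *opp)
        {
                /* Clock/regulator changes would happen here in a real target() */
                kbase_devfreq_set_core_mask(kbdev, opp->core_mask);
        }
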
-void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
- u64 cores_transitioning)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if (kbdev->pm.backend.ca_current_policy != NULL)
- kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
- cores_ready,
- cores_transitioning);
-}
-
void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
{
unsigned long flags;
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
deleted file mode 100644
index 4bb4c40..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-/*
- * A core availability policy implementing core mask selection from devfreq OPPs
- *
- */
-
-#include <mali_kbase.h>
-#include <mali_kbase_pm.h>
-#include <backend/gpu/mali_kbase_pm_internal.h>
-#include <linux/version.h>
-
-void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
-{
- struct kbasep_pm_ca_policy_devfreq *data =
- &kbdev->pm.backend.ca_policy_data.devfreq;
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
- data->cores_desired = core_mask;
-
- /* Disable any cores that are now unwanted */
- data->cores_enabled &= data->cores_desired;
-
- kbdev->pm.backend.ca_in_transition = true;
-
- /* If there are no cores to be powered off then power on desired cores
- */
- if (!(data->cores_used & ~data->cores_desired)) {
- data->cores_enabled = data->cores_desired;
- kbdev->pm.backend.ca_in_transition = false;
- }
-
- kbase_pm_update_cores_state_nolock(kbdev);
-
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n",
- data->cores_desired, data->cores_enabled);
-}
-
-static void devfreq_init(struct kbase_device *kbdev)
-{
- struct kbasep_pm_ca_policy_devfreq *data =
- &kbdev->pm.backend.ca_policy_data.devfreq;
-
- if (kbdev->current_core_mask) {
- data->cores_enabled = kbdev->current_core_mask;
- data->cores_desired = kbdev->current_core_mask;
- } else {
- data->cores_enabled =
- kbdev->gpu_props.props.raw_props.shader_present;
- data->cores_desired =
- kbdev->gpu_props.props.raw_props.shader_present;
- }
- data->cores_used = 0;
- kbdev->pm.backend.ca_in_transition = false;
-}
-
-static void devfreq_term(struct kbase_device *kbdev)
-{
-}
-
-static u64 devfreq_get_core_mask(struct kbase_device *kbdev)
-{
- return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled;
-}
-
-static void devfreq_update_core_status(struct kbase_device *kbdev,
- u64 cores_ready,
- u64 cores_transitioning)
-{
- struct kbasep_pm_ca_policy_devfreq *data =
- &kbdev->pm.backend.ca_policy_data.devfreq;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- data->cores_used = cores_ready | cores_transitioning;
-
- /* If in desired state then clear transition flag */
- if (data->cores_enabled == data->cores_desired)
- kbdev->pm.backend.ca_in_transition = false;
-
- /* If all undesired cores are now off then power on desired cores.
- * The direct comparison against cores_enabled limits potential
- * recursion to one level */
- if (!(data->cores_used & ~data->cores_desired) &&
- data->cores_enabled != data->cores_desired) {
- data->cores_enabled = data->cores_desired;
-
- kbase_pm_update_cores_state_nolock(kbdev);
-
- kbdev->pm.backend.ca_in_transition = false;
- }
-}
-
-/*
- * The struct kbase_pm_ca_policy structure for the devfreq core availability
- * policy.
- *
- * This is the static structure that defines the devfreq core availability power
- * policy's callback and name.
- */
-const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = {
- "devfreq", /* name */
- devfreq_init, /* init */
- devfreq_term, /* term */
- devfreq_get_core_mask, /* get_core_mask */
- devfreq_update_core_status, /* update_core_status */
- 0u, /* flags */
- KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */
-};
-
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
deleted file mode 100644
index 1eea7e8..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-/*
- * A power policy implementing fixed core availability
- */
-
-#include <mali_kbase.h>
-#include <mali_kbase_pm.h>
-
-static void fixed_init(struct kbase_device *kbdev)
-{
- kbdev->pm.backend.ca_in_transition = false;
-}
-
-static void fixed_term(struct kbase_device *kbdev)
-{
- CSTD_UNUSED(kbdev);
-}
-
-static u64 fixed_get_core_mask(struct kbase_device *kbdev)
-{
- return kbdev->gpu_props.props.raw_props.shader_present;
-}
-
-static void fixed_update_core_status(struct kbase_device *kbdev,
- u64 cores_ready,
- u64 cores_transitioning)
-{
- CSTD_UNUSED(kbdev);
- CSTD_UNUSED(cores_ready);
- CSTD_UNUSED(cores_transitioning);
-}
-
-/*
- * The struct kbase_pm_policy structure for the fixed power policy.
- *
- * This is the static structure that defines the fixed power policy's callback
- * and name.
- */
-const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
- "fixed", /* name */
- fixed_init, /* init */
- fixed_term, /* term */
- fixed_get_core_mask, /* get_core_mask */
- fixed_update_core_status, /* update_core_status */
- 0u, /* flags */
- KBASE_PM_CA_POLICY_ID_FIXED, /* id */
-};
-
-KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
deleted file mode 100644
index 68a2eac..0000000
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-/*
- * A power policy implementing fixed core availability
- */
-
-#ifndef MALI_KBASE_PM_CA_FIXED_H
-#define MALI_KBASE_PM_CA_FIXED_H
-
-/**
- * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
- *
- * @dummy: Dummy member - no state is needed
- *
- * This contains data that is private to the particular power policy that is
- * active.
- */
-struct kbasep_pm_ca_policy_fixed {
- int dummy;
-};
-
-extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
-
-#endif /* MALI_KBASE_PM_CA_FIXED_H */
-
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
index 602e175..e90c44d 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,22 +29,14 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
-static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+static bool coarse_demand_shaders_needed(struct kbase_device *kbdev)
{
- if (kbdev->pm.active_count == 0)
- return 0;
-
- return kbdev->gpu_props.props.raw_props.shader_present;
+ return kbase_pm_is_active(kbdev);
}
static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
{
- if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
- kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
- && !kbdev->tiler_inuse_cnt)
- return false;
-
- return true;
+ return kbase_pm_is_active(kbdev);
}
static void coarse_demand_init(struct kbase_device *kbdev)
@@ -66,7 +58,7 @@
"coarse_demand", /* name */
coarse_demand_init, /* init */
coarse_demand_term, /* term */
- coarse_demand_get_core_mask, /* get_core_mask */
+ coarse_demand_shaders_needed, /* shaders_needed */
coarse_demand_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
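
Both coarse-demand callbacks above now defer to kbase_pm_is_active(), whose body is not in this diff. The sketch below is an assumption about what it reduces to (the PM active refcount), included to show why the old shader_needed/inuse bitmap and tiler counter checks could be dropped:

        /* Assumed shape of the helper; not quoted from this patch */
        static inline bool sketch_pm_is_active(struct kbase_device *kbdev)
        {
                return kbdev->pm.active_count > 0;
        }
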
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
index f2b49eb..304e5d7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,11 +35,11 @@
* characteristics:
* - When KBase indicates that the GPU will be powered up, but we don't yet
* know which Job Chains are to be run:
- * - All Shader Cores are powered up, regardless of whether or not they will
- * be needed later.
- * - When KBase indicates that a set of Shader Cores are needed to submit the
- * currently queued Job Chains:
- * - All Shader Cores are kept powered, regardless of whether or not they will
+ * - Shader Cores are powered up, regardless of whether or not they will be
+ * needed later.
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ * queued Job Chains:
+ * - Shader Cores are kept powered, regardless of whether or not they will
* be needed
* - When KBase indicates that the GPU need not be powered:
* - The Shader Cores are powered off, and the GPU itself is powered off too.
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
index 417f6f8..7fe8eb3 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -27,12 +27,6 @@
#ifndef _KBASE_PM_HWACCESS_DEFS_H_
#define _KBASE_PM_HWACCESS_DEFS_H_
-#include "mali_kbase_pm_ca_fixed.h"
-#include "mali_kbase_pm_ca_devfreq.h"
-#if !MALI_CUSTOMER_RELEASE
-#include "mali_kbase_pm_ca_random.h"
-#endif
-
#include "mali_kbase_pm_always_on.h"
#include "mali_kbase_pm_coarse_demand.h"
#include "mali_kbase_pm_demand.h"
@@ -144,25 +138,14 @@
#endif
};
-union kbase_pm_ca_policy_data {
- struct kbasep_pm_ca_policy_fixed fixed;
- struct kbasep_pm_ca_policy_devfreq devfreq;
-#if !MALI_CUSTOMER_RELEASE
- struct kbasep_pm_ca_policy_random random;
-#endif
-};
-
/**
* struct kbase_pm_backend_data - Data stored per device for power management.
*
* This structure contains data for the power management framework. There is one
* instance of this structure per device in the system.
*
- * @ca_current_policy: The policy that is currently actively controlling core
- * availability.
* @pm_current_policy: The policy that is currently actively controlling the
* power state.
- * @ca_policy_data: Private data for current CA policy
* @pm_policy_data: Private data for current PM policy
* @ca_in_transition: Flag indicating when core availability policy is
* transitioning cores. The core availability policy must
@@ -252,20 +235,17 @@
* &struct kbase_pm_callback_conf
* @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
* &struct kbase_pm_callback_conf
+ * @ca_cores_enabled: Cores that are currently available
*
* Note:
- * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
- * policy is being changed with kbase_pm_ca_set_policy() or
- * kbase_pm_set_policy(). The change is protected under
- * kbase_device.pm.power_change_lock. Direct access to this
- * from IRQ context must therefore check for NULL. If NULL, then
- * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy
- * functions that would have been done under IRQ.
+ * During an IRQ, @pm_current_policy can be NULL when the policy is being
+ * changed with kbase_pm_set_policy(). The change is protected under
+ * kbase_device.pm.power_change_lock. Direct access to this from IRQ context
+ * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will
+ * re-issue the policy functions that would have been done under IRQ.
*/
struct kbase_pm_backend_data {
- const struct kbase_pm_ca_policy *ca_current_policy;
const struct kbase_pm_policy *pm_current_policy;
- union kbase_pm_ca_policy_data ca_policy_data;
union kbase_pm_policy_data pm_policy_data;
bool ca_in_transition;
bool reset_done;
@@ -331,6 +311,10 @@
int (*callback_power_runtime_on)(struct kbase_device *kbdev);
void (*callback_power_runtime_off)(struct kbase_device *kbdev);
int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_DEVFREQ
+ u64 ca_cores_enabled;
+#endif
};
@@ -356,7 +340,7 @@
* @name: The name of this policy
* @init: Function called when the policy is selected
* @term: Function called when the policy is unselected
- * @get_core_mask: Function called to get the current shader core mask
+ * @shaders_needed: Function called to find out if shader cores are needed
* @get_core_active: Function called to get the current overall GPU power
* state
* @flags: Field indicating flags for this policy
@@ -391,26 +375,28 @@
void (*term)(struct kbase_device *kbdev);
/**
- * Function called to get the current shader core mask
+ * Function called to find out if shader cores are needed
*
- * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
- * | kbdev->shader_inuse_bitmap).
+ * This needs to at least satisfy kbdev->shader_needed_cnt, and so must
+ * never return false when kbdev->shader_needed_cnt > 0.
+ *
+ * Note that kbdev->pm.active_count being 0 is not a good indicator
+ * that kbdev->shader_needed_cnt is also 0 - refer to the documentation
+ * on the active_count member in struct kbase_pm_device_data and
+ * kbase_pm_is_active().
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
*
- * Return: The mask of shader cores to be powered
+ * Return: true if shader cores are needed, false otherwise
*/
- u64 (*get_core_mask)(struct kbase_device *kbdev);
+ bool (*shaders_needed)(struct kbase_device *kbdev);
/**
* Function called to get the current overall GPU power state
*
- * This function should consider the state of kbdev->pm.active_count. If
- * this count is greater than 0 then there is at least one active
- * context on the device and the GPU should be powered. If it is equal
- * to 0 then there are no active contexts and the GPU could be powered
- * off if desired.
+ * This function must meet or exceed the requirements for power
+ * indicated by kbase_pm_is_active().
*
* @kbdev: The kbase device structure for the device (must be a
* valid pointer)
@@ -423,111 +409,4 @@
enum kbase_pm_policy_id id;
};
-
-enum kbase_pm_ca_policy_id {
- KBASE_PM_CA_POLICY_ID_FIXED = 1,
- KBASE_PM_CA_POLICY_ID_DEVFREQ,
- KBASE_PM_CA_POLICY_ID_RANDOM
-};
-
-typedef u32 kbase_pm_ca_policy_flags;
-
-/**
- * Maximum length of a CA policy names
- */
-#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15
-
-/**
- * struct kbase_pm_ca_policy - Core availability policy structure.
- *
- * Each core availability policy exposes a (static) instance of this structure
- * which contains function pointers to the policy's methods.
- *
- * @name: The name of this policy
- * @init: Function called when the policy is selected
- * @term: Function called when the policy is unselected
- * @get_core_mask: Function called to get the current shader core
- * availability mask
- * @update_core_status: Function called to update the current core status
- * @flags: Field indicating flags for this policy
- * @id: Field indicating an ID for this policy. This is not
- * necessarily the same as its index in the list returned
- * by kbase_pm_list_policies().
- * It is used purely for debugging.
- */
-struct kbase_pm_ca_policy {
- char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1];
-
- /**
- * Function called when the policy is selected
- *
- * This should initialize the kbdev->pm.ca_policy_data structure. It
- * should not attempt to make any changes to hardware state.
- *
- * It is undefined what state the cores are in when the function is
- * called.
- *
- * @kbdev The kbase device structure for the device (must be a
- * valid pointer)
- */
- void (*init)(struct kbase_device *kbdev);
-
- /**
- * Function called when the policy is unselected.
- *
- * @kbdev The kbase device structure for the device (must be a
- * valid pointer)
- */
- void (*term)(struct kbase_device *kbdev);
-
- /**
- * Function called to get the current shader core availability mask
- *
- * When a change in core availability is occurring, the policy must set
- * kbdev->pm.ca_in_transition to true. This is to indicate that
- * reporting changes in power state cannot be optimized out, even if
- * kbdev->pm.desired_shader_state remains unchanged. This must be done
- * by any functions internal to the Core Availability Policy that change
- * the return value of kbase_pm_ca_policy::get_core_mask.
- *
- * @kbdev The kbase device structure for the device (must be a
- * valid pointer)
- *
- * Return: The current core availability mask
- */
- u64 (*get_core_mask)(struct kbase_device *kbdev);
-
- /**
- * Function called to update the current core status
- *
- * If none of the cores in core group 0 are ready or transitioning, then
- * the policy must ensure that the next call to get_core_mask does not
- * return 0 for all cores in core group 0. It is an error to disable
- * core group 0 through the core availability policy.
- *
- * When a change in core availability has finished, the policy must set
- * kbdev->pm.ca_in_transition to false. This is to indicate that
- * changes in power state can once again be optimized out when
- * kbdev->pm.desired_shader_state is unchanged.
- *
- * @kbdev: The kbase device structure for the device
- * (must be a valid pointer)
- * @cores_ready: The mask of cores currently powered and
- * ready to run jobs
- * @cores_transitioning: The mask of cores currently transitioning
- * power state
- */
- void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
- u64 cores_transitioning);
-
- kbase_pm_ca_policy_flags flags;
-
- /**
- * Field indicating an ID for this policy. This is not necessarily the
- * same as its index in the list returned by kbase_pm_list_policies().
- * It is used purely for debugging.
- */
- enum kbase_pm_ca_policy_id id;
-};
-
#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
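
[Reviewer note, not part of the patch] The reworked policy contract above amounts to very little per-policy code. The sketch below is purely illustrative (the function names are made up; the reworked demand policy in the next file is the in-tree example): shaders_needed() must return true whenever kbdev->shader_needed_cnt is non-zero, and get_core_active() can simply defer to kbase_pm_is_active().

#include <mali_kbase.h>
#include <mali_kbase_pm.h>

/* Illustrative sketch only -- not part of this patch. */
static bool sketch_shaders_needed(struct kbase_device *kbdev)
{
	/* Must never return false while kbdev->shader_needed_cnt > 0. */
	return (kbdev->shader_needed_cnt > 0);
}

static bool sketch_get_core_active(struct kbase_device *kbdev)
{
	/* Must meet or exceed the power requirement reported by
	 * kbase_pm_is_active(). */
	return kbase_pm_is_active(kbdev);
}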
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
index e0edddc..01727d6 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,24 +29,14 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
-static u64 demand_get_core_mask(struct kbase_device *kbdev)
+static bool demand_shaders_needed(struct kbase_device *kbdev)
{
- u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
-
- if (0 == kbdev->pm.active_count)
- return 0;
-
- return desired;
+ return (kbdev->shader_needed_cnt > 0);
}
static bool demand_get_core_active(struct kbase_device *kbdev)
{
- if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap |
- kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt
- && !kbdev->tiler_inuse_cnt)
- return false;
-
- return true;
+ return kbase_pm_is_active(kbdev);
}
static void demand_init(struct kbase_device *kbdev)
@@ -69,7 +59,7 @@
"demand", /* name */
demand_init, /* init */
demand_term, /* term */
- demand_get_core_mask, /* get_core_mask */
+ demand_shaders_needed, /* shaders_needed */
demand_get_core_active, /* get_core_active */
0u, /* flags */
KBASE_PM_POLICY_ID_DEMAND, /* id */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
index 5ee1824..4b05e6d 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,9 +37,9 @@
* know which Job Chains are to be run:
* - The Shader Cores are not powered up
*
- * - When KBase indicates that a set of Shader Cores are needed to submit the
- * currently queued Job Chains:
- * - Only those Shader Cores are powered up
+ * - When KBase indicates that Shader Cores are needed to submit the currently
+ * queued Job Chains:
+ * - Shader Cores are powered up
*
* - When KBase indicates that the GPU need not be powered:
* - The Shader Cores are powered off, and the GPU itself is powered off too.
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
index 0fc8a99..cdd5cf7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -135,19 +135,16 @@
kbase_reg_write(kbdev,
GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CLEAN_INV_CACHES,
- NULL);
+ GPU_COMMAND_CLEAN_INV_CACHES);
raw = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
- NULL);
+ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
/* Wait for cache flush to complete before continuing, exit on
* gpu resets or loop expiry. */
while (((raw & mask) == 0) && --loops) {
raw = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
- NULL);
+ GPU_CONTROL_REG(GPU_IRQ_RAWSTAT));
}
}
#endif
@@ -238,10 +235,10 @@
}
if (lo != 0)
- kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo);
if (hi != 0)
- kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi);
}
/**
@@ -269,24 +266,20 @@
KBASE_DEBUG_ASSERT(reg);
- lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
- hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg));
+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4));
return (((u64) hi) << 32) | ((u64) lo);
}
void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
{
- kbdev->shader_inuse_bitmap = 0;
- kbdev->shader_needed_bitmap = 0;
kbdev->shader_available_bitmap = 0;
kbdev->tiler_available_bitmap = 0;
kbdev->l2_users_count = 0;
kbdev->l2_available_bitmap = 0;
kbdev->tiler_needed_cnt = 0;
- kbdev->tiler_inuse_cnt = 0;
-
- memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+ kbdev->shader_needed_cnt = 0;
}
/**
@@ -438,19 +431,21 @@
present = kbase_pm_get_present_cores(kbdev, type);
trans = kbase_pm_get_trans_cores(kbdev, type);
ready = kbase_pm_get_ready_cores(kbdev, type);
+
/* mask off ready from trans in case transitions finished between the
* register reads */
trans &= ~ready;
- if (trans) /* Do not progress if any cores are transitioning */
- return false;
-
powering_on_trans = trans & *powering_on;
- *powering_on = powering_on_trans;
if (available != NULL)
*available = (ready | powering_on_trans) & desired_state;
+ if (trans) /* Do not progress if any cores are transitioning */
+ return false;
+
+ *powering_on = powering_on_trans;
+
/* Update desired state to include the in-use cores. These have to be
* kept powered up because there are jobs running or about to run on
* these cores
@@ -632,15 +627,6 @@
return false;
}
- /* Trace that a change-state is being requested, and that it took
- * (effectively) no time to start it. This is useful for counting how
- * many state changes occurred, in a way that's backwards-compatible
- * with processing the trace data */
- kbase_timeline_pm_send_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
- kbase_timeline_pm_handle_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
-
/* If any cores are already powered then, we must keep the caches on */
shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
KBASE_PM_CORE_SHADER);
@@ -689,9 +675,6 @@
&l2_available_bitmap,
&kbdev->pm.backend.powering_on_l2_state);
- if (kbdev->l2_available_bitmap != l2_available_bitmap)
- KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
-
kbdev->l2_available_bitmap = l2_available_bitmap;
@@ -713,27 +696,20 @@
in_desired_state &= kbase_pm_transition_core_type(kbdev,
KBASE_PM_CORE_SHADER,
kbdev->pm.backend.desired_shader_state,
- kbdev->shader_inuse_bitmap,
- &shader_available_bitmap,
+ 0, &shader_available_bitmap,
&kbdev->pm.backend.powering_on_shader_state);
- if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+ if (kbdev->shader_available_bitmap != shader_available_bitmap)
KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
NULL, 0u,
(u32) shader_available_bitmap);
- KBASE_TIMELINE_POWER_SHADER(kbdev,
- shader_available_bitmap);
- }
kbdev->shader_available_bitmap = shader_available_bitmap;
- if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
NULL, NULL, 0u,
(u32) tiler_available_bitmap);
- KBASE_TIMELINE_POWER_TILER(kbdev,
- tiler_available_bitmap);
- }
kbdev->tiler_available_bitmap = tiler_available_bitmap;
@@ -742,10 +718,6 @@
kbdev->gpu_props.props.raw_props.tiler_present) {
tiler_available_bitmap = 0;
- if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
- KBASE_TIMELINE_POWER_TILER(kbdev,
- tiler_available_bitmap);
-
kbdev->tiler_available_bitmap = tiler_available_bitmap;
}
@@ -774,13 +746,6 @@
KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
(u32)(kbdev->tiler_available_bitmap &
kbdev->pm.backend.desired_tiler_state));
-
- /* Log timelining information about handling events that power
- * up cores, to match up either with immediate submission either
- * because cores already available, or from PM IRQ */
- if (!in_desired_state)
- kbase_timeline_pm_send_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
}
if (in_desired_state) {
@@ -830,9 +795,6 @@
KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
(u32)kbdev->pm.backend.desired_tiler_state);
- /* Log timelining information for synchronous waiters */
- kbase_timeline_pm_send_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
/* Wake slow-path waiters. Job scheduler does not use this. */
KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
@@ -841,19 +803,8 @@
spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
- /* kbase_pm_ca_update_core_status can cause one-level recursion into
- * this function, so it must only be called once all changes to kbdev
- * have been committed, and after the gpu_powered_lock has been
- * dropped. */
- if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
- kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
- kbdev->shader_ready_bitmap = shader_ready_bitmap;
- kbdev->shader_transitioning_bitmap =
- shader_transitioning_bitmap;
-
- kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
- shader_transitioning_bitmap);
- }
+ kbdev->shader_ready_bitmap = shader_ready_bitmap;
+ kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap;
/* The core availability policy is not allowed to keep core group 0
* turned off (unless it was changing the l2 power state) */
@@ -916,46 +867,40 @@
dev_err(kbdev->dev, "Current state :\n");
dev_err(kbdev->dev, "\tShader=%08x%08x\n",
kbase_reg_read(kbdev,
- GPU_CONTROL_REG(SHADER_READY_HI), NULL),
+ GPU_CONTROL_REG(SHADER_READY_HI)),
kbase_reg_read(kbdev,
- GPU_CONTROL_REG(SHADER_READY_LO),
- NULL));
+ GPU_CONTROL_REG(SHADER_READY_LO)));
dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_READY_HI), NULL),
+ GPU_CONTROL_REG(TILER_READY_HI)),
kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_READY_LO), NULL));
+ GPU_CONTROL_REG(TILER_READY_LO)));
dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_READY_HI), NULL),
+ GPU_CONTROL_REG(L2_READY_HI)),
kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_READY_LO), NULL));
+ GPU_CONTROL_REG(L2_READY_LO)));
dev_err(kbdev->dev, "Cores transitioning :\n");
dev_err(kbdev->dev, "\tShader=%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
- SHADER_PWRTRANS_HI), NULL),
+ SHADER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
- SHADER_PWRTRANS_LO), NULL));
+ SHADER_PWRTRANS_LO)));
dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
- TILER_PWRTRANS_HI), NULL),
+ TILER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
- TILER_PWRTRANS_LO), NULL));
+ TILER_PWRTRANS_LO)));
dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
- L2_PWRTRANS_HI), NULL),
+ L2_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
- L2_PWRTRANS_LO), NULL));
+ L2_PWRTRANS_LO)));
#if KBASE_GPU_RESET_EN
dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
if (kbase_prepare_to_reset_gpu(kbdev))
kbase_reset_gpu(kbdev);
#endif /* KBASE_GPU_RESET_EN */
- } else {
- /* Log timelining information that a change in state has
- * completed */
- kbase_timeline_pm_handle_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
}
}
KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
@@ -970,18 +915,15 @@
* and unmask them all.
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
- NULL);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
- NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
- NULL);
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF);
}
KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
@@ -995,15 +937,13 @@
*/
lockdep_assert_held(&kbdev->hwaccess_lock);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
- NULL);
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
- kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
- NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
}
void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
@@ -1218,10 +1158,10 @@
if (!kbdev->hw_quirks_sc)
kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(SHADER_CONFIG), NULL);
+ GPU_CONTROL_REG(SHADER_CONFIG));
kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TILER_CONFIG), NULL);
+ GPU_CONTROL_REG(TILER_CONFIG));
/* Set tiler clock gate override if required */
if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
@@ -1229,7 +1169,7 @@
/* Limit the GPU bus bandwidth if the platform needs this. */
kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+ GPU_CONTROL_REG(L2_MMU_CONFIG));
/* Limit read & write ID width for AXI */
@@ -1296,7 +1236,7 @@
u32 coherency_features;
coherency_features = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+ GPU_CONTROL_REG(COHERENCY_FEATURES));
/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
* documented for tMIx so force correct value here.
@@ -1314,7 +1254,7 @@
if (!kbdev->hw_quirks_jm)
kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(JM_CONFIG), NULL);
+ GPU_CONTROL_REG(JM_CONFIG));
#ifdef CONFIG_MALI_CORESTACK
#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
@@ -1325,16 +1265,16 @@
static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
{
kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
- kbdev->hw_quirks_sc, NULL);
+ kbdev->hw_quirks_sc);
kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
- kbdev->hw_quirks_tiler, NULL);
+ kbdev->hw_quirks_tiler);
kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
- kbdev->hw_quirks_mmu, NULL);
+ kbdev->hw_quirks_mmu);
kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
- kbdev->hw_quirks_jm, NULL);
+ kbdev->hw_quirks_jm);
}
@@ -1374,11 +1314,10 @@
KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_SOFT_RESET, NULL);
+ GPU_COMMAND_SOFT_RESET);
/* Unmask the reset complete interrupt only */
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
- NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED);
/* Initialize a structure for tracking the status of the reset */
rtdata.kbdev = kbdev;
@@ -1403,7 +1342,7 @@
/* No interrupt has been received - check if the RAWSTAT register says
* the reset has completed */
- if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
RESET_COMPLETED) {
/* The interrupt is set in the RAWSTAT; this suggests that the
* interrupts are not getting to the CPU */
@@ -1419,7 +1358,7 @@
RESET_TIMEOUT);
KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_HARD_RESET, NULL);
+ GPU_COMMAND_HARD_RESET);
/* Restart the timer to wait for the hard reset to complete */
rtdata.timed_out = 0;
@@ -1450,7 +1389,7 @@
struct kbase_device *kbdev = pdev->data;
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+ GPU_COMMAND_SET_PROTECTED_MODE);
return 0;
}
@@ -1521,7 +1460,6 @@
if (kbdev->protected_mode)
resume_vinstr = true;
kbdev->protected_mode = false;
- kbase_ipa_model_use_configured_locked(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
@@ -1537,7 +1475,7 @@
/* Sanity check protected mode was left after reset */
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
u32 gpu_status = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(GPU_STATUS), NULL);
+ GPU_CONTROL_REG(GPU_STATUS));
WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE);
}
@@ -1556,7 +1494,7 @@
irq_flags);
if (kbdev->pm.backend.gpu_cycle_counter_requests)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CYCLE_COUNT_START, NULL);
+ GPU_COMMAND_CYCLE_COUNT_START);
spin_unlock_irqrestore(
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
irq_flags);
@@ -1607,7 +1545,7 @@
if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CYCLE_COUNT_START, NULL);
+ GPU_COMMAND_CYCLE_COUNT_START);
spin_unlock_irqrestore(
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
@@ -1663,7 +1601,7 @@
if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+ GPU_COMMAND_CYCLE_COUNT_STOP);
spin_unlock_irqrestore(
&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
index c558736..0d3599a 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -565,4 +565,16 @@
*/
void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
+#ifdef CONFIG_MALI_DEVFREQ
+/**
+ * kbase_devfreq_set_core_mask - Set devfreq core mask
+ * @kbdev: Device pointer
+ * @core_mask: New core mask
+ *
+ * This function is used by devfreq to change the available core mask as
+ * required by Dynamic Core Scaling.
+ */
+void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
+#endif
+
#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
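
[Reviewer note, not part of the patch] To give a rough idea of how devfreq-driven Dynamic Core Scaling might use the new kbase_devfreq_set_core_mask() entry point, here is a sketch of a hypothetical caller. Only the kbase_devfreq_set_core_mask() call (declared above) and the shader_present property come from the driver; the function name, the halving heuristic and the assumption of a contiguous core mask are invented for illustration.

#include <linux/bitops.h>	/* hweight64() */
#include <mali_kbase.h>		/* assumed to pull in the backend PM declarations */

/* Hypothetical devfreq-side caller -- illustrative only. */
static void sketch_dynamic_core_scaling(struct kbase_device *kbdev,
		bool throttle)
{
	u64 present = kbdev->gpu_props.props.raw_props.shader_present;
	u64 new_mask = present;

	/* Made-up heuristic: drop roughly half of the (assumed contiguous)
	 * shader cores while throttled, restore all of them otherwise. */
	if (throttle && hweight64(present) > 1)
		new_mask = present >> (hweight64(present) / 2);

	kbase_devfreq_set_core_mask(kbdev, new_mask);
}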
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
index 5a043d5..6dd00a9 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -89,72 +89,6 @@
};
typedef u32 kbase_pm_change_state;
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-/* Timeline Trace code lookups for each function */
-static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
- [KBASE_PM_CHANGE_STATE_COUNT] = {
- /* kbase_pm_request_cores */
- [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
- [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
- SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
- [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
- [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
- KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
-
- [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
- [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
- SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
- [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
- [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
- KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
-
- /* kbase_pm_release_cores */
- [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
- [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
- [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
- [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
- KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
-
- [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
- [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
- [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
- [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
- KBASE_PM_CHANGE_STATE_TILER] =
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
-};
-
-static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
- enum kbase_pm_func_id func_id,
- kbase_pm_change_state state)
-{
- int trace_code;
-
- KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
- KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
- state);
-
- trace_code = kbase_pm_change_state_trace_code[func_id][state];
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
-}
-
-#else /* CONFIG_MALI_TRACE_TIMELINE */
-static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
- enum kbase_pm_func_id func_id, kbase_pm_change_state state)
-{
-}
-
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
/**
* kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
* requested shader cores
@@ -181,11 +115,7 @@
kbdev->pm.backend.ca_in_transition) {
bool cores_are_available;
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
- KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
- SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
/* Don't need 'cores_are_available',
* because we don't return anything */
@@ -356,6 +286,9 @@
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
active = backend->pm_current_policy->get_core_active(kbdev);
+ WARN((kbase_pm_is_active(kbdev) && !active),
+ "GPU is active but policy '%s' is indicating that it can be powered off",
+ kbdev->pm.backend.pm_current_policy->name);
if (active) {
if (backend->gpu_poweroff_pending) {
@@ -436,6 +369,42 @@
}
}
+/**
+ * get_desired_shader_bitmap - Get the desired shader bitmap, based on the
+ * current power policy
+ *
+ * @kbdev: The kbase device structure for the device
+ *
+ * Queries the current power policy to determine if shader cores will be
+ * required in the current state, and applies any HW workarounds.
+ *
+ * Return: bitmap of desired shader cores
+ */
+
+static u64 get_desired_shader_bitmap(struct kbase_device *kbdev)
+{
+ u64 desired_bitmap = 0u;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if (kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev))
+ desired_bitmap = kbase_pm_ca_get_core_mask(kbdev);
+
+ WARN(!desired_bitmap && kbdev->shader_needed_cnt,
+ "Shader cores are needed but policy '%s' did not make them needed",
+ kbdev->pm.backend.pm_current_policy->name);
+
+ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
+ /* Unless XAFFINITY is supported, enable core 0 if tiler
+ * required, regardless of core availability
+ */
+ if (kbdev->tiler_needed_cnt > 0)
+ desired_bitmap |= 1;
+ }
+
+ return desired_bitmap;
+}
+
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
{
u64 desired_bitmap;
@@ -450,30 +419,19 @@
if (kbdev->pm.backend.poweroff_wait_in_progress)
return;
- if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap &&
- !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt
- && !kbdev->tiler_inuse_cnt) {
+ if (kbdev->protected_mode_transition && !kbdev->shader_needed_cnt &&
+ !kbdev->tiler_needed_cnt) {
/* We are trying to change in/out of protected mode - force all
* cores off so that the L2 powers down */
desired_bitmap = 0;
desired_tiler_bitmap = 0;
} else {
- desired_bitmap =
- kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
- desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+ desired_bitmap = get_desired_shader_bitmap(kbdev);
- if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+ if (kbdev->tiler_needed_cnt > 0)
desired_tiler_bitmap = 1;
else
desired_tiler_bitmap = 0;
-
- if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
- /* Unless XAFFINITY is supported, enable core 0 if tiler
- * required, regardless of core availability */
- if (kbdev->tiler_needed_cnt > 0 ||
- kbdev->tiler_inuse_cnt > 0)
- desired_bitmap |= 1;
- }
}
if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
@@ -650,55 +608,28 @@
KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
-/* Check whether a state change has finished, and trace it as completed */
-static void
-kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
-{
- if ((kbdev->shader_available_bitmap &
- kbdev->pm.backend.desired_shader_state)
- == kbdev->pm.backend.desired_shader_state &&
- (kbdev->tiler_available_bitmap &
- kbdev->pm.backend.desired_tiler_state)
- == kbdev->pm.backend.desired_tiler_state)
- kbase_timeline_pm_check_handle_event(kbdev,
- KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
-}
-
void kbase_pm_request_cores(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores)
+ bool tiler_required, bool shader_required)
{
- u64 cores;
-
kbase_pm_change_state change_gpu_state = 0u;
KBASE_DEBUG_ASSERT(kbdev != NULL);
lockdep_assert_held(&kbdev->hwaccess_lock);
- cores = shader_cores;
- while (cores) {
- int bitnum = fls64(cores) - 1;
- u64 bit = 1ULL << bitnum;
+ if (shader_required) {
+ int cnt = ++kbdev->shader_needed_cnt;
- /* It should be almost impossible for this to overflow. It would
- * require 2^32 atoms to request a particular core, which would
- * require 2^24 contexts to submit. This would require an amount
- * of memory that is impossible on a 32-bit system and extremely
- * unlikely on a 64-bit system. */
- int cnt = ++kbdev->shader_needed_cnt[bitnum];
-
- if (1 == cnt) {
- kbdev->shader_needed_bitmap |= bit;
+ if (cnt == 1)
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
- }
- cores &= ~bit;
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt != 0);
}
if (tiler_required) {
int cnt = ++kbdev->tiler_needed_cnt;
- if (1 == cnt)
+ if (cnt == 1)
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
@@ -706,22 +637,18 @@
if (change_gpu_state) {
KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
- NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+ NULL, 0u, kbdev->shader_needed_cnt);
+ KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_TILER_NEEDED, NULL,
+ NULL, 0u, kbdev->tiler_needed_cnt);
- kbase_timeline_pm_cores_func(kbdev,
- KBASE_PM_FUNC_ID_REQUEST_CORES_START,
- change_gpu_state);
kbase_pm_update_cores_state_nolock(kbdev);
- kbase_timeline_pm_cores_func(kbdev,
- KBASE_PM_FUNC_ID_REQUEST_CORES_END,
- change_gpu_state);
}
}
KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
-void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores)
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+ bool tiler_required, bool shader_required)
{
kbase_pm_change_state change_gpu_state = 0u;
@@ -729,22 +656,16 @@
lockdep_assert_held(&kbdev->hwaccess_lock);
- while (shader_cores) {
- int bitnum = fls64(shader_cores) - 1;
- u64 bit = 1ULL << bitnum;
+ if (shader_required) {
int cnt;
- KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt > 0);
- cnt = --kbdev->shader_needed_cnt[bitnum];
+ cnt = --kbdev->shader_needed_cnt;
if (0 == cnt) {
- kbdev->shader_needed_bitmap &= ~bit;
-
change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
}
-
- shader_cores &= ~bit;
}
if (tiler_required) {
@@ -759,165 +680,26 @@
}
if (change_gpu_state) {
- KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
- NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+ KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_NEEDED, NULL,
+ NULL, 0u, kbdev->shader_needed_cnt);
+ KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_TILER_NEEDED, NULL,
+ NULL, 0u, kbdev->tiler_needed_cnt);
kbase_pm_update_cores_state_nolock(kbdev);
-
- /* Trace that any state change effectively completes immediately
- * - no-one will wait on the state change */
- kbase_pm_trace_check_and_finish_state_change(kbdev);
- }
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
-
-enum kbase_pm_cores_ready
-kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores)
-{
- u64 prev_shader_needed; /* Just for tracing */
- u64 prev_shader_inuse; /* Just for tracing */
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- prev_shader_needed = kbdev->shader_needed_bitmap;
- prev_shader_inuse = kbdev->shader_inuse_bitmap;
-
- /* If desired_shader_state does not contain the requested cores, then
- * power management is not attempting to powering those cores (most
- * likely due to core availability policy) and a new job affinity must
- * be chosen */
- if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
- shader_cores) {
- return (kbdev->pm.backend.poweroff_wait_in_progress ||
- kbdev->pm.backend.pm_current_policy == NULL) ?
- KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY;
- }
-
- if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
- (tiler_required && !kbdev->tiler_available_bitmap)) {
- /* Trace ongoing core transition */
- kbase_timeline_pm_l2_transition_start(kbdev);
- return KBASE_CORES_NOT_READY;
- }
-
- /* If we started to trace a state change, then trace it has being
- * finished by now, at the very latest */
- kbase_pm_trace_check_and_finish_state_change(kbdev);
- /* Trace core transition done */
- kbase_timeline_pm_l2_transition_done(kbdev);
-
- while (shader_cores) {
- int bitnum = fls64(shader_cores) - 1;
- u64 bit = 1ULL << bitnum;
- int cnt;
-
- KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
-
- cnt = --kbdev->shader_needed_cnt[bitnum];
-
- if (0 == cnt)
- kbdev->shader_needed_bitmap &= ~bit;
-
- /* shader_inuse_cnt should not overflow because there can only
- * be a very limited number of jobs on the h/w at one time */
-
- kbdev->shader_inuse_cnt[bitnum]++;
- kbdev->shader_inuse_bitmap |= bit;
-
- shader_cores &= ~bit;
- }
-
- if (tiler_required) {
- KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
-
- --kbdev->tiler_needed_cnt;
-
- kbdev->tiler_inuse_cnt++;
-
- KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
- }
-
- if (prev_shader_needed != kbdev->shader_needed_bitmap)
- KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
- NULL, 0u, (u32) kbdev->shader_needed_bitmap);
-
- if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
- KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
- NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
-
- return KBASE_CORES_READY;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
-
-void kbase_pm_release_cores(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores)
-{
- kbase_pm_change_state change_gpu_state = 0u;
-
- KBASE_DEBUG_ASSERT(kbdev != NULL);
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- while (shader_cores) {
- int bitnum = fls64(shader_cores) - 1;
- u64 bit = 1ULL << bitnum;
- int cnt;
-
- KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
-
- cnt = --kbdev->shader_inuse_cnt[bitnum];
-
- if (0 == cnt) {
- kbdev->shader_inuse_bitmap &= ~bit;
- change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
- }
-
- shader_cores &= ~bit;
- }
-
- if (tiler_required) {
- int cnt;
-
- KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
-
- cnt = --kbdev->tiler_inuse_cnt;
-
- if (0 == cnt)
- change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
- }
-
- if (change_gpu_state) {
- KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
- NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
-
- kbase_timeline_pm_cores_func(kbdev,
- KBASE_PM_FUNC_ID_RELEASE_CORES_START,
- change_gpu_state);
- kbase_pm_update_cores_state_nolock(kbdev);
- kbase_timeline_pm_cores_func(kbdev,
- KBASE_PM_FUNC_ID_RELEASE_CORES_END,
- change_gpu_state);
-
- /* Trace that any state change completed immediately */
- kbase_pm_trace_check_and_finish_state_change(kbdev);
}
}
KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
- bool tiler_required,
- u64 shader_cores)
+ bool tiler_required, bool shader_required)
{
unsigned long flags;
kbase_pm_wait_for_poweroff_complete(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+ kbase_pm_request_cores(kbdev, tiler_required, shader_required);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_pm_check_transitions_sync(kbdev);
@@ -960,9 +742,6 @@
wait_event(kbdev->pm.backend.l2_powered_wait,
kbdev->pm.backend.l2_powered == 1);
-
- /* Trace that any state change completed immediately */
- kbase_pm_trace_check_and_finish_state_change(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
@@ -972,10 +751,6 @@
/* Take the reference on l2_users_count and check core transitions.
*/
kbase_pm_l2_caches_ref(kbdev);
-
- /* Trace that any state change completed immediately
- */
- kbase_pm_trace_check_and_finish_state_change(kbdev);
}
void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
@@ -995,11 +770,8 @@
--kbdev->l2_users_count;
- if (!kbdev->l2_users_count) {
+ if (!kbdev->l2_users_count)
kbase_pm_check_transitions_nolock(kbdev);
- /* Trace that any state change completed immediately */
- kbase_pm_trace_check_and_finish_state_change(kbdev);
- }
}
KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
index 2f15888..2e86929 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -73,11 +73,25 @@
/**
- * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ * kbase_pm_request_cores - Request the desired cores to be powered up.
+ * @kbdev: Kbase device
+ * @tiler_required: true if tiler is required
+ * @shader_required: true if shaders are required
*
- * @kbdev: The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores: A bitmask of shader cores which are necessary for the job
+ * Called by the scheduler to request power to the desired cores.
+ *
+ * There is no guarantee that the HW will be powered up on return. Use
+ * kbase_pm_cores_requested()/kbase_pm_cores_ready() to verify that cores are
+ * now powered, or instead call kbase_pm_request_cores_sync().
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required,
+ bool shader_required);
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ * @kbdev: Kbase device
+ * @tiler_required: true if tiler is required
+ * @shader_required: true if shaders are required
*
* When this function returns, the @shader_cores will be in the READY state.
*
@@ -87,98 +101,79 @@
* is made.
*/
void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores);
+ bool tiler_required, bool shader_required);
/**
- * kbase_pm_request_cores - Mark one or more cores as being required
- * for jobs to be submitted
+ * kbase_pm_release_cores - Request the desired cores to be powered down.
+ * @kbdev: Kbase device
+ * @tiler_required: true if tiler is required
+ * @shader_required: true if shaders are required
*
- * @kbdev: The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores: A bitmask of shader cores which are necessary for the job
- *
- * This function is called by the job scheduler to mark one or more cores as
- * being required to submit jobs that are ready to run.
- *
- * The cores requested are reference counted and a subsequent call to
- * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
- * made to dereference the cores as being 'needed'.
- *
- * The active power policy will meet or exceed the requirements of the
- * requested cores in the system. Any core transitions needed will be begun
- * immediately, but they might not complete/the cores might not be available
- * until a Power Management IRQ.
- *
- * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ * Called by the scheduler to release its power reference on the desired cores.
*/
-void kbase_pm_request_cores(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores);
+void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required,
+ bool shader_required);
/**
- * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
- * jobs to be submitted.
+ * kbase_pm_cores_requested - Check that a power request has been locked into
+ * the HW.
+ * @kbdev: Kbase device
+ * @tiler_required: true if tiler is required
+ * @shader_required: true if shaders are required
*
- * @kbdev: The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores: A bitmask of shader cores (as given to
- * kbase_pm_request_cores() )
+ * Called by the scheduler to check if a power on request has been locked into
+ * the HW.
*
- * This function undoes the effect of kbase_pm_request_cores(). It should be
- * used when a job is not going to be submitted to the hardware (e.g. the job is
- * cancelled before it is enqueued).
+ * Note that there is no guarantee that the cores are actually ready; however,
+ * once the request has been locked into the HW it is safe to submit work,
+ * since the HW will wait for the transition to ready.
*
- * The active power policy will meet or exceed the requirements of the
- * requested cores in the system. Any core transitions needed will be begun
- * immediately, but they might not complete until a Power Management IRQ.
+ * A reference must first be taken prior to making this call.
*
- * The policy may use this as an indication that it can power down cores.
+ * Caller must hold the hwaccess_lock.
+ *
+ * Return: true if the request to the HW was successfully made, else false if the
+ * request is still pending.
*/
-void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores);
+static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
+ bool tiler_required, bool shader_required)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ if ((shader_required && !kbdev->shader_available_bitmap) ||
+ (tiler_required && !kbdev->tiler_available_bitmap))
+ return false;
+
+ return true;
+}
/**
- * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ * kbase_pm_cores_ready - Check that the required cores have been powered on by
+ * the HW.
+ * @kbdev: Kbase device
+ * @tiler_required: true if tiler is required
+ * @shader_required: true if shaders are required
*
- * @kbdev: The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores: A bitmask of shader cores (as given to
- * kbase_pm_request_cores() )
+ * Called by the scheduler to check if cores are ready.
*
- * This function should be called after kbase_pm_request_cores() when the job
- * is about to be submitted to the hardware. It will check that the necessary
- * cores are available and if so update the 'needed' and 'inuse' bitmasks to
- * reflect that the job is now committed to being run.
+ * Note that the caller should ensure that they have first requested cores
+ * before calling this function.
*
- * If the necessary cores are not currently available then the function will
- * return %KBASE_CORES_NOT_READY and have no effect.
+ * Caller must hold the hwaccess_lock.
*
- * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
- *
- * %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
- *
- * %KBASE_CORES_READY if the cores requested are already available
+ * Return: true if the cores are ready.
*/
-enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
- struct kbase_device *kbdev,
- bool tiler_required,
- u64 shader_cores);
+static inline bool kbase_pm_cores_ready(struct kbase_device *kbdev,
+ bool tiler_required, bool shader_required)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
-/**
- * kbase_pm_release_cores - Release cores after a job has run
- *
- * @kbdev: The kbase device structure for the device
- * @tiler_required: true if the tiler is required, false otherwise
- * @shader_cores: A bitmask of shader cores (as given to
- * kbase_pm_register_inuse_cores() )
- *
- * This function should be called when a job has finished running on the
- * hardware. A call to kbase_pm_register_inuse_cores() must have previously
- * occurred. The reference counts of the specified cores will be decremented
- * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
- * may then turn off any cores which are no longer 'inuse'.
- */
-void kbase_pm_release_cores(struct kbase_device *kbdev,
- bool tiler_required, u64 shader_cores);
+ if ((shader_required && !kbdev->shader_ready_bitmap) ||
+ (tiler_required && !kbdev->tiler_available_bitmap))
+ return false;
+
+ return true;
+}
/**
* kbase_pm_request_l2_caches - Request l2 caches
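
[Reviewer note, not part of the patch] Taken together, the reworked request/release API above is intended to be driven by the job scheduler roughly as in the sketch below. This is an illustration only: the function name is hypothetical, and only the kbase_pm_* calls and the hwaccess_lock requirement come from the interfaces defined and documented above.

#include <mali_kbase.h>
#include <mali_kbase_pm.h>	/* assumed to pull in the backend PM policy declarations */

/* Hypothetical scheduler-side caller -- illustrative only. */
static void sketch_submit_and_release(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_request_cores(kbdev, true, true);	/* tiler + shaders */

	if (kbase_pm_cores_requested(kbdev, true, true)) {
		/* The request is locked into the HW: it is safe to submit
		 * work, as the HW waits for the cores to become ready. */
	}
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* ... later, when the submitted work has finished ... */
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_release_cores(kbdev, true, true);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}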
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
index cef0745..5e1b761 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,24 +35,20 @@
/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
* correctly */
do {
- hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
- NULL);
+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
*cycle_counter = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
- hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
- NULL);
+ GPU_CONTROL_REG(CYCLE_COUNT_LO));
+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
*cycle_counter |= (((u64) hi1) << 32);
} while (hi1 != hi2);
/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
* correctly */
do {
- hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
- NULL);
+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
*system_time = kbase_reg_read(kbdev,
- GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
- hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
- NULL);
+ GPU_CONTROL_REG(TIMESTAMP_LO));
+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
*system_time |= (((u64) hi1) << 32);
} while (hi1 != hi2);
@@ -64,7 +60,7 @@
/**
* kbase_wait_write_flush - Wait for GPU write flush
- * @kctx: Context pointer
+ * @kbdev: Kbase device
*
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
* its write buffer.
@@ -75,7 +71,7 @@
* not be as expected.
*/
#ifndef CONFIG_MALI_NO_MALI
-void kbase_wait_write_flush(struct kbase_context *kctx)
+void kbase_wait_write_flush(struct kbase_device *kbdev)
{
u32 base_count = 0;
@@ -83,14 +79,14 @@
* The caller must be holding onto the kctx or the call is from
* userspace.
*/
- kbase_pm_context_active(kctx->kbdev);
- kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+ kbase_pm_context_active(kbdev);
+ kbase_pm_request_gpu_cycle_counter(kbdev);
while (true) {
u32 new_count;
- new_count = kbase_reg_read(kctx->kbdev,
- GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ new_count = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_LO));
/* First time around, just store the count. */
if (base_count == 0) {
base_count = new_count;
@@ -102,7 +98,7 @@
break;
}
- kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
- kbase_pm_context_idle(kctx->kbdev);
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+ kbase_pm_context_idle(kbdev);
}
#endif /* CONFIG_MALI_NO_MALI */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
index e1bd263..ece70092 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,7 @@
/**
* kbase_wait_write_flush() - Wait for GPU write flush
- * @kctx: Context pointer
+ * @kbdev: Kbase device
*
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
* its write buffer.
@@ -47,11 +47,11 @@
* This function is only in use for BASE_HW_ISSUE_6367
*/
#ifdef CONFIG_MALI_NO_MALI
-static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+static inline void kbase_wait_write_flush(struct kbase_device *kbdev)
{
}
#else
-void kbase_wait_write_flush(struct kbase_context *kctx);
+void kbase_wait_write_flush(struct kbase_device *kbdev);
#endif
#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/build.bp b/drivers/gpu/arm/midgard/build.bp
index afc39ff..ada6920 100644
--- a/drivers/gpu/arm/midgard/build.bp
+++ b/drivers/gpu/arm/midgard/build.bp
@@ -28,9 +28,6 @@
mali_midgard_dvfs: {
kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"],
},
- mali_trace_timeline: {
- kbuild_options: ["CONFIG_MALI_TRACE_TIMELINE=y"],
- },
mali_debug: {
kbuild_options: ["CONFIG_MALI_DEBUG=y"],
},
@@ -46,9 +43,13 @@
mali_system_trace: {
kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"],
},
+ mali_pwrsoft_765: {
+ kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"],
+ },
kbuild_options: [
"MALI_UNIT_TEST={{.unit_test_code}}",
"MALI_CUSTOMER_RELEASE={{.release}}",
+ "MALI_USE_CSF={{.gpu_has_csf}}",
"MALI_KERNEL_TEST_API={{.debug}}",
],
defaults: ["kernel_defaults"],
@@ -77,7 +78,6 @@
"CONFIG_MALI_MIDGARD=m",
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
- "MALI_KERNEL_TEST_API={{.unit_test_code}}",
"MALI_MOCK_TEST={{.mali_mock_test}}",
],
mali_error_inject: {
@@ -86,7 +86,7 @@
mali_error_inject_random: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
},
- mali_prfcnt_set_secondary: {
+ cinstr_secondary_hwc: {
kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
},
mali_2mb_alloc: {
@@ -95,6 +95,13 @@
mali_mock_test: {
srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
},
+ gpu_has_csf: {
+ srcs: [
+ "csf/*.c",
+ "csf/*.h",
+ "csf/Kbuild",
+ ],
+ },
defaults: ["mali_kbase_shared_config_defaults"],
}
diff --git a/drivers/gpu/arm/midgard/ipa/Kbuild b/drivers/gpu/arm/midgard/ipa/Kbuild
index b83c1a2..3d9cf80 100644
--- a/drivers/gpu/arm/midgard/ipa/Kbuild
+++ b/drivers/gpu/arm/midgard/ipa/Kbuild
@@ -21,12 +21,8 @@
mali_kbase-y += \
ipa/mali_kbase_ipa_simple.o \
- ipa/mali_kbase_ipa.o
-
-mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
-
-ifneq ($(wildcard $(srctree)/$(src)/ipa/mali_kbase_ipa_vinstr_common.c),)
- mali_kbase-y += \
+ ipa/mali_kbase_ipa.o \
ipa/mali_kbase_ipa_vinstr_g7x.o \
ipa/mali_kbase_ipa_vinstr_common.o
-endif
+
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
\ No newline at end of file
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
index 2b47f31..15566f6 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -85,16 +85,6 @@
return NULL;
}
-void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
-{
- atomic_set(&kbdev->ipa_use_configured_model, false);
-}
-
-void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
-{
- atomic_set(&kbdev->ipa_use_configured_model, true);
-}
-
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
{
const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
@@ -362,8 +352,6 @@
kbdev->ipa.configured_model = default_model;
}
- kbase_ipa_model_use_configured_locked(kbdev);
-
end:
if (err)
kbase_ipa_term_locked(kbdev);
@@ -452,14 +440,40 @@
return div_u64(v3c_big, 1000000);
}
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Record the event of GPU entering protected mode. */
+ kbdev->ipa_protection_mode_switched = true;
+}
+
static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
{
+ struct kbase_ipa_model *model;
+ unsigned long flags;
+
lockdep_assert_held(&kbdev->ipa.lock);
- if (atomic_read(&kbdev->ipa_use_configured_model))
- return kbdev->ipa.configured_model;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->ipa_protection_mode_switched)
+ model = kbdev->ipa.fallback_model;
else
- return kbdev->ipa.fallback_model;
+ model = kbdev->ipa.configured_model;
+
+ /*
+ * Now that the GPU's earlier entry into protected mode (if any) has
+ * been taken into account, the event can be reset (provided the GPU
+ * is not currently in protected mode) so that the configured model
+ * is used for the next sample.
+ */
+ if (!kbdev->protected_mode)
+ kbdev->ipa_protection_mode_switched = false;
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return model;
}
static u32 get_static_power_locked(struct kbase_device *kbdev,
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
index 639fb6d..4656ded 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -155,7 +155,7 @@
/**
* kbase_ipa_init_model - Initialize the particular IPA model
- * @kbdev: pointer to the IPA model object, already initialized
+ * @kbdev: pointer to kbase device
* @ops: pointer to object containing model specific methods.
*
* Initialize the model corresponding to the @ops pointer passed.
@@ -174,18 +174,20 @@
*/
void kbase_ipa_term_model(struct kbase_ipa_model *model);
-/* Switch to the fallback model */
-void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev);
-
-/* Switch to the model retrieved from device tree */
-void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev);
+/**
+ * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into
+ * protected mode
+ * @kbdev: pointer to kbase device
+ *
+ * Makes IPA aware of the GPU switching to protected mode.
+ */
+void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
extern struct kbase_ipa_model_ops kbase_tgox_r1_ipa_model_ops;
-#if MALI_UNIT_TEST
/**
* kbase_get_real_power() - get the real power consumption of the GPU
* @df: dynamic voltage and frequency scaling information for the GPU.
@@ -193,8 +195,7 @@
* @freq: a frequency, in HZ.
* @voltage: a voltage, in mV.
*
- * This function is only exposed for use by unit tests. The returned value
- * incorporates both static and dynamic power consumption.
+ * The returned value incorporates both static and dynamic power consumption.
*
* Return: 0 on success, or an error code.
*/
@@ -202,8 +203,10 @@
unsigned long freq,
unsigned long voltage);
+#if MALI_UNIT_TEST
/* Called by kbase_get_real_power() to invoke the power models.
* Must be called with kbdev->ipa.lock held.
+ * This function is only exposed for use by unit tests.
*/
int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
unsigned long freq,
@@ -218,10 +221,7 @@
#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
-static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev)
-{ }
-
-static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev)
+static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
{ }
#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
index 029023c..071a530 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -188,6 +188,23 @@
}
}
+static int current_power_get(void *data, u64 *val)
+{
+ struct kbase_device *kbdev = data;
+ struct devfreq *df = kbdev->devfreq;
+ u32 power;
+
+ kbase_pm_context_active(kbdev);
+ kbase_get_real_power(df, &power,
+ kbdev->current_nominal_freq, (kbdev->current_voltage / 1000));
+ kbase_pm_context_idle(kbdev);
+
+ *val = power;
+
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n");
+
static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
{
struct list_head *it;
@@ -264,5 +281,8 @@
kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
+ debugfs_create_file("ipa_current_power", 0444,
+ kbdev->mali_debugfs_directory, kbdev, &current_power);
+
mutex_unlock(&kbdev->ipa.lock);
}
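Note on the new ipa_current_power attribute: the read handler brackets the sample with kbase_pm_context_active()/kbase_pm_context_idle() so the GPU stays powered while the counters are sampled, and it divides kbdev->current_voltage by 1000, presumably converting the microvolt value tracked via devfreq/OPP into the millivolts documented for kbase_get_real_power()'s voltage parameter (an assumption; the unit of current_voltage is not visible in this patch). The file appears under the driver's debugfs directory (typically /sys/kernel/debug/mali0 on a single-GPU system) and reports the estimate as an unsigned decimal, per the "%llu\n" format.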
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
index 6ef9be8..699252d 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -83,6 +83,30 @@
return ret * coeff;
}
+s64 kbase_ipa_sum_all_memsys_blocks(
+ struct kbase_ipa_model_vinstr_data *model_data,
+ s32 coeff, u32 counter)
+{
+ struct kbase_device *kbdev = model_data->kbdev;
+ const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices;
+ u32 base = 0;
+ s64 ret = 0;
+ u32 i;
+
+ for (i = 0; i < num_blocks; i++) {
+ /* 0 < counter_value < 2^27 */
+ u32 counter_value = kbase_ipa_read_hwcnt(model_data,
+ base + counter);
+
+ /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */
+ ret = kbase_ipa_add_saturate(ret, counter_value);
+ base += KBASE_IPA_NR_BYTES_PER_BLOCK;
+ }
+
+ /* Range: -2^51 < ret * coeff < 2^51 */
+ return ret * coeff;
+}
+
s64 kbase_ipa_single_counter(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff, u32 counter)
@@ -198,8 +222,10 @@
u32 active_cycles;
int err = 0;
- if (!kbdev->ipa.vinstr_active)
+ if (!kbdev->ipa.vinstr_active) {
+ err = -ENODATA;
goto err0; /* GPU powered off - no counters to collect */
+ }
err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
BASE_HWCNT_READER_EVENT_MANUAL);
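Range check for the new kbase_ipa_sum_all_memsys_blocks() helper, following the bounds stated in its comments: each hardware counter value read is below 2^27, and the saturating sum over at most four memory-system blocks therefore stays below 2^27 * 4 = 2^29; with the model coefficient bounded by roughly +/-2^22 (unity is ~2^20, range about +/-4.0), the product stays within +/-(2^29 * 2^22) = +/-2^51, matching the documented return range.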
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
index ec331cc..0deafae 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -96,11 +96,12 @@
};
/**
- * sum_all_shader_cores() - sum a counter over all cores
- * @model_data pointer to model data
- * @coeff model coefficient. Unity is ~2^20, so range approx
- * +/- 4.0: -2^22 < coeff < 2^22
- * @counter offset in bytes of the counter used to calculate energy for IPA group
+ * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores
+ * @model_data: pointer to model data
+ * @coeff: model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter:	offset in bytes of the counter used to calculate energy
+ * for IPA group
*
* Calculate energy estimation based on hardware counter `counter'
* across all shader cores.
@@ -112,11 +113,29 @@
s32 coeff, u32 counter);
/**
- * sum_single_counter() - sum a single counter
- * @model_data pointer to model data
- * @coeff model coefficient. Unity is ~2^20, so range approx
- * +/- 4.0: -2^22 < coeff < 2^22
- * @counter offset in bytes of the counter used to calculate energy for IPA group
+ * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks
+ * @model_data: pointer to model data
+ * @coeff: model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter: offset in bytes of the counter used to calculate energy
+ * for IPA group
+ *
+ * Calculate energy estimation based on hardware counter `counter' across all
+ * memory system blocks.
+ *
+ * Return: Sum of counter values. Range: -2^51 < ret < 2^51
+ */
+s64 kbase_ipa_sum_all_memsys_blocks(
+ struct kbase_ipa_model_vinstr_data *model_data,
+ s32 coeff, u32 counter);
+
+/**
+ * kbase_ipa_single_counter() - sum a single counter
+ * @model_data: pointer to model data
+ * @coeff: model coefficient. Unity is ~2^20, so range approx
+ * +/- 4.0: -2^22 < coeff < 2^22
+ * @counter: offset in bytes of the counter used to calculate energy
+ * for IPA group
*
* Calculate energy estimation based on hardware counter `counter'.
*
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
index 6ebd703..8366033 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -30,8 +30,6 @@
#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK)
#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK)
#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK)
-#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK)
/* JM counter block offsets */
#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6)
@@ -55,10 +53,6 @@
#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61)
#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62)
-/** Maximum number of cores for which a single Memory System block of performance counters is present. */
-#define KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4)
-
-
/**
* get_jm_counter() - get performance counter offset inside the Job Manager block
* @model_data: pointer to GPU model data.
@@ -98,9 +92,9 @@
static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data,
u32 counter_block_offset)
{
- const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ?
- SC0_BASE_ONE_MEMSYS :
- SC0_BASE_TWO_MEMSYS;
+ const u32 sc_base = MEMSYS_BASE +
+ (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices *
+ KBASE_IPA_NR_BYTES_PER_BLOCK);
return sc_base + counter_block_offset;
}
@@ -113,7 +107,7 @@
*
* Return: Energy estimation for a single Memory System performance counter.
*/
-static s64 kbase_g7x_memsys_single_counter(
+static s64 kbase_g7x_sum_all_memsys_blocks(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff,
u32 offset)
@@ -121,7 +115,7 @@
u32 counter;
counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
- return kbase_ipa_single_counter(model_data, coeff, counter);
+ return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
}
/**
@@ -192,7 +186,7 @@
{
.name = "l2_access",
.default_value = 526300,
- .op = kbase_g7x_memsys_single_counter,
+ .op = kbase_g7x_sum_all_memsys_blocks,
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
},
{
@@ -225,7 +219,7 @@
{
.name = "l2_access",
.default_value = 393000,
- .op = kbase_g7x_memsys_single_counter,
+ .op = kbase_g7x_sum_all_memsys_blocks,
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
},
{
@@ -282,7 +276,7 @@
{
.name = "l2_access",
.default_value = 378100,
- .op = kbase_g7x_memsys_single_counter,
+ .op = kbase_g7x_sum_all_memsys_blocks,
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
},
};
@@ -315,7 +309,7 @@
{
.name = "l2_access",
.default_value = 551400,
- .op = kbase_g7x_memsys_single_counter,
+ .op = kbase_g7x_sum_all_memsys_blocks,
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
},
};
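Worked example for the new shader-core base offset: with the block layout above (JM at block 0, tiler at block 1, memory system starting at block 2), a GPU with one L2 slice gives sc_base = MEMSYS_BASE + 1 * KBASE_IPA_NR_BYTES_PER_BLOCK, i.e. block 3, the same offset as the removed SC0_BASE_ONE_MEMSYS; a two-slice GPU gives block 4, matching SC0_BASE_TWO_MEMSYS. The computed form therefore preserves the old offsets while also covering GPUs with more than two memory-system blocks.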
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
index 73f5801..19ffd69 100644
--- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -97,6 +97,7 @@
BASE_HW_ISSUE_11042,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T720_1386,
BASE_HW_ISSUE_T76X_26,
BASE_HW_ISSUE_T76X_1909,
@@ -111,6 +112,7 @@
BASE_HW_ISSUE_T76X_3964,
BASE_HW_ISSUE_T76X_3966,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
BASE_HW_ISSUE_TMIX_7940,
BASE_HW_ISSUE_TMIX_8042,
@@ -191,6 +193,7 @@
BASE_HW_ISSUE_11035,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_3964,
GPUCORE_1619,
@@ -232,6 +235,7 @@
BASE_HW_ISSUE_11035,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_3964,
BASE_HW_ISSUE_TMIX_8438,
@@ -269,6 +273,7 @@
BASE_HW_ISSUE_11035,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_T76X_3964,
@@ -304,6 +309,7 @@
BASE_HW_ISSUE_11042,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_TMIX_8438,
@@ -328,6 +334,7 @@
BASE_HW_ISSUE_11042,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_T76X_3964,
@@ -351,6 +358,7 @@
BASE_HW_ISSUE_11042,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_TMIX_8438,
@@ -529,6 +537,7 @@
BASE_HW_ISSUE_11042,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
BASE_HW_ISSUE_T76X_3964,
@@ -549,6 +558,7 @@
BASE_HW_ISSUE_11042,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T720_1386,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
@@ -570,6 +580,7 @@
BASE_HW_ISSUE_11042,
BASE_HW_ISSUE_11051,
BASE_HW_ISSUE_11054,
+ BASE_HW_ISSUE_11056,
BASE_HW_ISSUE_T720_1386,
BASE_HW_ISSUE_T76X_1909,
BASE_HW_ISSUE_T76X_1963,
@@ -836,6 +847,7 @@
BASE_HW_ISSUE_T76X_3953,
BASE_HW_ISSUE_T76X_3960,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
BASE_HW_ISSUE_TMIX_8438,
BASE_HW_ISSUE_END
@@ -856,6 +868,7 @@
BASE_HW_ISSUE_T76X_3953,
BASE_HW_ISSUE_T76X_3960,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
BASE_HW_ISSUE_TMIX_8438,
BASE_HW_ISSUE_END
@@ -871,6 +884,7 @@
BASE_HW_ISSUE_T76X_3793,
BASE_HW_ISSUE_T76X_3964,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
GPUCORE_1619,
BASE_HW_ISSUE_TMIX_8438,
@@ -894,6 +908,7 @@
BASE_HW_ISSUE_T76X_3960,
BASE_HW_ISSUE_T76X_3964,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
BASE_HW_ISSUE_TMIX_8438,
BASE_HW_ISSUE_END
@@ -915,6 +930,7 @@
BASE_HW_ISSUE_T76X_3953,
BASE_HW_ISSUE_T76X_3960,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
BASE_HW_ISSUE_TMIX_8438,
BASE_HW_ISSUE_END
@@ -935,6 +951,7 @@
BASE_HW_ISSUE_T76X_3953,
BASE_HW_ISSUE_T76X_3960,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
BASE_HW_ISSUE_TMIX_8438,
BASE_HW_ISSUE_END
@@ -949,6 +966,7 @@
BASE_HW_ISSUE_T76X_3700,
BASE_HW_ISSUE_T76X_3793,
BASE_HW_ISSUE_T76X_3979,
+ BASE_HW_ISSUE_T83X_817,
BASE_HW_ISSUE_TMIX_7891,
GPUCORE_1619,
BASE_HW_ISSUE_END
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
index e53528e..cc44ff2 100644
--- a/drivers/gpu/arm/midgard/mali_base_kernel.h
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -127,18 +127,19 @@
*/
#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
- /* BASE_MEM_HINT flags have been removed, but their values are reserved
- * for backwards compatibility with older user-space drivers. The values
- * can be re-used once support for r5p0 user-space drivers is removed,
- * presumably in r7p0.
- *
- * RESERVED: (1U << 5)
- * RESERVED: (1U << 6)
- * RESERVED: (1U << 7)
- * RESERVED: (1U << 8)
- */
-#define BASE_MEM_RESERVED_BIT_5 ((base_mem_alloc_flags)1 << 5)
-#define BASE_MEM_RESERVED_BIT_6 ((base_mem_alloc_flags)1 << 6)
+/* Will be permanently mapped in kernel space.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASE_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+
+/* The allocation will completely reside within the same 4GB chunk in the GPU
+ * virtual space.
+ * Since this flag is primarily required only for TLS memory, which will
+ * neither contain executable code nor be used as a Tiler heap, it cannot
+ * be used together with the BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
+ */
+#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
+
#define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7)
#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
@@ -192,6 +193,7 @@
* Do not remove, use the next unreserved bit for new flags
*/
#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19
/**
* Memory starting from the end of the initial commit is aligned to 'extent'
@@ -200,11 +202,20 @@
*/
#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
+/* Should be uncached on the GPU; this will only work for GPUs using the AARCH64 MMU mode.
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ * The 3 shareability flags will be ignored for GPU uncached memory.
+ * If used while importing USER_BUFFER type memory, then the import will fail
+ * if the memory is not aligned to GPU and CPU cache line width.
+ */
+#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
+
/* Number of bits used as flags for base memory management
*
* Must be kept in sync with the base_mem_alloc_flags flags
*/
-#define BASE_MEM_FLAGS_NR_BITS 21
+#define BASE_MEM_FLAGS_NR_BITS 22
/* A mask for all output bits, excluding IN/OUT bits.
*/
@@ -226,9 +237,13 @@
/* A mask of all currently reserved flags
*/
#define BASE_MEM_FLAGS_RESERVED \
- (BASE_MEM_RESERVED_BIT_5 | BASE_MEM_RESERVED_BIT_6 | \
- BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
- BASE_MEM_RESERVED_BIT_19)
+ (BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
+ BASE_MEM_MAYBE_RESERVED_BIT_19)
+
+/* A mask of all the flags which are only valid for allocations within kbase,
+ * and may not be passed from user space.
+ */
+#define BASE_MEM_FLAGS_KERNEL_ONLY (BASE_MEM_PERMANENT_KERNEL_MAPPING)
/* A mask of all the flags that can be returned via the base_mem_get_flags()
* interface.
@@ -236,7 +251,8 @@
#define BASE_MEM_FLAGS_QUERYABLE \
(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
- BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED))
+ BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
+ BASE_MEM_FLAGS_KERNEL_ONLY))
/**
* enum base_mem_import_type - Memory types supported by @a base_mem_import
@@ -304,13 +320,15 @@
#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
-/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+/* reserved handles ..-48<<PAGE_SHIFT> for future special handles */
#define BASE_MEM_COOKIE_BASE (64ul << 12)
#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
BASE_MEM_COOKIE_BASE)
/* Mask to detect 4GB boundary alignment */
#define BASE_MEM_MASK_4GB 0xfffff000UL
+/* Mask to detect 4GB boundary (in page units) alignment */
+#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
/**
* Limit on the 'extent' parameter for an allocation with the
@@ -326,6 +344,9 @@
 /* Bit mask of cookies used for memory allocation setup */
#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */
+#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */
+
/**
* @brief Result codes of changing the size of the backing store allocated to a tmem region
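Quick arithmetic for the two new derived constants added above, assuming 4 KiB pages (PAGE_SHIFT and LOCAL_PAGE_SHIFT equal to 12): BASE_MEM_PFN_MASK_4GB evaluates to 0xfffff000 >> 12 = 0xfffff, which is BASE_MEM_MASK_4GB re-expressed for page frame numbers rather than byte addresses, and KBASE_MEM_ALLOC_MAX_SIZE evaluates to (8ull << 30) >> 12 = 2^21 = 2097152 pages, i.e. the 8 GB cap expressed in pages.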
@@ -643,9 +664,10 @@
/**
* SW only requirement: Just In Time allocation
*
- * This job requests a JIT allocation based on the request in the
- * @base_jit_alloc_info structure which is passed via the jc element of
- * the atom.
+ * This job requests one or more JIT allocations through a list of
+ * @base_jit_alloc_info structures which is passed via the jc element of
+ * the atom. The number of @base_jit_alloc_info structures present in the
+ * list is passed via the nr_extres element of the atom.
*
* It should be noted that the id entry in @base_jit_alloc_info must not
* be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
@@ -659,9 +681,9 @@
/**
* SW only requirement: Just In Time free
*
- * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
- * to be freed. The ID of the JIT allocation is passed via the jc element of
- * the atom.
+ * This job requests that one or more JIT allocations created by
+ * @BASE_JD_REQ_SOFT_JIT_ALLOC be freed. The list of JIT allocation IDs
+ * is passed via the jc element of the atom.
*
* The job will complete immediately.
*/
@@ -777,41 +799,20 @@
(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
/**
- * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
- * handles retaining cores for power management and affinity management.
+ * enum kbase_atom_coreref_state - States to model state machine processed by
+ * kbasep_js_job_check_ref_cores(), which handles retaining cores for power
+ * management.
*
- * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
- * where lots of atoms could be submitted before powerup, and each has an
- * affinity chosen that causes other atoms to have an affinity
- * violation. Whilst the affinity was not causing violations at the time it
- * was chosen, it could cause violations thereafter. For example, 1000 jobs
- * could have had their affinity chosen during the powerup time, so any of
- * those 1000 jobs could cause an affinity violation later on.
- *
- * The attack would otherwise occur because other atoms/contexts have to wait for:
- * -# the currently running atoms (which are causing the violation) to
- * finish
- * -# and, the atoms that had their affinity chosen during powerup to
- * finish. These are run preferentially because they don't cause a
- * violation, but instead continue to cause the violation in others.
- * -# or, the attacker is scheduled out (which might not happen for just 2
- * contexts)
- *
- * By re-choosing the affinity (which is designed to avoid violations at the
- * time it's chosen), we break condition (2) of the wait, which minimizes the
- * problem to just waiting for current jobs to finish (which can be bounded if
- * the Job Scheduling Policy has a timer).
+ * @KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: Starting state: Cores must be
+ * requested.
+ * @KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: Cores requested, but
+ * waiting for them to be powered
+ * @KBASE_ATOM_COREREF_STATE_READY: Cores are powered, atom can be submitted to
+ * HW
*/
enum kbase_atom_coreref_state {
- /** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
- /** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
- /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
- KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
- /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
- KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
- /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
KBASE_ATOM_COREREF_STATE_READY
};
@@ -889,7 +890,7 @@
u64 jc; /**< job-chain GPU address */
struct base_jd_udata udata; /**< user data */
u64 extres_list; /**< list of external resources */
- u16 nr_extres; /**< nr of external resources */
+ u16 nr_extres; /**< nr of external resources or JIT allocations */
u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */
struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field,
this is done in order to reduce possibility of improper assigment of a dependency field */
@@ -1642,20 +1643,21 @@
#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
((base_context_create_flags)1 << 1)
+
/**
* Bitpattern describing the ::base_context_create_flags that can be
* passed to base_context_init()
*/
#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
- (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
- ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
+ (BASE_CONTEXT_CCTX_EMBEDDED | \
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
/**
* Bitpattern describing the ::base_context_create_flags that can be
* passed to the kernel
*/
#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
- ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED
/*
* Private flags used on the base context
@@ -1780,4 +1782,5 @@
#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
BASE_TLSTREAM_JOB_DUMPING_ENABLED)
+
#endif /* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
index 8603432..dc0d5f1 100644
--- a/drivers/gpu/arm/midgard/mali_kbase.h
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -59,7 +59,6 @@
#include "mali_kbase_context.h"
#include "mali_kbase_strings.h"
#include "mali_kbase_mem_lowlevel.h"
-#include "mali_kbase_trace_timeline.h"
#include "mali_kbase_js.h"
#include "mali_kbase_utility.h"
#include "mali_kbase_mem.h"
@@ -77,6 +76,7 @@
#include <trace/events/gpu.h>
#endif
+
#ifndef u64_to_user_ptr
/* Introduced in Linux v4.6 */
#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
@@ -239,6 +239,44 @@
void kbase_event_cleanup(struct kbase_context *kctx);
void kbase_event_wakeup(struct kbase_context *kctx);
+/**
+ * kbasep_jit_alloc_validate() - Validate the JIT allocation info.
+ *
+ * @kctx: Pointer to the kbase context within which the JIT
+ * allocation is to be validated.
+ * @info: Pointer to struct @base_jit_alloc_info
+ * which is to be validated.
+ * Return: 0 if the JIT allocation is valid; a negative error code otherwise
+ */
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+ struct base_jit_alloc_info *info);
+/**
+ * kbase_mem_copy_from_extres_page() - Copy pages from external resources.
+ *
+ * @kctx: kbase context within which the copying is to take place.
+ * @extres_page: Pointer to the page which corresponds to the external
+ * resource from which the copying will take place.
+ * @pages: Pointer to the pages to which the content is to be
+ * copied from the provided external resources.
+ * @nr_pages: Number of pages to copy.
+ * @target_page_nr: Number of target pages which will be used for copying.
+ * @offset: Offset into the target pages from which the copying
+ * is to be performed.
+ * @to_copy: Size of the chunk to be copied, in bytes.
+ */
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+ void *extres_page, struct page **pages, unsigned int nr_pages,
+ unsigned int *target_page_nr, size_t offset, size_t *to_copy);
+/**
+ * kbase_mem_copy_from_extres() - Copy from external resources.
+ *
+ * @kctx: kbase context within which the copying is to take place.
+ * @buf_data: Pointer to the information about external resources:
+ * pages pertaining to the external resource, number of
+ * pages to copy.
+ */
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+ struct kbase_debug_copy_buffer *buf_data);
int kbase_process_soft_job(struct kbase_jd_atom *katom);
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
void kbase_finish_soft_job(struct kbase_jd_atom *katom);
@@ -257,12 +295,6 @@
void kbasep_soft_job_timeout_worker(struct timer_list *timer);
void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
-/* api used internally for register access. Contains validation and tracing */
-void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
-int kbase_device_trace_buffer_install(
- struct kbase_context *kctx, u32 *tb, size_t size);
-void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
-
void kbasep_as_do_poke(struct work_struct *work);
/** Returns the name associated with a Mali exception code
@@ -293,6 +325,29 @@
}
/**
+ * kbase_pm_is_active - Determine whether the GPU is active
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This takes into account the following
+ *
+ * - whether there is an active context reference
+ *
+ * - whether any of the shader cores or the tiler are needed
+ *
+ * It should generally be preferred over checking just
+ * kbdev->pm.active_count on its own, because some code paths drop their
+ * reference on this whilst still having the shader cores/tiler in use.
+ *
+ * Return: true if the GPU is active, false otherwise
+ */
+static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
+{
+ return (kbdev->pm.active_count > 0 || kbdev->shader_needed_cnt ||
+ kbdev->tiler_needed_cnt);
+}
+
+/**
* Return the atom's ID, as was originally supplied by userspace in
* base_jd_atom_v2::atom_number
*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
index f3e71d1..8d71926 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -74,7 +74,7 @@
page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
- p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+ p = as_page(page_array[page_index]);
/* we need the first 10 words of the fragment shader job descriptor.
* We need to check that the offset + 10 words is less that the page
@@ -98,7 +98,7 @@
/* The data needed overflows page the dimension,
* need to map the subsequent page */
if (copy_size < JOB_HEADER_SIZE) {
- p = phys_to_page(as_phys_addr_t(page_array[page_index + 1]));
+ p = as_page(page_array[page_index + 1]);
page_2 = kmap_atomic(p);
kbase_sync_single_for_cpu(katom->kctx->kbdev,
@@ -181,7 +181,7 @@
/* Flush CPU cache to update memory for future GPU reads*/
memcpy(page_1, dst, copy_size);
- p = phys_to_page(as_phys_addr_t(page_array[page_index]));
+ p = as_page(page_array[page_index]);
kbase_sync_single_for_device(katom->kctx->kbdev,
kbase_dma_addr(p) + offset,
@@ -190,8 +190,7 @@
if (copy_size < JOB_HEADER_SIZE) {
memcpy(page_2, dst + copy_size,
JOB_HEADER_SIZE - copy_size);
- p = phys_to_page(as_phys_addr_t(page_array[page_index +
- 1]));
+ p = as_page(page_array[page_index + 1]);
kbase_sync_single_for_device(katom->kctx->kbdev,
kbase_dma_addr(p),
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
index 18444b8..27a03cf 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,12 @@
/*
* The output flags should be a combination of the following values:
- * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled
+ * KBASE_REG_GPU_CACHED: GPU cache should be enabled
+ *
+ * NOTE: Some components within the GPU might only be able to access memory
+ * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for
+ * more details.
*/
u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
{
@@ -38,6 +43,9 @@
CSTD_UNUSED(nr_pages);
+ if (!(flags & BASE_MEM_UNCACHED_GPU))
+ cache_flags |= KBASE_REG_GPU_CACHED;
+
if (flags & BASE_MEM_CACHED_CPU)
cache_flags |= KBASE_REG_CPU_CACHED;
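Note on the cache-policy change: GPU caching is now opt-out rather than implicit. Every allocation gets KBASE_REG_GPU_CACHED unless BASE_MEM_UNCACHED_GPU is passed, while CPU caching remains opt-in via BASE_MEM_CACHED_CPU, so a default allocation with no cache flags ends up GPU cached but not CPU cached.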
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
index 127d9bf..970be89 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_context.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -60,9 +60,6 @@
kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
#endif /* !defined(CONFIG_64BIT) */
-#ifdef CONFIG_MALI_TRACE_TIMELINE
- kctx->timeline.owner_tgid = task_tgid_nr(current);
-#endif
atomic_set(&kctx->setup_complete, 0);
atomic_set(&kctx->setup_in_progress, 0);
spin_lock_init(&kctx->mm_update_lock);
@@ -106,11 +103,12 @@
if (err)
goto free_jd;
+
atomic_set(&kctx->drain_pending, 0);
mutex_init(&kctx->reg_lock);
- mutex_init(&kctx->mem_partials_lock);
+ spin_lock_init(&kctx->mem_partials_lock);
INIT_LIST_HEAD(&kctx->mem_partials);
INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
@@ -119,21 +117,10 @@
if (err)
goto free_event;
- err = kbase_mmu_init(kctx);
+ err = kbase_mmu_init(kbdev, &kctx->mmu, kctx);
if (err)
goto term_dma_fence;
- do {
- err = kbase_mem_pool_grow(&kctx->mem_pool,
- MIDGARD_MMU_BOTTOMLEVEL);
- if (err)
- goto pgd_no_mem;
-
- mutex_lock(&kctx->mmu_lock);
- kctx->pgd = kbase_mmu_alloc_pgd(kctx);
- mutex_unlock(&kctx->mmu_lock);
- } while (!kctx->pgd);
-
p = kbase_mem_alloc_page(&kctx->mem_pool);
if (!p)
goto no_sink_page;
@@ -143,6 +130,7 @@
kctx->cookies = KBASE_COOKIE_MASK;
+
/* Make sure page 0 is not used... */
err = kbase_region_tracker_init(kctx);
if (err)
@@ -158,9 +146,6 @@
#ifdef CONFIG_GPU_TRACEPOINTS
atomic_set(&kctx->jctx.work_id, 0);
#endif
-#ifdef CONFIG_MALI_TRACE_TIMELINE
- atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
-#endif
kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
@@ -180,12 +165,7 @@
no_region_tracker:
kbase_mem_pool_free(&kctx->mem_pool, p, false);
no_sink_page:
- /* VM lock needed for the call to kbase_mmu_free_pgd */
- kbase_gpu_vm_lock(kctx);
- kbase_mmu_free_pgd(kctx);
- kbase_gpu_vm_unlock(kctx);
-pgd_no_mem:
- kbase_mmu_term(kctx);
+ kbase_mmu_term(kbdev, &kctx->mmu);
term_dma_fence:
kbase_dma_fence_term(kctx);
free_event:
@@ -207,9 +187,10 @@
}
KBASE_EXPORT_SYMBOL(kbase_create_context);
-static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+static void kbase_reg_pending_dtor(struct kbase_device *kbdev,
+ struct kbase_va_region *reg)
{
- dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+ dev_dbg(kbdev->dev, "Freeing pending unmapped region\n");
kbase_mem_phy_alloc_put(reg->cpu_alloc);
kbase_mem_phy_alloc_put(reg->gpu_alloc);
kfree(reg);
@@ -248,6 +229,7 @@
kbase_event_cleanup(kctx);
+
/*
* JIT must be terminated before the code below as it must be called
* without the region lock being held.
@@ -260,11 +242,8 @@
kbase_sticky_resource_term(kctx);
- /* MMU is disabled as part of scheduling out the context */
- kbase_mmu_free_pgd(kctx);
-
/* drop the aliasing sink page now that it can't be mapped anymore */
- p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page));
+ p = as_page(kctx->aliasing_sink_page);
kbase_mem_pool_free(&kctx->mem_pool, p, false);
/* free pending region setups */
@@ -274,7 +253,7 @@
BUG_ON(!kctx->pending_regions[cookie]);
- kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+ kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]);
kctx->pending_regions[cookie] = NULL;
pending_regions_to_clean &= ~(1UL << cookie);
@@ -283,6 +262,7 @@
kbase_region_tracker_term(kctx);
kbase_gpu_vm_unlock(kctx);
+
/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
kbasep_js_kctx_term(kctx);
@@ -296,7 +276,7 @@
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
- kbase_mmu_term(kctx);
+ kbase_mmu_term(kbdev, &kctx->mmu);
pages = atomic_read(&kctx->used_pages);
if (pages != 0)
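Note on the MMU refactor visible in this file: the per-context page-table state (top-level pgd, mmu_teardown_pages cache and mmu_lock) now lives in struct kbase_mmu_table, owned by kctx->mmu, and the old bootstrap loop that grew the memory pool and allocated the PGD inline has moved into kbase_mmu_init(). A minimal sketch of the new init/term pairing follows; it assumes kbase_mmu_init() performs the PGD bootstrap internally and accepts a NULL context back-reference for tables not owned by a context, as described by the struct kbase_mmu_table documentation added in mali_kbase_defs.h. The helper name is hypothetical.

/*
 * Sketch only: illustrates the (kbdev, mmut, kctx) ownership model.
 * example_standalone_table() is not driver code.
 */
static int example_standalone_table(struct kbase_device *kbdev,
		struct kbase_mmu_table *mmut)
{
	/* NULL: this table does not belong to a context */
	int err = kbase_mmu_init(kbdev, mmut, NULL);

	if (err)
		return err;

	/* ... program mappings via the kbase MMU helpers ... */

	kbase_mmu_term(kbdev, mmut);
	return 0;
}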
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
index d696b40..d44ebd9 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -164,7 +164,6 @@
inited_backend_late = (1u << 6),
inited_device = (1u << 7),
inited_vinstr = (1u << 8),
-
inited_job_fault = (1u << 10),
inited_sysfs_group = (1u << 11),
inited_misc_register = (1u << 12),
@@ -282,9 +281,9 @@
*/
static int kstrtobool_from_user(const char __user *s, size_t count, bool *res)
{
- char buf[32];
+ char buf[4];
- count = min(sizeof(buf), count);
+ count = min(count, sizeof(buf) - 1);
if (copy_from_user(buf, s, count))
return -EFAULT;
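Note on the smaller bounce buffer: clamping count to sizeof(buf) - 1 (rather than sizeof(buf) as before) leaves one byte free for a terminating NUL, presumably written after the copy, outside this hunk. Four bytes remain sufficient for the short boolean tokens such an interface accepts ("0", "1", "y", "n", "on", "off"), assuming the parser, like the kernel's strtobool(), only examines the leading characters.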
@@ -579,12 +578,27 @@
u64 flags = alloc->in.flags;
u64 gpu_va;
+ rcu_read_lock();
+ /* Don't allow memory allocation until user space has set up the
+ * tracking page (which sets kctx->process_mm). Also catches when we've
+ * forked.
+ */
+ if (rcu_dereference(kctx->process_mm) != current->mm) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ rcu_read_unlock();
+
+ if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+ return -ENOMEM;
+
if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
/* force SAME_VA if a 64-bit client */
flags |= BASE_MEM_SAME_VA;
}
+
reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
alloc->in.commit_pages,
alloc->in.extent,
@@ -822,6 +836,10 @@
}
flags = alias->in.flags;
+ if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) {
+ vfree(ai);
+ return -EINVAL;
+ }
alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
alias->in.stride, alias->in.nents,
@@ -843,6 +861,9 @@
int ret;
u64 flags = import->in.flags;
+ if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+ return -ENOMEM;
+
ret = kbase_mem_import(kctx,
import->in.type,
u64_to_user_ptr(import->in.phandle),
@@ -859,6 +880,9 @@
static int kbase_api_mem_flags_change(struct kbase_context *kctx,
struct kbase_ioctl_mem_flags_change *change)
{
+ if (change->flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+ return -ENOMEM;
+
return kbase_mem_flags_change(kctx, change->gpu_va,
change->flags, change->mask);
}
@@ -1039,6 +1063,7 @@
}
#endif /* MALI_UNIT_TEST */
+
#define KBASE_HANDLE_IOCTL(cmd, function) \
do { \
BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \
@@ -1495,111 +1520,6 @@
*/
static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
-/**
- * show_ca_policy - Show callback for the core_availability_policy sysfs file.
- *
- * This function is called to get the contents of the core_availability_policy
- * sysfs file. This is a list of the available policies with the currently
- * active one surrounded by square brackets.
- *
- * @dev: The device this sysfs file is for
- * @attr: The attributes of the sysfs file
- * @buf: The output buffer for the sysfs file contents
- *
- * Return: The number of bytes output to @buf.
- */
-static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
-{
- struct kbase_device *kbdev;
- const struct kbase_pm_ca_policy *current_policy;
- const struct kbase_pm_ca_policy *const *policy_list;
- int policy_count;
- int i;
- ssize_t ret = 0;
-
- kbdev = to_kbase_device(dev);
-
- if (!kbdev)
- return -ENODEV;
-
- current_policy = kbase_pm_ca_get_policy(kbdev);
-
- policy_count = kbase_pm_ca_list_policies(&policy_list);
-
- for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
- if (policy_list[i] == current_policy)
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
- else
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
- }
-
- if (ret < PAGE_SIZE - 1) {
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
- } else {
- buf[PAGE_SIZE - 2] = '\n';
- buf[PAGE_SIZE - 1] = '\0';
- ret = PAGE_SIZE - 1;
- }
-
- return ret;
-}
-
-/**
- * set_ca_policy - Store callback for the core_availability_policy sysfs file.
- *
- * This function is called when the core_availability_policy sysfs file is
- * written to. It matches the requested policy against the available policies
- * and if a matching policy is found calls kbase_pm_set_policy() to change
- * the policy.
- *
- * @dev: The device with sysfs file is for
- * @attr: The attributes of the sysfs file
- * @buf: The value written to the sysfs file
- * @count: The number of bytes written to the sysfs file
- *
- * Return: @count if the function succeeded. An error code on failure.
- */
-static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
- struct kbase_device *kbdev;
- const struct kbase_pm_ca_policy *new_policy = NULL;
- const struct kbase_pm_ca_policy *const *policy_list;
- int policy_count;
- int i;
-
- kbdev = to_kbase_device(dev);
-
- if (!kbdev)
- return -ENODEV;
-
- policy_count = kbase_pm_ca_list_policies(&policy_list);
-
- for (i = 0; i < policy_count; i++) {
- if (sysfs_streq(policy_list[i]->name, buf)) {
- new_policy = policy_list[i];
- break;
- }
- }
-
- if (!new_policy) {
- dev_err(dev, "core_availability_policy: policy not found\n");
- return -EINVAL;
- }
-
- kbase_pm_ca_set_policy(kbdev, new_policy);
-
- return count;
-}
-
-/*
- * The sysfs file core_availability_policy
- *
- * This is used for obtaining information about the available policies,
- * determining which policy is currently active, and changing the active
- * policy.
- */
-static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
-
/*
* show_core_mask - Show callback for the core_mask sysfs file.
*
@@ -2410,7 +2330,7 @@
{ .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G51" },
{ .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
- .name = "Mali-TNOx" },
+ .name = "Mali-G76" },
{ .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G31" },
{ .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
@@ -3195,6 +3115,7 @@
kbdev->reg_start = reg_res->start;
kbdev->reg_size = resource_size(reg_res);
+
err = kbase_common_reg_map(kbdev);
if (err) {
dev_err(kbdev->dev, "Failed to map registers\n");
@@ -3310,6 +3231,7 @@
#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
}
+#ifdef MALI_KBASE_BUILD
#ifdef CONFIG_DEBUG_FS
#if KBASE_GPU_RESET_EN
@@ -3370,7 +3292,7 @@
ssize_t ret_val;
kbase_pm_context_active(kbdev);
- gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL);
+ gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS));
kbase_pm_context_idle(kbdev);
if (gpu_status & GPU_DBGEN)
@@ -3464,10 +3386,6 @@
kbasep_trace_debugfs_init(kbdev);
#endif /* KBASE_TRACE_ENABLE */
-#ifdef CONFIG_MALI_TRACE_TIMELINE
- kbasep_trace_timeline_debugfs_init(kbdev);
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
#ifdef CONFIG_MALI_DEVFREQ
#ifdef CONFIG_DEVFREQ_THERMAL
if (kbdev->inited_subsys & inited_devfreq)
@@ -3501,6 +3419,7 @@
static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
#endif /* CONFIG_DEBUG_FS */
+#endif /* MALI_KBASE_BUILD */
static void kbase_device_coherency_init(struct kbase_device *kbdev,
unsigned prod_id)
@@ -3589,7 +3508,6 @@
&dev_attr_reset_timeout.attr,
&dev_attr_js_scheduling_period.attr,
&dev_attr_power_policy.attr,
- &dev_attr_core_availability_policy.attr,
&dev_attr_core_mask.attr,
&dev_attr_mem_pool_size.attr,
&dev_attr_mem_pool_max_size.attr,
@@ -3643,10 +3561,12 @@
kbdev->inited_subsys &= ~inited_get_device;
}
+#ifdef MALI_KBASE_BUILD
if (kbdev->inited_subsys & inited_debugfs) {
kbase_device_debugfs_term(kbdev);
kbdev->inited_subsys &= ~inited_debugfs;
}
+#endif
if (kbdev->inited_subsys & inited_job_fault) {
kbase_debug_job_fault_dev_term(kbdev);
@@ -3660,6 +3580,7 @@
}
#endif
+
if (kbdev->inited_subsys & inited_vinstr) {
kbase_vinstr_term(kbdev->vinstr_ctx);
kbdev->inited_subsys &= ~inited_vinstr;
@@ -3913,6 +3834,7 @@
}
kbdev->inited_subsys |= inited_vinstr;
+
#ifdef CONFIG_MALI_DEVFREQ
/* Devfreq uses vinstr, so must be initialized after it. */
err = kbase_devfreq_init(kbdev);
@@ -3922,6 +3844,7 @@
dev_err(kbdev->dev, "Continuing without devfreq\n");
#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef MALI_KBASE_BUILD
err = kbase_debug_job_fault_dev_init(kbdev);
if (err) {
dev_err(kbdev->dev, "Job fault debug initialization failed\n");
@@ -4000,6 +3923,7 @@
"Probed as %s\n", dev_name(kbdev->mdev.this_device));
kbase_dev_nr++;
+#endif /* MALI_KBASE_BUILD */
return err;
}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
index 85a6afd..bda0560 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -121,7 +121,8 @@
kctx->as_nr = free_as;
kbdev->as_to_kctx[free_as] = kctx;
- kbase_mmu_update(kctx);
+ kbase_mmu_update(kbdev, &kctx->mmu,
+ kctx->as_nr);
}
} else {
atomic_dec(&kctx->refcount);
@@ -193,7 +194,8 @@
if (atomic_read(&kctx->refcount)) {
WARN_ON(kctx->as_nr != i);
- kbase_mmu_update(kctx);
+ kbase_mmu_update(kbdev, &kctx->mmu,
+ kctx->as_nr);
} else {
/* This context might have been assigned an
* AS before, clear it.
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
index 857fe97..ee45529 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -130,7 +130,7 @@
if (!(map->flags & KBASE_REG_CPU_CACHED))
prot = pgprot_writecombine(prot);
- page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset]));
+ page = as_page(map->alloc->pages[data->offset]);
mapping = vmap(&page, 1, VM_MAP, prot);
if (!mapping)
goto out;
@@ -223,12 +223,6 @@
goto out;
}
- ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
- if (ret != 0) {
- kbase_gpu_vm_unlock(kctx);
- goto out;
- }
-
ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
if (0 != ret) {
kbase_gpu_vm_unlock(kctx);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
index 1401f47..4adfe35 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -42,6 +42,7 @@
#include <mali_kbase_gpuprops_types.h>
#include <protected_mode_switcher.h>
+
#include <linux/atomic.h>
#include <linux/mempool.h>
#include <linux/slab.h>
@@ -52,7 +53,6 @@
#include <linux/bus_logger.h>
#endif
-
#if defined(CONFIG_SYNC)
#include <sync.h>
#else
@@ -171,6 +171,13 @@
/* Maximum force replay limit when randomization is enabled */
#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+/* Maximum number of pages of memory that require a permanent mapping, per
+ * kbase_context
+ */
+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \
+ PAGE_SHIFT)
+
+
/** Atom has been previously soft-stoppped */
#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
/** Atom has been previously retried to execute */
@@ -504,7 +511,6 @@
* external resources referenced by the atom.
* @device_nr: indicates the coregroup with which the atom is associated,
* when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified.
- * @affinity: bitmask of the shader cores on which the atom can execute.
* @jc: GPU address of the job-chain.
* @softjob_data: Copy of data read from the user space buffer that @jc
* points to.
@@ -600,7 +606,6 @@
struct kbase_ext_res *extres;
u32 device_nr;
- u64 affinity;
u64 jc;
void *softjob_data;
enum kbase_atom_coreref_state coreref_state;
@@ -698,7 +703,7 @@
enum kbase_atom_gpu_rb_state gpu_rb_state;
- u64 need_cache_flush_cores_retained;
+ bool need_cache_flush_cores_retained;
atomic_t blocked;
@@ -731,6 +736,30 @@
u32 age;
};
+/**
+ * struct kbase_debug_copy_buffer - information about the buffer to be copied.
+ *
+ * @size: size of the buffer in bytes
+ * @pages: pointer to an array of pointers to the pages which contain
+ * the buffer
+ * @nr_pages: number of pages
+ * @offset: offset into the pages
+ * @gpu_alloc: pointer to physical memory allocated by the GPU
+ * @extres_pages: array of pointers to the pages containing external resources
+ * for this buffer
+ * @nr_extres_pages: number of pages in @extres_pages
+ */
+struct kbase_debug_copy_buffer {
+ size_t size;
+ struct page **pages;
+ int nr_pages;
+ size_t offset;
+ struct kbase_mem_phy_alloc *gpu_alloc;
+
+ struct page **extres_pages;
+ int nr_extres_pages;
+};
+
static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
{
return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
@@ -872,6 +901,28 @@
struct hrtimer poke_timer;
};
+/**
+ * struct kbase_mmu_table - object representing a set of GPU page tables
+ * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries
+ * of top & intermediate level page tables to avoid
+ * repeated calls to kmap_atomic during the MMU teardown.
+ * @mmu_lock: Lock to serialize the accesses made to multi level GPU
+ * page tables
+ * @pgd: Physical address of the page allocated for the top
+ * level page table of the context, this is used for
+ * MMU HW programming as the address translation will
+ * start from the top level page table.
+ * @kctx: If this set of MMU tables belongs to a context then
+ * this is a back-reference to the context, otherwise
+ * it is NULL
+ */
+struct kbase_mmu_table {
+ u64 *mmu_teardown_pages;
+ struct mutex mmu_lock;
+ phys_addr_t pgd;
+ struct kbase_context *kctx;
+};
+
static inline int kbase_as_has_bus_fault(struct kbase_as *as)
{
return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
@@ -952,88 +1003,6 @@
u8 flags;
};
-/** Event IDs for the power management framework.
- *
- * Any of these events might be missed, so they should not be relied upon to
- * find the precise state of the GPU at a particular time in the
- * trace. Overall, we should get a high percentage of these events for
- * statisical purposes, and so a few missing should not be a problem */
-enum kbase_timeline_pm_event {
- /* helper for tests */
- KBASEP_TIMELINE_PM_EVENT_FIRST,
-
- /** Event reserved for backwards compatibility with 'init' events */
- KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
-
- /** The power state of the device has changed.
- *
- * Specifically, the device has reached a desired or available state.
- */
- KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
-
- /** The GPU is becoming active.
- *
- * This event is sent when the first context is about to use the GPU.
- */
- KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
-
- /** The GPU is becoming idle.
- *
- * This event is sent when the last context has finished using the GPU.
- */
- KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
-
- /** Event reserved for backwards compatibility with 'policy_change'
- * events */
- KBASE_TIMELINE_PM_EVENT_RESERVED_4,
-
- /** Event reserved for backwards compatibility with 'system_suspend'
- * events */
- KBASE_TIMELINE_PM_EVENT_RESERVED_5,
-
- /** Event reserved for backwards compatibility with 'system_resume'
- * events */
- KBASE_TIMELINE_PM_EVENT_RESERVED_6,
-
- /** The job scheduler is requesting to power up/down cores.
- *
- * This event is sent when:
- * - powered down cores are needed to complete a job
- * - powered up cores are not needed anymore
- */
- KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
-
- KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
-};
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-struct kbase_trace_kctx_timeline {
- atomic_t jd_atoms_in_flight;
- u32 owner_tgid;
-};
-
-struct kbase_trace_kbdev_timeline {
- /* Note: strictly speaking, not needed, because it's in sync with
- * kbase_device::jm_slots[]::submitted_nr
- *
- * But it's kept as an example of how to add global timeline tracking
- * information
- *
- * The caller must hold hwaccess_lock when accessing this */
- u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
-
- /* Last UID for each PM event */
- atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
- /* Counter for generating PM event UIDs */
- atomic_t pm_event_uid_counter;
- /*
- * L2 transition state - true indicates that the transition is ongoing
- * Expected to be protected by hwaccess_lock */
- bool l2_transitioning;
-};
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
-
struct kbasep_kctx_list_element {
struct list_head link;
struct kbase_context *kctx;
@@ -1055,7 +1024,11 @@
*/
struct mutex lock;
- /** The reference count of active contexts on this device. */
+ /**
+ * The reference count of active contexts on this device. Note that
+ * some code paths keep shaders/the tiler powered whilst this is 0. Use
+ * kbase_pm_is_active() instead to check for such cases.
+ */
int active_count;
/** Flag indicating suspending/suspended */
bool suspending;
@@ -1148,9 +1121,33 @@
u64 core_mask;
};
+/* MMU mode flags */
+#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */
+
+/**
+ * struct kbase_mmu_mode - object containing pointer to methods invoked for
+ * programming the MMU, as per the MMU mode supported
+ * by Hw.
+ * @update: enable & setup/configure one of the GPU address space.
+ * @get_as_setup: retrieve the configuration of one of the GPU address space.
+ * @disable_as: disable one of the GPU address space.
+ * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry.
+ * @ate_is_valid: check if the pte is a valid address translation entry
+ * encoding the physical address of the actual mapped page.
+ * @pte_is_valid: check if the pte is a valid entry encoding the physical
+ * address of the next lower level page table.
+ * @entry_set_ate: program the pte to be a valid address translation entry to
+ * encode the physical address of the actual page being mapped.
+ * @entry_set_pte: program the pte to be a valid entry to encode the physical
+ * address of the next lower level page table.
+ * @entry_invalidate: clear out or invalidate the pte.
+ * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
+ */
struct kbase_mmu_mode {
- void (*update)(struct kbase_context *kctx);
- void (*get_as_setup)(struct kbase_context *kctx,
+ void (*update)(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ int as_nr);
+ void (*get_as_setup)(struct kbase_mmu_table *mmut,
struct kbase_mmu_setup * const setup);
void (*disable_as)(struct kbase_device *kbdev, int as_nr);
phys_addr_t (*pte_to_phy_addr)(u64 entry);
@@ -1160,6 +1157,7 @@
unsigned long flags, unsigned int level);
void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
void (*entry_invalidate)(u64 *entry);
+ unsigned long flags;
};
struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
@@ -1168,6 +1166,7 @@
#define DEVNAME_SIZE 16
+
/**
* struct kbase_device - Object representing an instance of GPU platform device,
* allocated from the probe method of mali driver.
@@ -1234,7 +1233,7 @@
* @mmu_mode: Pointer to the object containing methods for programming
* the MMU, depending on the type of MMU supported by Hw.
* @as: Array of objects representing address spaces of GPU.
- * @as_free: Bitpattern of free/available address space lots
+ * @as_free: Bitpattern of free/available GPU address spaces.
* @as_to_kctx: Array of pointers to struct kbase_context, having
* GPU adrress spaces assigned to them.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
@@ -1243,26 +1242,12 @@
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
* @hw_features_mask: List of available HW features.
- * shader_inuse_bitmap: Bitmaps of shader cores that are currently in use.
- * These should be kept up to date by the job scheduler.
- * The bit to be set in this bitmap should already be set
- * in the @shader_needed_bitmap.
- * @pm.power_change_lock should be held when accessing
- * these members.
- * @shader_inuse_cnt: Usage count for each of the 64 shader cores
- * @shader_needed_bitmap: Bitmaps of cores the JS needs for jobs ready to run
- * kbase_pm_check_transitions_nolock() should be called
- * when the bitmap is modified to update the power
- * management system and allow transitions to occur.
- * @shader_needed_cnt: Count for each of the 64 shader cores, incremented
- * when the core is requested for use and decremented
- * later when the core is known to be powered up for use.
- * @tiler_inuse_cnt: Usage count for the Tiler block. @tiler_needed_cnt
- * should be non zero at the time of incrementing the
- * usage count.
+ * @shader_needed_cnt: Count for the 64 shader cores, incremented when
+ * shaders are requested for use and decremented later
+ * when they are no longer required.
* @tiler_needed_cnt: Count for the Tiler block shader cores, incremented
* when Tiler is requested for use and decremented
- * later when Tiler is known to be powered up for use.
+ * later when the Tiler is no longer required.
* @disjoint_event: struct for keeping track of the disjoint information,
* that whether the GPU is in a disjoint state and the
* number of disjoint events that have occurred on GPU.
@@ -1331,8 +1316,10 @@
* GPU device.
* @devfreq_cooling: Pointer returned on registering devfreq cooling device
* corresponding to @devfreq.
- * @ipa_use_configured_model: set to TRUE when configured model is used for IPA and
- * FALSE when fallback model is used.
+ * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected
+ * mode. It is a sticky flag which is cleared by IPA
+ * once it has made use of information that GPU had
+ * previously entered protected mode.
* @ipa: Top level structure for IPA, containing pointers to both
* configured & fallback models.
* @timeline: Stores the global timeline tracking information.
@@ -1467,7 +1454,6 @@
u16 as_free; /* Bitpattern of free Address Spaces */
struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
-
spinlock_t mmu_mask_change;
struct kbase_gpu_props gpu_props;
@@ -1475,17 +1461,8 @@
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
- u64 shader_inuse_bitmap;
-
- u32 shader_inuse_cnt[64];
-
- u64 shader_needed_bitmap;
-
- u32 shader_needed_cnt[64];
-
- u32 tiler_inuse_cnt;
-
u32 tiler_needed_cnt;
+ u32 shader_needed_cnt;
struct {
atomic_t count;
@@ -1549,7 +1526,7 @@
#else
struct thermal_cooling_device *devfreq_cooling;
#endif
- atomic_t ipa_use_configured_model;
+ bool ipa_protection_mode_switched;
struct {
/* Access to this struct must be with ipa.lock held */
struct mutex lock;
@@ -1585,11 +1562,6 @@
#endif /* CONFIG_DEVFREQ_THERMAL */
#endif /* CONFIG_MALI_DEVFREQ */
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
- struct kbase_trace_kbdev_timeline timeline;
-#endif
-
bool job_fault_debug;
#ifdef CONFIG_DEBUG_FS
@@ -1682,6 +1654,7 @@
/* See KBASE_JS_*_PRIORITY_MODE for details. */
u32 js_ctx_scheduling_mode;
+
};
/**
@@ -1785,6 +1758,7 @@
DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
};
+
/**
* struct kbase_context - Object representing an entity, among which GPU is
* scheduled and gets its own GPU address space.
@@ -1792,14 +1766,12 @@
* @filp: Pointer to the struct file corresponding to device file
* /dev/malixx instance, passed to the file's open method.
* @kbdev: Pointer to the Kbase device for which the context is created.
+ * @mmu: Structure holding details of the MMU tables for this
+ * context
* @id: Unique identifier for the context, indicating the number of
* contexts which have been created for the device so far.
* @api_version: contains the version number for User/kernel interface,
* used for compatibility check.
- * @pgd: Physical address of the page allocated for the top level
- * page table of the context, this will be used for MMU Hw
- * programming as the address translation will start from
- * the top level page table.
* @event_list: list of posted events about completed atoms, to be sent to
* event handling thread of Userspace.
* @event_coalesce_list: list containing events corresponding to successive atoms
@@ -1823,9 +1795,6 @@
* @api_version value 0.
* @setup_in_progress: Indicates if the context's setup is in progress and other
* setup calls made during that time shall be rejected.
- * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries of
- * top & intermediate level page tables to avoid repeated
- * calls to kmap_atomic during the MMU teardown.
* @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations,
* which can alias a number of memory regions. The page
* represents a region where it is mapped with a write-alloc
@@ -1836,18 +1805,12 @@
* @mem_partials: List head for the list of large pages, 2MB in size, which
* have been split into 4 KB pages and are used
* partially for the allocations >= 2 MB in size.
- * @mmu_lock: Lock to serialize the accesses made to multi level GPU
- * page tables, maintained for every context.
* @reg_lock: Lock used for GPU virtual address space management operations,
* like adding/freeing a memory region in the address space.
* Could possibly be converted to a rwlock.
* @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA
* zone of the GPU virtual address space. Used for allocations
* having the same value for GPU & CPU virtual address.
- * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC
- * zone of the GPU virtual address space. Used for
- * allocations containing executable code for
- * shader programs.
* @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA
* zone of the GPU virtual address space.
* @cookies: Bitmask containing BITS_PER_LONG bits, used mainly for
@@ -1875,6 +1838,7 @@
* @nonmapped_pages: Updated in the same way as @used_pages, except for the case
* when special tracking page is freed by userspace where it
* is reset to 0.
+ * @permanent_mapped_pages: Usage count of permanently mapped memory
* @mem_pool: Object containing the state for the context specific pool of
* 4KB size physical pages.
* @lp_mem_pool: Object containing the state for the context specific pool of
@@ -2017,9 +1981,10 @@
struct kbase_context {
struct file *filp;
struct kbase_device *kbdev;
+ struct kbase_mmu_table mmu;
+
u32 id;
unsigned long api_version;
- phys_addr_t pgd;
struct list_head event_list;
struct list_head event_coalesce_list;
struct mutex event_mutex;
@@ -2033,19 +1998,16 @@
atomic_t setup_complete;
atomic_t setup_in_progress;
- u64 *mmu_teardown_pages;
-
struct tagged_addr aliasing_sink_page;
- struct mutex mem_partials_lock;
+ spinlock_t mem_partials_lock;
struct list_head mem_partials;
- struct mutex mmu_lock;
struct mutex reg_lock;
struct rb_root reg_rbtree_same;
- struct rb_root reg_rbtree_exec;
struct rb_root reg_rbtree_custom;
+
unsigned long cookies;
struct kbase_va_region *pending_regions[BITS_PER_LONG];
@@ -2056,6 +2018,7 @@
struct kbase_jd_context jctx;
atomic_t used_pages;
atomic_t nonmapped_pages;
+ unsigned long permanent_mapped_pages;
struct kbase_mem_pool mem_pool;
struct kbase_mem_pool lp_mem_pool;
@@ -2076,6 +2039,7 @@
atomic_t refcount;
+
/* NOTE:
*
* Flags are in jctx.sched_info.ctx.flags
@@ -2083,12 +2047,9 @@
*
* All other flags must be added there */
spinlock_t mm_update_lock;
- struct mm_struct *process_mm;
+ struct mm_struct __rcu *process_mm;
u64 same_va_end;
-#ifdef CONFIG_MALI_TRACE_TIMELINE
- struct kbase_trace_kctx_timeline timeline;
-#endif
#ifdef CONFIG_DEBUG_FS
char *mem_profile_data;
size_t mem_profile_size;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
index e58e27c..804cf3f 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_device.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -225,14 +225,6 @@
mutex_init(&kbdev->cacheclean_lock);
-#ifdef CONFIG_MALI_TRACE_TIMELINE
- for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
- kbdev->timeline.slot_atoms_submitted[i] = 0;
-
- for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
- atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
kbdev->kbase_profiling_controls[i] = 0;
@@ -285,91 +277,6 @@
kfree(kbdev);
}
-int kbase_device_trace_buffer_install(
- struct kbase_context *kctx, u32 *tb, size_t size)
-{
- unsigned long flags;
-
- KBASE_DEBUG_ASSERT(kctx);
- KBASE_DEBUG_ASSERT(tb);
-
- /* Interface uses 16-bit value to track last accessed entry. Each entry
- * is composed of two 32-bit words.
- * This limits the size that can be handled without an overflow. */
- if (0xFFFF * (2 * sizeof(u32)) < size)
- return -EINVAL;
-
- /* set up the header */
- /* magic number in the first 4 bytes */
- tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
- /* Store (write offset = 0, wrap counter = 0, transaction active = no)
- * write offset 0 means never written.
- * Offsets 1 to (wrap_offset - 1) used to store values when trace started
- */
- tb[1] = 0;
-
- /* install trace buffer */
- spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
- kctx->jctx.tb_wrap_offset = size / 8;
- kctx->jctx.tb = tb;
- spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
-
- return 0;
-}
-
-void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
-{
- unsigned long flags;
-
- KBASE_DEBUG_ASSERT(kctx);
- spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
- kctx->jctx.tb = NULL;
- kctx->jctx.tb_wrap_offset = 0;
- spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
-}
-
-void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
- if (kctx->jctx.tb) {
- u16 wrap_count;
- u16 write_offset;
- u32 *tb = kctx->jctx.tb;
- u32 header_word;
-
- header_word = tb[1];
- KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
-
- wrap_count = (header_word >> 1) & 0x7FFF;
- write_offset = (header_word >> 16) & 0xFFFF;
-
- /* mark as transaction in progress */
- tb[1] |= 0x1;
- mb();
-
- /* calculate new offset */
- write_offset++;
- if (write_offset == kctx->jctx.tb_wrap_offset) {
- /* wrap */
- write_offset = 1;
- wrap_count++;
- wrap_count &= 0x7FFF; /* 15bit wrap counter */
- }
-
- /* store the trace entry at the selected offset */
- tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
- tb[write_offset * 2 + 1] = reg_value;
- mb();
-
- /* new header word */
- header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */
- tb[1] = header_word;
- }
- spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
-}
-
/*
* Device trace functions
*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
index e290fce..3c9cef3 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_event.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,8 +38,6 @@
data = katom->udata;
- KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
-
KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
KBASE_TLSTREAM_TL_DEL_ATOM(katom);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
index 040b209..7077c3a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -73,10 +73,26 @@
hardware_counters = hardware_counters_mali_tSIx;
count = ARRAY_SIZE(hardware_counters_mali_tSIx);
break;
+ case GPU_ID2_PRODUCT_TDVX:
+ hardware_counters = hardware_counters_mali_tSIx;
+ count = ARRAY_SIZE(hardware_counters_mali_tSIx);
+ break;
case GPU_ID2_PRODUCT_TNOX:
hardware_counters = hardware_counters_mali_tNOx;
count = ARRAY_SIZE(hardware_counters_mali_tNOx);
break;
+ case GPU_ID2_PRODUCT_TGOX:
+ hardware_counters = hardware_counters_mali_tGOx;
+ count = ARRAY_SIZE(hardware_counters_mali_tGOx);
+ break;
+ case GPU_ID2_PRODUCT_TKAX:
+ hardware_counters = hardware_counters_mali_tKAx;
+ count = ARRAY_SIZE(hardware_counters_mali_tKAx);
+ break;
+ case GPU_ID2_PRODUCT_TTRX:
+ hardware_counters = hardware_counters_mali_tTRx;
+ count = ARRAY_SIZE(hardware_counters_mali_tTRx);
+ break;
default:
hardware_counters = NULL;
count = 0;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
index a17870d..c1e315b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h
@@ -116,10 +116,10 @@
"",
"TTRx_BUS_WRITE",
"TTRx_LOADING_DESC",
- "",
- "",
- "",
- "",
+ "TTRx_IDVS_POS_SHAD_REQ",
+ "TTRx_IDVS_POS_SHAD_WAIT",
+ "TTRx_IDVS_POS_SHAD_STALL",
+ "TTRx_IDVS_POS_FIFO_FULL",
"TTRx_PREFETCH_STALL",
"TTRx_VCACHE_HIT",
"TTRx_VCACHE_MISS",
@@ -129,11 +129,11 @@
"TTRx_VFETCH_STALL",
"TTRx_PRIMASSY_STALL",
"TTRx_BBOX_GEN_STALL",
- "",
- "",
- "",
- "",
- "",
+ "TTRx_IDVS_VBU_HIT",
+ "TTRx_IDVS_VBU_MISS",
+ "TTRx_IDVS_VBU_LINE_DEALLOCATE",
+ "TTRx_IDVS_VAR_SHAD_REQ",
+ "TTRx_IDVS_VAR_SHAD_STALL",
"TTRx_BINNER_STALL",
"TTRx_ITER_STALL",
"TTRx_COMPRESS_MISS",
@@ -196,15 +196,15 @@
"TTRx_EXEC_ICACHE_MISS",
"TTRx_EXEC_STARVE_ARITH",
"TTRx_CALL_BLEND_SHADER",
- "TTRx_TEX_INSTR",
- "TTRx_TEX_INSTR_MIPMAP",
- "TTRx_TEX_INSTR_COMPRESSED",
- "TTRx_TEX_INSTR_3D",
- "TTRx_TEX_INSTR_TRILINEAR",
- "TTRx_TEX_COORD_ISSUE",
- "TTRx_TEX_COORD_STALL",
- "TTRx_TEX_STARVE_CACHE",
- "TTRx_TEX_STARVE_FILTER",
+ "TTRx_TEX_MSGI_NUM_QUADS",
+ "TTRx_TEX_DFCH_NUM_PASSES",
+ "TTRx_TEX_DFCH_NUM_PASSES_MISS",
+ "TTRx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+ "TTRx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+ "TTRx_TEX_TFCH_NUM_LINES_FETCHED",
+ "TTRx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK",
+ "TTRx_TEX_TFCH_NUM_OPERATIONS",
+ "TTRx_TEX_FILT_NUM_OPERATIONS",
"TTRx_LS_MEM_READ_FULL",
"TTRx_LS_MEM_READ_SHORT",
"TTRx_LS_MEM_WRITE_FULL",
@@ -222,9 +222,9 @@
"TTRx_BEATS_RD_TEX",
"TTRx_BEATS_RD_TEX_EXT",
"TTRx_BEATS_RD_OTHER",
- "",
+ "TTRx_BEATS_WR_LSC_OTHER",
"TTRx_BEATS_WR_TIB",
- "TTRx_BEATS_WR_LSC",
+ "TTRx_BEATS_WR_LSC_WB",
/* Performance counters for the Memory System */
"",
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
index b362546..0481f80 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gwt.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
@@ -55,8 +55,6 @@
kbase_gpu_gwt_setup_page_permission(kctx, flag,
rb_first(&(kctx->reg_rbtree_same)));
kbase_gpu_gwt_setup_page_permission(kctx, flag,
- rb_first(&(kctx->reg_rbtree_exec)));
- kbase_gpu_gwt_setup_page_permission(kctx, flag,
rb_first(&(kctx->reg_rbtree_custom)));
}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
index ea87913..580ac987 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -160,14 +160,13 @@
* any scheduling has taken place.
* @kbdev: Device pointer
* @core_req: Core requirements of atom
- * @affinity: Affinity of atom
* @coreref_state: Coreref state of atom
*
* This function should only be called from kbase_jd_done_worker() or
* js_return_worker().
*/
void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
- base_jd_core_req core_req, u64 affinity,
+ base_jd_core_req core_req,
enum kbase_atom_coreref_state coreref_state);
/**
@@ -179,17 +178,6 @@
void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
/**
- * kbase_backend_inspect_head() - Return the atom currently at the head of slot
- * @js
- * @kbdev: Device pointer
- * @js: Job slot to inspect
- *
- * Return : Atom currently at the head of slot @js, or NULL
- */
-struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
- int js);
-
-/**
* kbase_backend_inspect_tail - Return the atom currently at the tail of slot
* @js
* @kbdev: Device pointer
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
index 7f64936..9b86b51 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -41,7 +41,7 @@
/**
* kbase_wait_write_flush() - Wait for GPU write flush
- * @kctx: Context pointer
+ * @kbdev: Kbase device
*
* Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
* its write buffer.
@@ -52,7 +52,7 @@
* This function is only in use for BASE_HW_ISSUE_6367
*/
#ifndef CONFIG_MALI_NO_MALI
-void kbase_wait_write_flush(struct kbase_context *kctx);
+void kbase_wait_write_flush(struct kbase_device *kbdev);
#endif
#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
index fcb9ad3..bee2f3a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -51,9 +51,20 @@
* specify pseudo chunked tiler alignment for JIT allocations.
* 11.7:
* - Removed UMP support
+ * 11.8:
+ * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
+ * 11.9:
+ * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
+ * under base_mem_alloc_flags
+ * 11.10:
+ * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
+ * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
+ * with one softjob.
+ * 11.11:
+ * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
*/
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 7
+#define BASE_UK_VERSION_MINOR 11
/**
* struct kbase_ioctl_version_check - Check version compatibility with kernel
@@ -652,6 +663,7 @@
*
* @in: Input parameters
* @out: Output parameters
+ *
* This structure is used when performing a call to dump GPU write fault
* addresses.
*/
@@ -673,9 +685,6 @@
#define KBASE_IOCTL_CINSTR_GWT_DUMP \
_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
-/* IOCTLs 36-41 are reserved */
-
-/* IOCTL 42 is free for use */
/***************
* test ioctls *
@@ -718,6 +727,37 @@
#define KBASE_IOCTL_TLSTREAM_STATS \
_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+/**
+ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address
+ * @cpu_addr: Memory address to write
+ * @value: Value to write
+ * @padding: Currently unused, must be zero
+ */
+struct kbase_ioctl_cs_event_memory_write {
+ __u64 cpu_addr;
+ __u8 value;
+ __u8 padding[7];
+};
+
+/**
+ * union kbase_ioctl_cs_event_memory_read - Read an event memory address
+ * @cpu_addr: Memory address to read
+ * @value: Value read
+ * @padding: Currently unused, must be zero
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ */
+union kbase_ioctl_cs_event_memory_read {
+ struct {
+ __u64 cpu_addr;
+ } in;
+ struct {
+ __u8 value;
+ __u8 padding[7];
+ } out;
+};
+
#endif
/**********************************
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
index ae3e0f9..7a862bcc 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -91,6 +91,7 @@
} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
/* Soft-job */
if (katom->will_fail_event_code) {
+ kbase_finish_soft_job(katom);
katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
return 0;
}
@@ -808,7 +809,6 @@
katom->nr_extres = user_atom->nr_extres;
katom->extres = NULL;
katom->device_nr = user_atom->device_nr;
- katom->affinity = 0;
katom->jc = user_atom->jc;
katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
katom->core_req = user_atom->core_req;
@@ -923,10 +923,35 @@
if (will_fail) {
if (!queued) {
+ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ /* This softjob has failed due to a previous
+ * dependency; however, we should still run the
+ * prepare & finish functions
+ */
+ int err = kbase_prepare_soft_job(katom);
+
+ if (err >= 0)
+ kbase_finish_soft_job(katom);
+ }
+
ret = jd_done_nolock(katom, NULL);
goto out;
} else {
+
+ if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ /* This softjob has failed due to a previous
+ * dependency; however, we should still run the
+ * prepare & finish functions
+ */
+ if (kbase_prepare_soft_job(katom) != 0) {
+ katom->event_code =
+ BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom, NULL);
+ goto out;
+ }
+ }
+
katom->will_fail_event_code = katom->event_code;
ret = false;
@@ -1003,11 +1028,13 @@
goto out;
}
- /* Reject fence wait soft-job atoms accessing external resources */
+ /* Reject soft-job atoms of certain types from accessing external resources */
if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) &&
- ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT)) {
+ (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) ||
+ ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) ||
+ ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) {
dev_warn(kctx->kbdev->dev,
- "Rejecting fence wait soft-job atom accessing external resources");
+ "Rejecting soft-job atom accessing external resources");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
ret = jd_done_nolock(katom, NULL);
goto out;
@@ -1123,9 +1150,6 @@
return -EINVAL;
}
- KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms,
- &kctx->timeline.jd_atoms_in_flight));
-
/* All atoms submitted in this call have the same flush ID */
latest_flush = kbase_backend_get_current_flush_id(kbdev);
@@ -1136,9 +1160,6 @@
if (copy_from_user(&user_atom, user_addr,
sizeof(user_atom)) != 0) {
err = -EINVAL;
- KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
- atomic_sub_return(nr_atoms - i,
- &kctx->timeline.jd_atoms_in_flight));
break;
}
@@ -1222,7 +1243,6 @@
struct kbasep_js_atom_retained_state katom_retained_state;
bool context_idle;
base_jd_core_req core_req = katom->core_req;
- u64 affinity = katom->affinity;
enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
/* Soft jobs should never reach this function */
@@ -1270,7 +1290,8 @@
return;
}
- if (katom->event_code != BASE_JD_EVENT_DONE)
+ if ((katom->event_code != BASE_JD_EVENT_DONE) &&
+ (!kbase_ctx_flag(katom->kctx, KCTX_DYING)))
dev_err(kbdev->dev,
"t6xx: GPU fault 0x%02lx from job slot %d\n",
(unsigned long)katom->event_code,
@@ -1368,8 +1389,7 @@
mutex_unlock(&jctx->lock);
}
- kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
- coreref_state);
+ kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
if (context_idle)
kbase_pm_context_idle(kbdev);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
index def56d2..66a8444 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -608,6 +608,7 @@
struct kbasep_js_kctx_info *js_kctx_info;
int js;
bool update_ctx_count = false;
+ unsigned long flags;
KBASE_DEBUG_ASSERT(kctx != NULL);
@@ -623,8 +624,10 @@
mutex_lock(&kbdev->js_data.queue_mutex);
mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) {
WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
@@ -1181,7 +1184,6 @@
}
KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
- KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
@@ -1935,7 +1937,7 @@
retained = retained << 1;
- if (kctx) {
+ if (kctx && !(kbdev->as_free & (1u << i))) {
kbase_ctx_sched_retain_ctx_refcount(kctx);
retained |= 1u;
/* We can only cope with up to 1 privileged context -
@@ -1979,14 +1981,20 @@
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
struct kbase_context *kctx, *n;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
list_for_each_entry_safe(kctx, n,
&kbdev->js_data.ctx_list_unpullable[js][prio],
jctx.sched_info.ctx.ctx_list_entry[js]) {
struct kbasep_js_kctx_info *js_kctx_info;
- unsigned long flags;
bool timer_sync = false;
+ /* Drop lock so we can take kctx mutexes */
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock,
+ flags);
+
js_kctx_info = &kctx->jctx.sched_info;
mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
@@ -2003,7 +2011,11 @@
kbase_backend_ctx_count_changed(kbdev);
mutex_unlock(&js_devdata->runpool_mutex);
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* Take lock before accessing list again */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
}
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
}
mutex_unlock(&js_devdata->queue_mutex);
@@ -2247,7 +2259,6 @@
bool context_idle = false;
unsigned long flags;
base_jd_core_req core_req = katom->core_req;
- u64 affinity = katom->affinity;
enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
@@ -2338,8 +2349,7 @@
kbase_js_sched_all(kbdev);
- kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity,
- coreref_state);
+ kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
}
void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
@@ -2747,12 +2757,16 @@
* handled when it leaves the runpool.
*/
if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
if (!list_empty(
&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
list_del_init(
&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
}
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
/* The following events require us to kill off remaining jobs
* and update PM book-keeping:
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h
index 963cef9..355da27 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -662,7 +662,8 @@
set_bit = (u16) (1u << kctx->as_nr);
- dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)",
+ kctx, kctx->as_nr);
js_devdata->runpool_irq.submit_allowed |= set_bit;
}
@@ -687,7 +688,8 @@
clear_bit = (u16) (1u << kctx->as_nr);
clear_mask = ~clear_bit;
- dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)",
+ kctx, kctx->as_nr);
js_devdata->runpool_irq.submit_allowed &= clear_mask;
}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
index 1dd161b..3eff83a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -42,29 +42,34 @@
#include <mali_kbase_hw.h>
#include <mali_kbase_tlstream.h>
-/* This function finds out which RB tree the given GPU VA region belongs to
- * based on the region zone */
-static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx,
- struct kbase_va_region *reg)
+/* Forward declarations */
+static void free_partial_locked(struct kbase_context *kctx,
+ struct kbase_mem_pool *pool, struct tagged_addr tp);
+
+static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
{
- struct rb_root *rbtree = NULL;
+#if defined(CONFIG_ARM64)
+ /* VA_BITS can be as high as 48 bits, but all bits are available for
+ * both user and kernel.
+ */
+ size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+ /* x86_64 can access 48 bits of VA, but the 48th is used to denote
+ * kernel (1) vs userspace (0), so the max here is 47.
+ */
+ size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+ size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
- switch (reg->flags & KBASE_REG_ZONE_MASK) {
- case KBASE_REG_ZONE_CUSTOM_VA:
- rbtree = &kctx->reg_rbtree_custom;
- break;
- case KBASE_REG_ZONE_EXEC:
- rbtree = &kctx->reg_rbtree_exec;
- break;
- case KBASE_REG_ZONE_SAME_VA:
- rbtree = &kctx->reg_rbtree_same;
- /* fall through */
- default:
- rbtree = &kctx->reg_rbtree_same;
- break;
- }
+#ifdef CONFIG_64BIT
+ if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+ cpu_va_bits = 32;
+#endif
- return rbtree;
+ return cpu_va_bits;
}
/* This function finds out which RB tree the given pfn from the GPU VA belongs
@@ -79,8 +84,6 @@
#endif /* CONFIG_64BIT */
if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
rbtree = &kctx->reg_rbtree_custom;
- else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE)
- rbtree = &kctx->reg_rbtree_exec;
else
rbtree = &kctx->reg_rbtree_same;
#ifdef CONFIG_64BIT
@@ -96,15 +99,14 @@
}
/* This function inserts a region into the tree. */
-static void kbase_region_tracker_insert(struct kbase_context *kctx,
- struct kbase_va_region *new_reg)
+static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
{
u64 start_pfn = new_reg->start_pfn;
struct rb_node **link = NULL;
struct rb_node *parent = NULL;
struct rb_root *rbtree = NULL;
- rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg);
+ rbtree = new_reg->rbtree;
link = &(rbtree->rb_node);
/* Find the right place in the tree using tree search */
@@ -129,18 +131,13 @@
rb_insert_color(&(new_reg->rblink), rbtree);
}
-/* Find allocated region enclosing free range. */
-static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
- struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+static struct kbase_va_region *find_region_enclosing_range_rbtree(
+ struct rb_root *rbtree, u64 start_pfn, size_t nr_pages)
{
- struct rb_node *rbnode = NULL;
- struct kbase_va_region *reg = NULL;
- struct rb_root *rbtree = NULL;
-
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
u64 end_pfn = start_pfn + nr_pages;
- rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn);
-
rbnode = rbtree->rb_node;
while (rbnode) {
@@ -163,19 +160,12 @@
return NULL;
}
-/* Find region enclosing given address. */
-struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+struct kbase_va_region *kbase_find_region_enclosing_address(
+ struct rb_root *rbtree, u64 gpu_addr)
{
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
struct rb_node *rbnode;
struct kbase_va_region *reg;
- u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
- struct rb_root *rbtree = NULL;
-
- KBASE_DEBUG_ASSERT(NULL != kctx);
-
- lockdep_assert_held(&kctx->reg_lock);
-
- rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
rbnode = rbtree->rb_node;
@@ -199,14 +189,11 @@
return NULL;
}
-KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
-
-/* Find region with given base address */
-struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+ struct kbase_context *kctx, u64 gpu_addr)
{
u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
- struct rb_node *rbnode = NULL;
- struct kbase_va_region *reg = NULL;
struct rb_root *rbtree = NULL;
KBASE_DEBUG_ASSERT(NULL != kctx);
@@ -215,6 +202,18 @@
rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+ return kbase_find_region_enclosing_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+struct kbase_va_region *kbase_find_region_base_address(
+ struct rb_root *rbtree, u64 gpu_addr)
+{
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+ struct rb_node *rbnode = NULL;
+ struct kbase_va_region *reg = NULL;
+
rbnode = rbtree->rb_node;
while (rbnode) {
@@ -231,11 +230,25 @@
return NULL;
}
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+ struct kbase_context *kctx, u64 gpu_addr)
+{
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+ struct rb_root *rbtree = NULL;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+ return kbase_find_region_base_address(rbtree, gpu_addr);
+}
+
KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
/* Find region meeting given requirements */
static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
- struct kbase_context *kctx, struct kbase_va_region *reg_reqs,
+ struct kbase_va_region *reg_reqs,
size_t nr_pages, size_t align_offset, size_t align_mask,
u64 *out_start_pfn)
{
@@ -245,11 +258,9 @@
/* Note that this search is a linear search, as we do not have a target
address in mind, so does not benefit from the rbtree search */
- rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs);
+ rbtree = reg_reqs->rbtree;
- rbnode = rb_first(rbtree);
-
- while (rbnode) {
+ for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
if ((reg->nr_pages >= nr_pages) &&
(reg->flags & KBASE_REG_FREE)) {
@@ -265,6 +276,27 @@
start_pfn += align_mask;
start_pfn -= (start_pfn - align_offset) & (align_mask);
+ if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) {
+ /* Can't end at 4GB boundary */
+ if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB))
+ start_pfn += align_offset;
+
+ /* Can't start at 4GB boundary */
+ if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB))
+ start_pfn += align_offset;
+
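+ /* If, after the adjustments above, the candidate range would
+ * still start or end exactly on a 4GB boundary, this free
+ * region cannot satisfy the request; try the next one.
+ */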
+ if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) ||
+ !(start_pfn & BASE_MEM_PFN_MASK_4GB))
+ continue;
+ } else if (reg_reqs->flags &
+ KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+ u64 end_pfn = start_pfn + nr_pages - 1;
+
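+ /* If the candidate range would straddle a 4GB boundary, pull its
+ * start up to that boundary so the whole allocation can sit inside
+ * a single 4GB-aligned window (the containment check below then
+ * verifies it still fits in this free region).
+ */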
+ if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) !=
+ (end_pfn & ~BASE_MEM_PFN_MASK_4GB))
+ start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB;
+ }
+
if ((start_pfn >= reg->start_pfn) &&
(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
@@ -272,7 +304,6 @@
return reg;
}
}
- rbnode = rb_next(rbnode);
}
return NULL;
@@ -286,7 +317,7 @@
* region lock held. The associated memory is not released (see
* kbase_free_alloced_region). Internal use only.
*/
-static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+int kbase_remove_va_region(struct kbase_va_region *reg)
{
struct rb_node *rbprev;
struct kbase_va_region *prev = NULL;
@@ -298,7 +329,7 @@
int merged_back = 0;
int err = 0;
- reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg);
+ reg_rbtree = reg->rbtree;
/* Try to merge with the previous block first */
rbprev = rb_prev(&(reg->rblink));
@@ -344,7 +375,9 @@
*/
struct kbase_va_region *free_reg;
- free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+ free_reg = kbase_alloc_free_region(reg_rbtree,
+ reg->start_pfn, reg->nr_pages,
+ reg->flags & KBASE_REG_ZONE_MASK);
if (!free_reg) {
err = -ENOMEM;
goto out;
@@ -359,14 +392,21 @@
KBASE_EXPORT_TEST_API(kbase_remove_va_region);
/**
- * @brief Insert a VA region to the list, replacing the current at_reg.
+ * kbase_insert_va_region_nolock - Insert a VA region to the list,
+ * replacing the existing one.
+ *
+ * @new_reg: The new region to insert
+ * @at_reg: The region to replace
+ * @start_pfn: The Page Frame Number to insert at
+ * @nr_pages: The number of pages of the region
*/
-static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
+ struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
int err = 0;
- reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg);
+ reg_rbtree = at_reg->rbtree;
/* Must be a free region */
KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
@@ -390,19 +430,19 @@
KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
at_reg->nr_pages -= nr_pages;
- kbase_region_tracker_insert(kctx, new_reg);
+ kbase_region_tracker_insert(new_reg);
}
/* New region replaces the end of the old one, so insert after. */
else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
at_reg->nr_pages -= nr_pages;
- kbase_region_tracker_insert(kctx, new_reg);
+ kbase_region_tracker_insert(new_reg);
}
/* New region splits the old one, so insert and create new */
else {
struct kbase_va_region *new_front_reg;
- new_front_reg = kbase_alloc_free_region(kctx,
+ new_front_reg = kbase_alloc_free_region(reg_rbtree,
at_reg->start_pfn,
start_pfn - at_reg->start_pfn,
at_reg->flags & KBASE_REG_ZONE_MASK);
@@ -411,8 +451,8 @@
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
at_reg->start_pfn = start_pfn + nr_pages;
- kbase_region_tracker_insert(kctx, new_front_reg);
- kbase_region_tracker_insert(kctx, new_reg);
+ kbase_region_tracker_insert(new_front_reg);
+ kbase_region_tracker_insert(new_reg);
} else {
err = -ENOMEM;
}
@@ -422,83 +462,44 @@
}
/**
- * @brief Add a VA region to the list.
+ * kbase_add_va_region - Add a VA region to the region list for a context.
+ *
+ * @kctx: kbase context containing the region
+ * @reg: the region to add
+ * @addr: the address to insert the region at
+ * @nr_pages: the number of pages in the region
+ * @align: the minimum alignment in pages
*/
int kbase_add_va_region(struct kbase_context *kctx,
struct kbase_va_region *reg, u64 addr,
size_t nr_pages, size_t align)
{
- struct kbase_va_region *tmp;
- u64 gpu_pfn = addr >> PAGE_SHIFT;
int err = 0;
+ struct kbase_device *kbdev = kctx->kbdev;
+ int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx);
+ int gpu_pc_bits =
+ kbdev->gpu_props.props.core_props.log2_program_counter_size;
KBASE_DEBUG_ASSERT(NULL != kctx);
KBASE_DEBUG_ASSERT(NULL != reg);
lockdep_assert_held(&kctx->reg_lock);
- if (!align)
- align = 1;
-
- /* must be a power of 2 */
- KBASE_DEBUG_ASSERT(is_power_of_2(align));
- KBASE_DEBUG_ASSERT(nr_pages > 0);
-
- /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
- if (gpu_pfn) {
- struct device *dev = kctx->kbdev->dev;
-
- KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
-
- tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
- if (!tmp) {
- dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
- err = -ENOMEM;
- goto exit;
+ /* The executable allocation from the SAME_VA zone would already have an
+ * appropriately aligned GPU VA chosen for it.
+ */
+ if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) {
+ if (cpu_va_bits > gpu_pc_bits) {
+ align = max(align, (size_t)((1ULL << gpu_pc_bits)
+ >> PAGE_SHIFT));
}
- if (!(tmp->flags & KBASE_REG_FREE)) {
- dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
- dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
- dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
- err = -ENOMEM;
- goto exit;
- }
-
- err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
- if (err) {
- dev_warn(dev, "Failed to insert va region");
- err = -ENOMEM;
- goto exit;
- }
-
- goto exit;
}
- /* Path 2: Map any free address which meets the requirements.
- *
- * Depending on the zone the allocation request is for
- * we might need to retry it. */
do {
- u64 start_pfn;
- size_t align_offset = align;
- size_t align_mask = align - 1;
-
- if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
- WARN(align > 1,
- "kbase_add_va_region with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
- (unsigned long)align);
- align_mask = reg->extent - 1;
- align_offset = reg->extent - reg->initial_commit;
- }
-
- tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg,
- nr_pages, align_offset, align_mask,
- &start_pfn);
- if (tmp) {
- err = kbase_insert_va_region_nolock(kctx, reg, tmp,
- start_pfn, nr_pages);
+ err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages,
+ align);
+ if (err != -ENOMEM)
break;
- }
/*
* If the allocation is not from the same zone as JIT
@@ -510,37 +511,118 @@
break;
} while (kbase_jit_evict(kctx));
- if (!tmp)
- err = -ENOMEM;
-
- exit:
return err;
}
KBASE_EXPORT_TEST_API(kbase_add_va_region);
/**
+ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0, a free area in the rbtree is used; otherwise the
+ * specified address is used.
+ *
+ * @kbdev: The kbase device
+ * @reg: The region to add
+ * @addr: The address to add the region at, or 0 to map at any available address
+ * @nr_pages: The size of the region in pages
+ * @align: The minimum alignment in pages
+ */
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+ struct kbase_va_region *reg,
+ u64 addr, size_t nr_pages, size_t align)
+{
+ struct rb_root *rbtree = NULL;
+ struct kbase_va_region *tmp;
+ u64 gpu_pfn = addr >> PAGE_SHIFT;
+ int err = 0;
+
+ rbtree = reg->rbtree;
+
+ if (!align)
+ align = 1;
+
+ /* must be a power of 2 */
+ KBASE_DEBUG_ASSERT(is_power_of_2(align));
+ KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+ /* Path 1: Map a specific address. Find the enclosing region,
+ * which *must* be free.
+ */
+ if (gpu_pfn) {
+ struct device *dev = kbdev->dev;
+
+ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+ tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
+ nr_pages);
+ if (!tmp) {
+ dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+ err = -ENOMEM;
+ goto exit;
+ }
+ if (!(tmp->flags & KBASE_REG_FREE)) {
+ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
+ tmp->start_pfn, tmp->flags,
+ tmp->nr_pages, gpu_pfn, nr_pages);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
+ nr_pages);
+ if (err) {
+ dev_warn(dev, "Failed to insert va region");
+ err = -ENOMEM;
+ }
+ } else {
+ /* Path 2: Map any free address which meets the requirements. */
+ u64 start_pfn;
+ size_t align_offset = align;
+ size_t align_mask = align - 1;
+
+ if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+ WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+ __func__,
+ (unsigned long)align);
+ align_mask = reg->extent - 1;
+ align_offset = reg->extent - reg->initial_commit;
+ }
+
+ tmp = kbase_region_tracker_find_region_meeting_reqs(reg,
+ nr_pages, align_offset, align_mask,
+ &start_pfn);
+ if (tmp) {
+ err = kbase_insert_va_region_nolock(reg, tmp,
+ start_pfn, nr_pages);
+ } else {
+ err = -ENOMEM;
+ }
+ }
+
+exit:
+ return err;
+}
+
+/**
* @brief Initialize the internal region tracker data structure.
*/
static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
struct kbase_va_region *same_va_reg,
- struct kbase_va_region *exec_reg,
struct kbase_va_region *custom_va_reg)
{
kctx->reg_rbtree_same = RB_ROOT;
- kbase_region_tracker_insert(kctx, same_va_reg);
+ kbase_region_tracker_insert(same_va_reg);
- /* Although exec and custom_va_reg don't always exist,
+ /* Although custom_va_reg doesn't always exist,
* initialize unconditionally because of the mem_view debugfs
- * implementation which relies on these being empty
+ * implementation which relies on this being empty.
*/
- kctx->reg_rbtree_exec = RB_ROOT;
kctx->reg_rbtree_custom = RB_ROOT;
- if (exec_reg)
- kbase_region_tracker_insert(kctx, exec_reg);
if (custom_va_reg)
- kbase_region_tracker_insert(kctx, custom_va_reg);
+ kbase_region_tracker_insert(custom_va_reg);
}
static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
@@ -561,34 +643,18 @@
void kbase_region_tracker_term(struct kbase_context *kctx)
{
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
}
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
+{
+ kbase_region_tracker_erase_rbtree(rbtree);
+}
+
static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
{
-#if defined(CONFIG_ARM64)
- /* VA_BITS can be as high as 48 bits, but all bits are available for
- * both user and kernel.
- */
- size_t cpu_va_bits = VA_BITS;
-#elif defined(CONFIG_X86_64)
- /* x86_64 can access 48 bits of VA, but the 48th is used to denote
- * kernel (1) vs userspace (0), so the max here is 47.
- */
- size_t cpu_va_bits = 47;
-#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
- size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
-#else
-#error "Unknown CPU VA width for this architecture"
-#endif
-
-#ifdef CONFIG_64BIT
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
- cpu_va_bits = 32;
-#endif
-
- return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+ return min(kbase_get_num_cpu_va_bits(kctx),
+ (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
}
/**
@@ -597,7 +663,6 @@
int kbase_region_tracker_init(struct kbase_context *kctx)
{
struct kbase_va_region *same_va_reg;
- struct kbase_va_region *exec_reg = NULL;
struct kbase_va_region *custom_va_reg = NULL;
size_t same_va_bits = kbase_get_same_va_bits(kctx);
u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
@@ -610,7 +675,7 @@
same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
/* all have SAME_VA */
- same_va_reg = kbase_alloc_free_region(kctx, 1,
+ same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
same_va_pages,
KBASE_REG_ZONE_SAME_VA);
@@ -620,7 +685,7 @@
}
#ifdef CONFIG_64BIT
- /* 32-bit clients have exec and custom VA zones */
+ /* 32-bit clients have custom VA zones */
if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
#endif
if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
@@ -634,38 +699,27 @@
if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
- exec_reg = kbase_alloc_free_region(kctx,
- KBASE_REG_ZONE_EXEC_BASE,
- KBASE_REG_ZONE_EXEC_SIZE,
- KBASE_REG_ZONE_EXEC);
-
- if (!exec_reg) {
- err = -ENOMEM;
- goto fail_free_same_va;
- }
-
- custom_va_reg = kbase_alloc_free_region(kctx,
+ custom_va_reg = kbase_alloc_free_region(
+ &kctx->reg_rbtree_custom,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
if (!custom_va_reg) {
err = -ENOMEM;
- goto fail_free_exec;
+ goto fail_free_same_va;
}
#ifdef CONFIG_64BIT
}
#endif
- kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg,
- custom_va_reg);
+ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
kctx->same_va_end = same_va_pages + 1;
+
kbase_gpu_vm_unlock(kctx);
return 0;
-fail_free_exec:
- kbase_free_alloced_region(exec_reg);
fail_free_same_va:
kbase_free_alloced_region(same_va_reg);
fail_unlock:
@@ -720,7 +774,7 @@
* Create a custom VA zone at the end of the VA for allocations which
* JIT can use so it doesn't have to allocate VA from the kernel.
*/
- custom_va_reg = kbase_alloc_free_region(kctx,
+ custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
kctx->same_va_end,
jit_va_pages,
KBASE_REG_ZONE_CUSTOM_VA);
@@ -734,7 +788,7 @@
goto fail_unlock;
}
- kbase_region_tracker_insert(kctx, custom_va_reg);
+ kbase_region_tracker_insert(custom_va_reg);
kbase_gpu_vm_unlock(kctx);
return 0;
@@ -765,6 +819,7 @@
return 0;
}
+
int kbase_mem_init(struct kbase_device *kbdev)
{
struct kbasep_mem_device *memdev;
@@ -830,15 +885,15 @@
* The allocated object is not part of any list yet, and is flagged as
* KBASE_REG_FREE. No mapping is allocated yet.
*
- * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA,
- * or KBASE_REG_ZONE_EXEC
+ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
*
*/
-struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone)
{
struct kbase_va_region *new_reg;
- KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(rbtree != NULL);
/* zone argument should only contain zone related region flags */
KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
@@ -853,7 +908,7 @@
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
- new_reg->kctx = kctx;
+ new_reg->rbtree = rbtree;
new_reg->flags = zone | KBASE_REG_FREE;
new_reg->flags |= KBASE_REG_GROWABLE;
@@ -868,6 +923,29 @@
KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
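+/* Recover the owning kbase_context from a region: the region records the
+ * rbtree it lives in, and the zone flags identify which of the context's
+ * rbtrees that is, so container_of() can walk back to the context.
+ */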
+static struct kbase_context *kbase_reg_flags_to_kctx(
+ struct kbase_va_region *reg)
+{
+ struct kbase_context *kctx = NULL;
+ struct rb_root *rbtree = reg->rbtree;
+
+ switch (reg->flags & KBASE_REG_ZONE_MASK) {
+ case KBASE_REG_ZONE_CUSTOM_VA:
+ kctx = container_of(rbtree, struct kbase_context,
+ reg_rbtree_custom);
+ break;
+ case KBASE_REG_ZONE_SAME_VA:
+ kctx = container_of(rbtree, struct kbase_context,
+ reg_rbtree_same);
+ break;
+ default:
+ WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+ break;
+ }
+
+ return kctx;
+}
+
/**
* @brief Free a region object.
*
@@ -881,7 +959,13 @@
void kbase_free_alloced_region(struct kbase_va_region *reg)
{
if (!(reg->flags & KBASE_REG_FREE)) {
- mutex_lock(&reg->kctx->jit_evict_lock);
+ struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+ if (WARN_ON(!kctx))
+ return;
+
+
+ mutex_lock(&kctx->jit_evict_lock);
/*
* The physical allocation should have been removed from the
@@ -891,7 +975,7 @@
* on the list at termination time of the region tracker.
*/
if (!list_empty(&reg->gpu_alloc->evict_node)) {
- mutex_unlock(&reg->kctx->jit_evict_lock);
+ mutex_unlock(&kctx->jit_evict_lock);
/*
* Unlink the physical allocation before unmaking it
@@ -916,14 +1000,14 @@
kbase_mem_evictable_unmake(reg->gpu_alloc);
}
} else {
- mutex_unlock(&reg->kctx->jit_evict_lock);
+ mutex_unlock(&kctx->jit_evict_lock);
}
/*
* Remove the region from the sticky resource metadata
* list should it be there.
*/
- kbase_sticky_resource_release(reg->kctx, NULL,
+ kbase_sticky_resource_release(kctx, NULL,
reg->start_pfn << PAGE_SHIFT);
kbase_mem_phy_alloc_put(reg->cpu_alloc);
@@ -971,11 +1055,13 @@
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_insert_pages(kctx,
+ err = kbase_mmu_insert_pages(kctx->kbdev,
+ &kctx->mmu,
reg->start_pfn + (i * stride),
alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
alloc->imported.alias.aliased[i].length,
- reg->flags & gwt_mask);
+ reg->flags & gwt_mask,
+ kctx->as_nr);
if (err)
goto bad_insert;
@@ -992,10 +1078,13 @@
}
}
} else {
- err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+ err = kbase_mmu_insert_pages(kctx->kbdev,
+ &kctx->mmu,
+ reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask);
+ reg->flags & gwt_mask,
+ kctx->as_nr);
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
@@ -1011,12 +1100,16 @@
KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
while (i--)
if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
- kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+ kbase_mmu_teardown_pages(kctx->kbdev,
+ &kctx->mmu,
+ reg->start_pfn + (i * stride),
+ reg->gpu_alloc->imported.alias.aliased[i].length,
+ kctx->as_nr);
kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
}
}
- kbase_remove_va_region(kctx, reg);
+ kbase_remove_va_region(reg);
return err;
}
@@ -1036,13 +1129,16 @@
if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
size_t i;
- err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn, reg->nr_pages, kctx->as_nr);
KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
} else {
- err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn, kbase_reg_current_backed_size(reg),
+ kctx->as_nr);
kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
}
@@ -1063,7 +1159,7 @@
if (err)
return err;
- err = kbase_remove_va_region(kctx, reg);
+ err = kbase_remove_va_region(reg);
return err;
}
@@ -1345,7 +1441,7 @@
lockdep_assert_held(&kctx->reg_lock);
if (reg->flags & KBASE_REG_JIT) {
- dev_warn(reg->kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+ dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n");
return -EINVAL;
}
@@ -1372,7 +1468,7 @@
err = kbase_gpu_munmap(kctx, reg);
if (err) {
- dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+ dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n");
goto out;
}
@@ -1482,7 +1578,8 @@
reg->flags |= KBASE_REG_GPU_NX;
if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
- if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+ if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED &&
+ !(flags & BASE_MEM_UNCACHED_GPU))
return -EINVAL;
} else if (flags & (BASE_MEM_COHERENT_SYSTEM |
BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
@@ -1497,8 +1594,20 @@
if (flags & BASE_MEM_TILER_ALIGN_TOP)
reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
+
/* Set up default MEMATTR usage */
- if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
+ if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+ if (kctx->kbdev->mmu_mode->flags &
+ KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
+ /* Override shareability, and MEMATTR for uncached */
+ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+ reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+ } else {
+ dev_warn(kctx->kbdev->dev,
+ "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n");
+ return -EINVAL;
+ }
+ } else if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
(reg->flags & KBASE_REG_SHARE_BOTH)) {
reg->flags |=
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
@@ -1507,6 +1616,12 @@
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
}
+ if (flags & BASE_MEM_PERMANENT_KERNEL_MAPPING)
+ reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
+
+ if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
+ reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
+
return 0;
}
@@ -1520,14 +1635,14 @@
struct tagged_addr *tp;
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
- KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
if (alloc->reg) {
if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
goto invalid_request;
}
- kctx = alloc->imported.kctx;
+ kctx = alloc->imported.native.kctx;
if (nr_pages_requested == 0)
goto done; /*nothing to do*/
@@ -1563,7 +1678,7 @@
if (nr_left) {
struct kbase_sub_alloc *sa, *temp_sa;
- mutex_lock(&kctx->mem_partials_lock);
+ spin_lock(&kctx->mem_partials_lock);
list_for_each_entry_safe(sa, temp_sa,
&kctx->mem_partials, link) {
@@ -1586,7 +1701,7 @@
}
}
}
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_unlock(&kctx->mem_partials_lock);
}
/* only if we actually have a chunk left <512. If more it indicates
@@ -1633,9 +1748,9 @@
nr_left = 0;
/* expose for later use */
- mutex_lock(&kctx->mem_partials_lock);
+ spin_lock(&kctx->mem_partials_lock);
list_add(&sa->link, &kctx->mem_partials);
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_unlock(&kctx->mem_partials_lock);
}
}
}
@@ -1696,7 +1811,7 @@
struct tagged_addr *new_pages = NULL;
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
- KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
lockdep_assert_held(&pool->pool_lock);
@@ -1709,7 +1824,7 @@
goto invalid_request;
}
- kctx = alloc->imported.kctx;
+ kctx = alloc->imported.native.kctx;
lockdep_assert_held(&kctx->mem_partials_lock);
@@ -1837,14 +1952,36 @@
if (nr_left != nr_pages_requested) {
size_t nr_pages_to_free = nr_pages_requested - nr_left;
- alloc->nents += nr_pages_to_free;
+ struct tagged_addr *start_free = alloc->pages + alloc->nents;
- kbase_process_page_usage_inc(kctx, nr_pages_to_free);
- kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
- kbase_atomic_add_pages(nr_pages_to_free,
- &kctx->kbdev->memdev.used_pages);
-
- kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
+#ifdef CONFIG_MALI_2MB_ALLOC
+ if (pool->order) {
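+ /* High-order (2MB) pool: every entry in the span being returned
+ * should be either a 2MB huge-page head or a 4KB sub-page of a
+ * partially used 2MB page, so only those two cases are handled.
+ */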
+ while (nr_pages_to_free) {
+ if (is_huge_head(*start_free)) {
+ kbase_mem_pool_free_pages_locked(
+ pool, 512,
+ start_free,
+ false, /* not dirty */
+ true); /* return to pool */
+ nr_pages_to_free -= 512;
+ start_free += 512;
+ } else if (is_partial(*start_free)) {
+ free_partial_locked(kctx, pool,
+ *start_free);
+ nr_pages_to_free--;
+ start_free++;
+ }
+ }
+ } else {
+#endif
+ kbase_mem_pool_free_pages_locked(pool,
+ nr_pages_to_free,
+ start_free,
+ false, /* not dirty */
+ true); /* return to pool */
+#ifdef CONFIG_MALI_2MB_ALLOC
+ }
+#endif
}
kbase_process_page_usage_dec(kctx, nr_pages_requested);
@@ -1861,10 +1998,10 @@
struct page *p, *head_page;
struct kbase_sub_alloc *sa;
- p = phys_to_page(as_phys_addr_t(tp));
+ p = as_page(tp);
head_page = (struct page *)p->lru.prev;
sa = (struct kbase_sub_alloc *)head_page->lru.next;
- mutex_lock(&kctx->mem_partials_lock);
+ spin_lock(&kctx->mem_partials_lock);
clear_bit(p - head_page, sa->sub_pages);
if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
list_del(&sa->link);
@@ -1875,14 +2012,14 @@
/* expose the partial again */
list_add(&sa->link, &kctx->mem_partials);
}
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_unlock(&kctx->mem_partials_lock);
}
int kbase_free_phy_pages_helper(
struct kbase_mem_phy_alloc *alloc,
size_t nr_pages_to_free)
{
- struct kbase_context *kctx = alloc->imported.kctx;
+ struct kbase_context *kctx = alloc->imported.native.kctx;
bool syncback;
bool reclaimed = (alloc->evicted != 0);
struct tagged_addr *start_free;
@@ -1890,7 +2027,7 @@
size_t freed = 0;
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
- KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
/* early out if nothing to do */
@@ -1976,13 +2113,13 @@
lockdep_assert_held(&pool->pool_lock);
lockdep_assert_held(&kctx->mem_partials_lock);
- p = phys_to_page(as_phys_addr_t(tp));
+ p = as_page(tp);
head_page = (struct page *)p->lru.prev;
sa = (struct kbase_sub_alloc *)head_page->lru.next;
clear_bit(p - head_page, sa->sub_pages);
if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
list_del(&sa->link);
- kbase_mem_pool_free(pool, head_page, true);
+ kbase_mem_pool_free_locked(pool, head_page, true);
kfree(sa);
} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
SZ_2M / SZ_4K - 1) {
@@ -1995,14 +2132,14 @@
struct kbase_mem_pool *pool, struct tagged_addr *pages,
size_t nr_pages_to_free)
{
- struct kbase_context *kctx = alloc->imported.kctx;
+ struct kbase_context *kctx = alloc->imported.native.kctx;
bool syncback;
bool reclaimed = (alloc->evicted != 0);
struct tagged_addr *start_free;
size_t freed = 0;
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
- KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
lockdep_assert_held(&pool->pool_lock);
@@ -2093,14 +2230,26 @@
switch (alloc->type) {
case KBASE_MEM_TYPE_NATIVE: {
- if (!WARN_ON(!alloc->imported.kctx)) {
+
+ if (!WARN_ON(!alloc->imported.native.kctx)) {
+ if (alloc->permanent_map)
+ kbase_phy_alloc_mapping_term(
+ alloc->imported.native.kctx,
+ alloc);
+
/*
* The physical allocation must have been removed from
* the eviction list before trying to free it.
*/
- mutex_lock(&alloc->imported.kctx->jit_evict_lock);
+ mutex_lock(
+ &alloc->imported.native.kctx->jit_evict_lock);
WARN_ON(!list_empty(&alloc->evict_node));
- mutex_unlock(&alloc->imported.kctx->jit_evict_lock);
+ mutex_unlock(
+ &alloc->imported.native.kctx->jit_evict_lock);
+
+ kbase_process_page_usage_dec(
+ alloc->imported.native.kctx,
+ alloc->imported.native.nr_struct_pages);
}
kbase_free_phy_pages_helper(alloc, alloc->nents);
break;
@@ -2134,14 +2283,6 @@
mmdrop(alloc->imported.user_buf.mm);
kfree(alloc->imported.user_buf.pages);
break;
- case KBASE_MEM_TYPE_TB:{
- void *tb;
-
- tb = alloc->imported.kctx->jctx.tb;
- kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
- vfree(tb);
- break;
- }
default:
WARN(1, "Unexecpted free of type %d\n", alloc->type);
break;
@@ -2220,6 +2361,14 @@
BASE_MEM_TILER_ALIGN_TOP)))
return false;
+ /* Requiring an allocation to lie within a single 4GB chunk is only
+ * needed for TLS memory and the Tiler heap, neither of which will
+ * ever contain executable code.
+ */
+ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
+ (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP)))
+ return false;
+
/* GPU should have at least read or write access otherwise there is no
reason for allocating. */
if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
@@ -2290,9 +2439,8 @@
return -EINVAL;
}
- if (va_pages > (U64_MAX / PAGE_SIZE)) {
- /* 64-bit address range is the max */
- dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than 64-bit address range!",
+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+ dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
(unsigned long long)va_pages);
return -ENOMEM;
}
@@ -2351,6 +2499,13 @@
#undef KBASE_MSG_PRE_FLAG
}
+ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) &&
+ (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) {
+ dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space",
+ (unsigned long long)va_pages);
+ return -EINVAL;
+ }
+
return 0;
#undef KBASE_MSG_PRE
}
@@ -2687,7 +2842,7 @@
if (reg->cpu_alloc != reg->gpu_alloc)
pages_required *= 2;
- mutex_lock(&kctx->mem_partials_lock);
+ spin_lock(&kctx->mem_partials_lock);
kbase_mem_pool_lock(pool);
/* As we can not allocate memory from the kernel with the vm_lock held,
@@ -2699,14 +2854,14 @@
int pool_delta = pages_required - kbase_mem_pool_size(pool);
kbase_mem_pool_unlock(pool);
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
if (kbase_mem_pool_grow(pool, pool_delta))
goto update_failed_unlocked;
kbase_gpu_vm_lock(kctx);
- mutex_lock(&kctx->mem_partials_lock);
+ spin_lock(&kctx->mem_partials_lock);
kbase_mem_pool_lock(pool);
}
@@ -2714,7 +2869,7 @@
delta, &prealloc_sas[0]);
if (!gpu_pages) {
kbase_mem_pool_unlock(pool);
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_unlock(&kctx->mem_partials_lock);
goto update_failed;
}
@@ -2727,12 +2882,12 @@
kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
pool, gpu_pages, delta);
kbase_mem_pool_unlock(pool);
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_unlock(&kctx->mem_partials_lock);
goto update_failed;
}
}
kbase_mem_pool_unlock(pool);
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_unlock(&kctx->mem_partials_lock);
ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
old_size);
@@ -2982,7 +3137,10 @@
void kbase_jit_backing_lost(struct kbase_va_region *reg)
{
- struct kbase_context *kctx = reg->kctx;
+ struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+
+ if (WARN_ON(!kctx))
+ return;
lockdep_assert_held(&kctx->jit_evict_lock);
@@ -3156,9 +3314,9 @@
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask);
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ pa, kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr);
if (err == 0)
return 0;
@@ -3270,11 +3428,12 @@
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg),
count,
(reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
- gwt_mask);
+ gwt_mask,
+ kctx->as_nr);
if (err)
goto err_unmap_attachment;
@@ -3292,7 +3451,8 @@
return 0;
err_teardown_orig_pages:
- kbase_mmu_teardown_pages(kctx, reg->start_pfn, count);
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ count, kctx->as_nr);
err_unmap_attachment:
dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
@@ -3372,9 +3532,11 @@
int err;
err = kbase_mmu_teardown_pages(
- kctx,
+ kctx->kbdev,
+ &kctx->mmu,
reg->start_pfn,
- alloc->nents);
+ alloc->nents,
+ kctx->as_nr);
WARN_ON(err);
}
@@ -3391,9 +3553,11 @@
if (reg && reg->gpu_alloc == alloc)
kbase_mmu_teardown_pages(
- kctx,
+ kctx->kbdev,
+ &kctx->mmu,
reg->start_pfn,
- kbase_reg_current_backed_size(reg));
+ kbase_reg_current_backed_size(reg),
+ kctx->as_nr);
if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
writeable = false;
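
The hunks above switch kctx->mem_partials_lock from a mutex to a spinlock and route every allocation made under it through the *_locked pool helpers, which must not sleep. A minimal sketch of the resulting lock ordering, modelled on the JIT grow path above; the grow_delta() wrapper is illustrative and not part of the patch, while the kbase_* calls and locks are the ones shown in the diff:

#include <mali_kbase.h>

/* Sketch only: vm_lock -> mem_partials_lock (spinlock) -> pool_lock ordering.
 * Nothing in this critical section may sleep; pages must already be in the
 * pool (kbase_mem_pool_grow() has to run with all of these locks dropped).
 */
static int grow_delta(struct kbase_context *kctx, struct kbase_va_region *reg,
                      struct kbase_mem_pool *pool, size_t delta,
                      struct kbase_sub_alloc **prealloc_sas)
{
        struct tagged_addr *gpu_pages;

        kbase_gpu_vm_lock(kctx);
        spin_lock(&kctx->mem_partials_lock);
        kbase_mem_pool_lock(pool);

        gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
                        delta, &prealloc_sas[0]);

        kbase_mem_pool_unlock(pool);
        spin_unlock(&kctx->mem_partials_lock);
        kbase_gpu_vm_unlock(kctx);

        return gpu_pages ? 0 : -ENOMEM;
}
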
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
index e55a8fb..901f1cf 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -45,6 +45,9 @@
/* Required for kbase_mem_evictable_unmake */
#include "mali_kbase_mem_linux.h"
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
+ int pages);
+
/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */
@@ -77,7 +80,6 @@
KBASE_MEM_TYPE_IMPORTED_UMM,
KBASE_MEM_TYPE_IMPORTED_USER_BUF,
KBASE_MEM_TYPE_ALIAS,
- KBASE_MEM_TYPE_TB,
KBASE_MEM_TYPE_RAW
};
@@ -125,6 +127,9 @@
/* type of buffer */
enum kbase_memory_type type;
+ /* Kernel side mapping of the alloc */
+ struct kbase_vmap_struct *permanent_map;
+
unsigned long properties;
/* member in union valid based on @a type */
@@ -142,8 +147,13 @@
size_t nents;
struct kbase_aliased *aliased;
} alias;
- /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
- struct kbase_context *kctx;
+ struct {
+ struct kbase_context *kctx;
+ /* Number of pages in this structure, including *pages.
+ * Used for kernel memory tracking.
+ */
+ size_t nr_struct_pages;
+ } native;
struct kbase_alloc_import_user_buf {
unsigned long address;
unsigned long size;
@@ -226,7 +236,7 @@
struct rb_node rblink;
struct list_head link;
- struct kbase_context *kctx; /* Backlink to base context */
+ struct rb_root *rbtree; /* Backlink to rb tree */
u64 start_pfn; /* The PFN in GPU space */
size_t nr_pages;
@@ -244,14 +254,18 @@
#define KBASE_REG_GPU_NX (1ul << 3)
/* Is CPU cached? */
#define KBASE_REG_CPU_CACHED (1ul << 4)
-/* Is GPU cached? */
+/* Is GPU cached?
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ */
#define KBASE_REG_GPU_CACHED (1ul << 5)
#define KBASE_REG_GROWABLE (1ul << 6)
/* Can grow on pf? */
#define KBASE_REG_PF_GROW (1ul << 7)
-/* Bit 8 is unused */
+/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
+#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
/* inner shareable coherency */
#define KBASE_REG_SHARE_IN (1ul << 9)
@@ -291,32 +305,26 @@
/* Memory is handled by JIT - user space should not be able to free it */
#define KBASE_REG_JIT (1ul << 24)
+/* Memory has permanent kernel side mapping */
+#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
+
#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
/* only used with 32-bit clients */
/*
- * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * On a 32bit platform, custom VA should be wired from 4GB
* to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
* limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
* So we put the default limit to the maximum possible on Linux and shrink
* it down, if required by the GPU, during initialization.
*/
-/*
- * Dedicated 16MB region for shader code:
- * VA range 0x101000000-0x102000000
- */
-#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1)
-#define KBASE_REG_ZONE_EXEC_BASE (0x101000000ULL >> PAGE_SHIFT)
-#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
-
-#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2)
-/* Starting after KBASE_REG_ZONE_EXEC */
-#define KBASE_REG_ZONE_CUSTOM_VA_BASE \
- (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE)
+#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT)
#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
/* end 32-bit clients only */
+
unsigned long flags;
size_t extent; /* nr of pages alloc'd on PF */
@@ -371,7 +379,9 @@
#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
-static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(
+ struct kbase_context *kctx, size_t nr_pages,
+ enum kbase_memory_type type)
{
struct kbase_mem_phy_alloc *alloc;
size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
@@ -401,6 +411,13 @@
if (!alloc)
return ERR_PTR(-ENOMEM);
+ if (type == KBASE_MEM_TYPE_NATIVE) {
+ alloc->imported.native.nr_struct_pages =
+ (alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ kbase_process_page_usage_inc(kctx,
+ alloc->imported.native.nr_struct_pages);
+ }
+
/* Store allocation method */
if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
@@ -427,23 +444,23 @@
KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
- reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+ reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
KBASE_MEM_TYPE_NATIVE);
if (IS_ERR(reg->cpu_alloc))
return PTR_ERR(reg->cpu_alloc);
else if (!reg->cpu_alloc)
return -ENOMEM;
- reg->cpu_alloc->imported.kctx = kctx;
+ reg->cpu_alloc->imported.native.kctx = kctx;
if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
&& (reg->flags & KBASE_REG_CPU_CACHED)) {
- reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+ reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
KBASE_MEM_TYPE_NATIVE);
if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
kbase_mem_phy_alloc_put(reg->cpu_alloc);
return -ENOMEM;
}
- reg->gpu_alloc->imported.kctx = kctx;
+ reg->gpu_alloc->imported.native.kctx = kctx;
} else {
reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
}
@@ -776,18 +793,39 @@
u8 max_allocations, u8 trim_level);
void kbase_region_tracker_term(struct kbase_context *kctx);
-struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+/**
+ * kbase_region_tracker_term_rbtree - Free memory for a region tracker
+ *
+ * This will free all the regions within the region tracker
+ *
+ * @rbtree: Region tracker tree root
+ */
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+ struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_enclosing_address(
+ struct rb_root *rbtree, u64 gpu_addr);
/**
* @brief Check that a pointer is actually a valid region.
*
* Must be called with context lock held.
*/
-struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+ struct kbase_context *kctx, u64 gpu_addr);
+struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
+ u64 gpu_addr);
-struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone);
void kbase_free_alloced_region(struct kbase_va_region *reg);
-int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
+ u64 addr, size_t nr_pages, size_t align);
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+ struct kbase_va_region *reg, u64 addr, size_t nr_pages,
+ size_t align);
+int kbase_remove_va_region(struct kbase_va_region *reg);
bool kbase_check_alloc_flags(unsigned long flags);
bool kbase_check_import_flags(unsigned long flags);
@@ -831,25 +869,44 @@
int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
-int kbase_mmu_init(struct kbase_context *kctx);
-void kbase_mmu_term(struct kbase_context *kctx);
+/**
+ * kbase_mmu_init - Initialise an object representing GPU page tables
+ *
+ * The structure should be terminated using kbase_mmu_term()
+ *
+ * @kbdev: kbase device
+ * @mmut: structure to initialise
+ * @kctx: optional kbase context, may be NULL if this set of MMU tables is not
+ * associated with a context
+ */
+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ struct kbase_context *kctx);
+/**
+ * kbase_mmu_term - Terminate an object representing GPU page tables
+ *
+ * This will free any page tables that have been allocated
+ *
+ * @kbdev: kbase device
+ * @mmut: kbase_mmu_table to be destroyed
+ */
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut);
-phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
-void kbase_mmu_free_pgd(struct kbase_context *kctx);
-int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags);
-int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags);
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ const u64 start_vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags);
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr);
int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr phys, size_t nr,
unsigned long flags);
-int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
-int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags);
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 vpfn,
+ size_t nr, int as_nr);
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags);
@@ -869,11 +926,19 @@
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
/**
+ * kbase_mmu_update - Configure an address space on the GPU to the specified
+ * MMU tables
+ *
* The caller has the following locking conditions:
* - It must hold kbase_device->mmu_hw_mutex
* - It must hold the hwaccess_lock
+ *
+ * @kbdev: Kbase device structure
+ * @mmut: The set of MMU tables to be configured on the address space
+ * @as_nr: The address space to be configured
*/
-void kbase_mmu_update(struct kbase_context *kctx);
+void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ int as_nr);
/**
* kbase_mmu_disable() - Disable the MMU for a previously active kbase context.
@@ -1046,6 +1111,8 @@
* Note : The caller must not hold vm_lock, as this could cause a deadlock if
* the kernel OoM killer runs. If the caller must allocate pages while holding
* this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
+ *
+ * This function cannot be used from interrupt context
*/
int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
size_t nr_pages_requested);
@@ -1056,7 +1123,9 @@
* @pool: Memory pool to allocate from
* @nr_pages_requested: number of physical pages to allocate
* @prealloc_sa: Information about the partial allocation if the amount
- * of memory requested is not a multiple of 2MB.
+ * of memory requested is not a multiple of 2MB. One
+ * instance of struct kbase_sub_alloc must be allocated by
+ * the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
*
* Allocates \a nr_pages_requested and updates the alloc object. This function
* does not allocate new pages from the kernel, and therefore will never trigger
@@ -1083,10 +1152,13 @@
* allocation can complete without another thread using the newly grown pages.
*
* If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then
- * @pool must be alloc->imported.kctx->lp_mem_pool. Otherwise it must be
- * alloc->imported.kctx->mem_pool.
- *
- * @prealloc_sa shall be set to NULL if it has been consumed by this function.
+ * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be
+ * alloc->imported.native.kctx->mem_pool.
+ * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be
+ * pre-allocated because we must not sleep (due to the usage of kmalloc())
+ * whilst holding pool->pool_lock.
+ * @prealloc_sa shall be set to NULL if it has been consumed by this function
+ * to indicate that the caller must not free it.
*
* Return: Pointer to array of allocated pages. NULL on failure.
*
@@ -1358,4 +1430,5 @@
*/
void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
+
#endif /* _KBASE_MEM_H_ */
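
The declarations above re-key the page-table API on a struct kbase_mmu_table plus an explicit address-space number rather than on a kbase_context. A hedged sketch of the new call shape, using only the signatures declared above; map_then_unmap() is illustrative and not part of the driver:

#include <mali_kbase.h>

/* Illustrative only: map nr pages into a context's GPU page tables via the
 * (kbdev, mmut, as_nr) interface, then tear the mapping down again.
 */
static int map_then_unmap(struct kbase_context *kctx, u64 vpfn,
                          struct tagged_addr *phys, size_t nr,
                          unsigned long flags)
{
        int err;

        err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, vpfn,
                                     phys, nr, flags, kctx->as_nr);
        if (err)
                return err;

        return kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, vpfn,
                                        nr, kctx->as_nr);
}
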
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
index 59cc035..bc95a0f 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -47,18 +47,193 @@
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
-#include <mali_kbase_config_defaults.h>
#include <mali_kbase_tlstream.h>
#include <mali_kbase_ioctl.h>
+
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+ struct kbase_vmap_struct *map);
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+ struct kbase_vmap_struct *map);
+
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+/* Retrieve the associated region pointer if the GPU address corresponds to
+ * one of the event memory pages. The enclosing region, if found, shouldn't
+ * have been marked as free.
+ */
+static struct kbase_va_region *kbase_find_event_mem_region(
+ struct kbase_context *kctx, u64 gpu_addr)
+{
+
+ return NULL;
+}
+
+/**
+ * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping
+ * of the physical allocation belonging to a
+ * region
+ * @kctx: The kernel base context @reg belongs to.
+ * @reg: The region whose physical allocation is to be mapped
+ * @vsize: The size of the requested region, in pages
+ * @size: The size in pages initially committed to the region
+ *
+ * Return: 0 on success, otherwise an error code indicating failure
+ *
+ * Maps the physical allocation backing a non-free @reg, so it may be
+ * accessed directly from the kernel. This is only supported for physical
+ * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of
+ * physical allocation.
+ *
+ * The mapping is stored directly in the allocation that backs @reg. The
+ * refcount is not incremented at this point. Instead, use of the mapping should
+ * be surrounded by kbase_phy_alloc_mapping_get() and
+ * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the
+ * client is accessing it.
+ *
+ * Both cached and uncached regions are allowed, but any sync operations are the
+ * responsibility of the client using the permanent mapping.
+ *
+ * A number of checks are made to ensure that a region that needs a permanent
+ * mapping can actually be supported:
+ * - The region must be created as fully backed
+ * - The region must not be growable
+ *
+ * This function will fail if those checks are not satisfied.
+ *
+ * On success, the region will also be forced into a certain kind:
+ * - It will no longer be growable
+ */
+static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx,
+ struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+ size_t size_bytes = (size << PAGE_SHIFT);
+ struct kbase_vmap_struct *kern_mapping;
+ int err = 0;
+
+ /* Can only map in regions that are always fully committed
+ * Don't setup the mapping twice
+ * Only support KBASE_MEM_TYPE_NATIVE allocations
+ */
+ if (vsize != size || reg->cpu_alloc->permanent_map != NULL ||
+ reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+ return -EINVAL;
+
+ if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
+ kctx->permanent_mapped_pages)) {
+ dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %lu pages",
+ (u64)size,
+ KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
+ kctx->permanent_mapped_pages);
+ return -ENOMEM;
+ }
+
+ kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL);
+ if (!kern_mapping)
+ return -ENOMEM;
+
+ err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping);
+ if (err < 0)
+ goto vmap_fail;
+
+ /* No support for growing or shrinking mapped regions */
+ reg->flags &= ~KBASE_REG_GROWABLE;
+
+ reg->cpu_alloc->permanent_map = kern_mapping;
+ kctx->permanent_mapped_pages += size;
+
+ return 0;
+vmap_fail:
+ kfree(kern_mapping);
+ return err;
+}
+
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+ struct kbase_mem_phy_alloc *alloc)
+{
+ WARN_ON(!alloc->permanent_map);
+ kbase_vunmap_phy_pages(kctx, alloc->permanent_map);
+ kfree(alloc->permanent_map);
+
+ alloc->permanent_map = NULL;
+
+ /* Mappings are only done on cpu_alloc, so don't need to worry about
+ * this being reduced a second time if a separate gpu_alloc is
+ * freed
+ */
+ WARN_ON(alloc->nents > kctx->permanent_mapped_pages);
+ kctx->permanent_mapped_pages -= alloc->nents;
+}
+
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx,
+ u64 gpu_addr,
+ struct kbase_vmap_struct **out_kern_mapping)
+{
+ struct kbase_va_region *reg;
+ void *kern_mem_ptr = NULL;
+ struct kbase_vmap_struct *kern_mapping;
+ u64 mapping_offset;
+
+ WARN_ON(!kctx);
+ WARN_ON(!out_kern_mapping);
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* First do a quick lookup in the list of event memory regions */
+ reg = kbase_find_event_mem_region(kctx, gpu_addr);
+
+ if (!reg) {
+ reg = kbase_region_tracker_find_region_enclosing_address(
+ kctx, gpu_addr);
+ }
+
+ if (reg == NULL || (reg->flags & KBASE_REG_FREE) != 0)
+ goto out_unlock;
+
+ kern_mapping = reg->cpu_alloc->permanent_map;
+ if (kern_mapping == NULL)
+ goto out_unlock;
+
+ mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+
+ /* Refcount the allocations to prevent them disappearing */
+ WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc);
+ WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc);
+ (void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc);
+ (void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc);
+
+ kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset);
+ *out_kern_mapping = kern_mapping;
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return kern_mem_ptr;
+}
+
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+ struct kbase_vmap_struct *kern_mapping)
+{
+ WARN_ON(!kctx);
+ WARN_ON(!kern_mapping);
+
+ WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx);
+ WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map);
+
+ kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc);
+ kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc);
+
+ /* kern_mapping and the gpu/cpu phy allocs backing it must not be used
+ * from now on
+ */
+}
+
struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
u64 *gpu_va)
{
int zone;
struct kbase_va_region *reg;
+ struct rb_root *rbtree;
struct device *dev;
KBASE_DEBUG_ASSERT(kctx);
@@ -75,6 +250,11 @@
goto bad_flags;
}
+ if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
+ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
+ /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
+ *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+ }
if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
!kbase_device_is_cpu_coherent(kctx->kbdev)) {
dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
@@ -90,14 +270,15 @@
goto bad_sizes;
/* find out which VA zone to use */
- if (*flags & BASE_MEM_SAME_VA)
+ if (*flags & BASE_MEM_SAME_VA) {
+ rbtree = &kctx->reg_rbtree_same;
zone = KBASE_REG_ZONE_SAME_VA;
- else if (*flags & BASE_MEM_PROT_GPU_EX)
- zone = KBASE_REG_ZONE_EXEC;
- else
+ } else {
+ rbtree = &kctx->reg_rbtree_custom;
zone = KBASE_REG_ZONE_CUSTOM_VA;
+ }
- reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+ reg = kbase_alloc_free_region(rbtree, 0, va_pages, zone);
if (!reg) {
dev_err(dev, "Failed to allocate free region");
goto no_region;
@@ -129,6 +310,21 @@
kbase_gpu_vm_lock(kctx);
+ if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) {
+ /* Permanent kernel mappings must happen as soon as
+ * reg->cpu_alloc->pages is ready. Currently this happens after
+ * kbase_alloc_phy_pages(). If we move that to setup pages
+ * earlier, this call must be moved as well
+ */
+ int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages,
+ commit_pages);
+ if (err < 0) {
+ kbase_gpu_vm_unlock(kctx);
+ goto no_kern_mapping;
+ }
+ }
+
+
/* mmap needed to setup VA? */
if (*flags & BASE_MEM_SAME_VA) {
unsigned long prot = PROT_NONE;
@@ -197,6 +393,7 @@
no_mmap:
no_cookie:
+no_kern_mapping:
no_mem:
kbase_mem_phy_alloc_put(reg->cpu_alloc);
kbase_mem_phy_alloc_put(reg->gpu_alloc);
@@ -279,6 +476,10 @@
}
if (KBASE_REG_TILER_ALIGN_TOP & reg->flags)
*out |= BASE_MEM_TILER_ALIGN_TOP;
+ if (!(KBASE_REG_GPU_CACHED & reg->flags))
+ *out |= BASE_MEM_UNCACHED_GPU;
+ if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
+ *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
@@ -441,7 +642,7 @@
*/
void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
{
- struct kbase_context *kctx = alloc->imported.kctx;
+ struct kbase_context *kctx = alloc->imported.native.kctx;
int __maybe_unused new_page_count;
kbase_process_page_usage_dec(kctx, alloc->nents);
@@ -461,7 +662,7 @@
static
void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
{
- struct kbase_context *kctx = alloc->imported.kctx;
+ struct kbase_context *kctx = alloc->imported.native.kctx;
int __maybe_unused new_page_count;
new_page_count = kbase_atomic_add_pages(alloc->nents,
@@ -480,7 +681,7 @@
int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
{
- struct kbase_context *kctx = gpu_alloc->imported.kctx;
+ struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
lockdep_assert_held(&kctx->reg_lock);
@@ -505,7 +706,7 @@
bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
{
- struct kbase_context *kctx = gpu_alloc->imported.kctx;
+ struct kbase_context *kctx = gpu_alloc->imported.native.kctx;
int err = 0;
lockdep_assert_held(&kctx->reg_lock);
@@ -619,6 +820,12 @@
if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)
goto out_unlock;
+ /* shareability flags are ignored for GPU uncached memory */
+ if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
+ ret = 0;
+ goto out_unlock;
+ }
+
/* no change? */
if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
ret = 0;
@@ -697,15 +904,18 @@
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
+ 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
} else {
- reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+ 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg)
goto no_region;
- reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+ reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
+ KBASE_MEM_TYPE_IMPORTED_UMM);
if (IS_ERR_OR_NULL(reg->gpu_alloc))
goto no_alloc_obj;
@@ -720,7 +930,6 @@
reg->flags &= ~KBASE_REG_FREE;
reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */
reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */
- reg->flags |= KBASE_REG_GPU_CACHED;
if (*flags & BASE_MEM_SECURE)
reg->flags |= KBASE_REG_SECURE;
@@ -769,6 +978,7 @@
{
long i;
struct kbase_va_region *reg;
+ struct rb_root *rbtree;
long faulted_pages;
int zone = KBASE_REG_ZONE_CUSTOM_VA;
bool shared_zone = false;
@@ -778,6 +988,12 @@
if ((address & (cache_line_alignment - 1)) != 0 ||
(size & (cache_line_alignment - 1)) != 0) {
+ if (*flags & BASE_MEM_UNCACHED_GPU) {
+ dev_warn(kctx->kbdev->dev,
+ "User buffer is not cache line aligned and marked as GPU uncached\n");
+ goto bad_size;
+ }
+
/* Coherency must be enabled to handle partial cache lines */
if (*flags & (BASE_MEM_COHERENT_SYSTEM |
BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
@@ -820,14 +1036,16 @@
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
zone = KBASE_REG_ZONE_SAME_VA;
- }
+ rbtree = &kctx->reg_rbtree_same;
+ } else
+ rbtree = &kctx->reg_rbtree_custom;
- reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+ reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
if (!reg)
goto no_region;
- reg->gpu_alloc = kbase_alloc_create(*va_pages,
+ reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
KBASE_MEM_TYPE_IMPORTED_USER_BUF);
if (IS_ERR_OR_NULL(reg->gpu_alloc))
goto no_alloc_obj;
@@ -994,21 +1212,23 @@
/* 64-bit tasks must MMAP anyway, but not expose this address to
* clients */
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(kctx, 0, *num_pages,
- KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
+ *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
#else
if (1) {
#endif
- reg = kbase_alloc_free_region(kctx, 0, *num_pages,
- KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+ 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg)
goto no_reg;
/* zero-sized page array, as we don't need one/can support one */
- reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+ reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS);
if (IS_ERR_OR_NULL(reg->gpu_alloc))
goto no_alloc_obj;
@@ -1052,6 +1272,8 @@
goto bad_handle; /* Free region */
if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
goto bad_handle; /* Ephemeral region */
+ if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED))
+ goto bad_handle; /* GPU uncached memory */
if (!aliasing_reg->gpu_alloc)
goto bad_handle; /* No alloc */
if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
@@ -1284,8 +1506,8 @@
/* Map the new pages into the GPU */
phy_pages = kbase_get_gpu_phy_pages(reg);
- ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
- phy_pages + old_pages, delta, reg->flags);
+ ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
+ phy_pages + old_pages, delta, reg->flags, kctx->as_nr);
return ret;
}
@@ -1312,8 +1534,8 @@
u64 delta = old_pages - new_pages;
int ret = 0;
- ret = kbase_mmu_teardown_pages(kctx,
- reg->start_pfn + new_pages, delta);
+ ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn + new_pages, delta, kctx->as_nr);
return ret;
}
@@ -1545,7 +1767,13 @@
.fault = kbase_cpu_vm_fault
};
-static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+static int kbase_cpu_mmap(struct kbase_context *kctx,
+ struct kbase_va_region *reg,
+ struct vm_area_struct *vma,
+ void *kaddr,
+ size_t nr_pages,
+ unsigned long aligned_offset,
+ int free_on_close)
{
struct kbase_cpu_mapping *map;
struct tagged_addr *page_array;
@@ -1627,7 +1855,7 @@
map->region = reg;
map->free_on_close = free_on_close;
- map->kctx = reg->kctx;
+ map->kctx = kctx;
map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
map->count = 1; /* start with one ref */
@@ -1640,91 +1868,6 @@
return err;
}
-static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
-{
- struct kbase_va_region *new_reg;
- u32 nr_pages;
- size_t size;
- int err = 0;
- u32 *tb;
- int owns_tb = 1;
-
- dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
- size = (vma->vm_end - vma->vm_start);
- nr_pages = size >> PAGE_SHIFT;
-
- if (!kctx->jctx.tb) {
- KBASE_DEBUG_ASSERT(0 != size);
- tb = vmalloc_user(size);
-
- if (NULL == tb) {
- err = -ENOMEM;
- goto out;
- }
-
- err = kbase_device_trace_buffer_install(kctx, tb, size);
- if (err) {
- vfree(tb);
- goto out;
- }
- } else {
- err = -EINVAL;
- goto out;
- }
-
- *kaddr = kctx->jctx.tb;
-
- new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
- if (!new_reg) {
- err = -ENOMEM;
- WARN_ON(1);
- goto out_no_region;
- }
-
- new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
- if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
- err = -ENOMEM;
- new_reg->cpu_alloc = NULL;
- WARN_ON(1);
- goto out_no_alloc;
- }
-
- new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
-
- new_reg->cpu_alloc->imported.kctx = kctx;
- new_reg->flags &= ~KBASE_REG_FREE;
- new_reg->flags |= KBASE_REG_CPU_CACHED;
-
- /* alloc now owns the tb */
- owns_tb = 0;
-
- if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
- err = -ENOMEM;
- WARN_ON(1);
- goto out_no_va_region;
- }
-
- *reg = new_reg;
-
- /* map read only, noexec */
- vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
- /* the rest of the flags is added by the cpu_mmap handler */
-
- dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
- return 0;
-
-out_no_va_region:
-out_no_alloc:
- kbase_free_alloced_region(new_reg);
-out_no_region:
- if (owns_tb) {
- kbase_device_trace_buffer_uninstall(kctx);
- vfree(tb);
- }
-out:
- return err;
-}
-
static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
{
struct kbase_va_region *new_reg;
@@ -1744,14 +1887,15 @@
goto out;
}
- new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+ new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
+ KBASE_REG_ZONE_SAME_VA);
if (!new_reg) {
err = -ENOMEM;
WARN_ON(1);
goto out;
}
- new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+ new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW);
if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
err = -ENOMEM;
new_reg->cpu_alloc = NULL;
@@ -1918,14 +2062,6 @@
/* Illegal handle for direct map */
err = -EINVAL;
goto out_unlock;
- case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
- err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
- if (0 != err)
- goto out_unlock;
- dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
- /* free the region on munmap */
- free_on_close = 1;
- break;
case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
/* MMU dump */
err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
@@ -1997,7 +2133,8 @@
} /* default */
} /* switch */
- err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+ err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset,
+ free_on_close);
if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
/* MMU dump - userspace should now have a reference on
@@ -2016,11 +2153,11 @@
KBASE_EXPORT_TEST_API(kbase_mmap);
-static void kbasep_sync_mem_regions(struct kbase_context *kctx,
+void kbase_sync_mem_regions(struct kbase_context *kctx,
struct kbase_vmap_struct *map, enum kbase_sync_type dest)
{
size_t i;
- off_t const offset = (uintptr_t)map->gpu_addr & ~PAGE_MASK;
+ off_t const offset = map->offset_in_page;
size_t const page_count = PFN_UP(offset + map->size);
/* Sync first page */
@@ -2046,66 +2183,55 @@
}
}
-void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
- unsigned long prot_request, struct kbase_vmap_struct *map)
+static int kbase_vmap_phy_pages(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 offset_bytes, size_t size,
+ struct kbase_vmap_struct *map)
{
- struct kbase_va_region *reg;
unsigned long page_index;
- unsigned int offset = gpu_addr & ~PAGE_MASK;
- size_t page_count = PFN_UP(offset + size);
+ unsigned int offset_in_page = offset_bytes & ~PAGE_MASK;
+ size_t page_count = PFN_UP(offset_in_page + size);
struct tagged_addr *page_array;
struct page **pages;
void *cpu_addr = NULL;
pgprot_t prot;
size_t i;
- if (!size || !map)
- return NULL;
+ if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc)
+ return -EINVAL;
/* check if page_count calculation will wrap */
if (size > ((size_t)-1 / PAGE_SIZE))
- return NULL;
+ return -EINVAL;
- kbase_gpu_vm_lock(kctx);
-
- reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
- if (!reg || (reg->flags & KBASE_REG_FREE))
- goto out_unlock;
-
- page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+ page_index = offset_bytes >> PAGE_SHIFT;
/* check if page_index + page_count will wrap */
if (-1UL - page_count < page_index)
- goto out_unlock;
+ return -EINVAL;
if (page_index + page_count > kbase_reg_current_backed_size(reg))
- goto out_unlock;
+ return -ENOMEM;
if (reg->flags & KBASE_REG_DONT_NEED)
- goto out_unlock;
-
- /* check access permissions can be satisfied
- * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */
- if ((reg->flags & prot_request) != prot_request)
- goto out_unlock;
-
- page_array = kbase_get_cpu_phy_pages(reg);
- if (!page_array)
- goto out_unlock;
-
- pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
- if (!pages)
- goto out_unlock;
-
- for (i = 0; i < page_count; i++)
- pages[i] = phys_to_page(as_phys_addr_t(page_array[page_index +
- i]));
+ return -EINVAL;
prot = PAGE_KERNEL;
if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
/* Map uncached */
prot = pgprot_writecombine(prot);
}
+
+ page_array = kbase_get_cpu_phy_pages(reg);
+ if (!page_array)
+ return -ENOMEM;
+
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ for (i = 0; i < page_count; i++)
+ pages[i] = as_page(page_array[page_index + i]);
+
/* Note: enforcing a RO prot_request onto prot is not done, since:
* - CPU-arch-specific integration required
* - kbase_vmap() requires no access checks to be made/enforced */
@@ -2115,26 +2241,66 @@
kfree(pages);
if (!cpu_addr)
- goto out_unlock;
+ return -ENOMEM;
- map->gpu_addr = gpu_addr;
- map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ map->offset_in_page = offset_in_page;
+ map->cpu_alloc = reg->cpu_alloc;
map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
- map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+ map->gpu_alloc = reg->gpu_alloc;
map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
- map->addr = (void *)((uintptr_t)cpu_addr + offset);
+ map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page);
map->size = size;
map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) &&
!kbase_mem_is_imported(map->gpu_alloc->type);
if (map->sync_needed)
- kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
- kbase_gpu_vm_unlock(kctx);
+ kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU);
- return map->addr;
+ return 0;
+}
+
+void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+ unsigned long prot_request, struct kbase_vmap_struct *map)
+{
+ struct kbase_va_region *reg;
+ void *addr = NULL;
+ u64 offset_bytes;
+ struct kbase_mem_phy_alloc *cpu_alloc;
+ struct kbase_mem_phy_alloc *gpu_alloc;
+ int err;
+
+ kbase_gpu_vm_lock(kctx);
+
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+ gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ /* check access permissions can be satisfied
+ * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR}
+ */
+ if ((reg->flags & prot_request) != prot_request)
+ goto out_unlock;
+
+ offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT);
+ cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map);
+ if (err < 0)
+ goto fail_vmap_phy_pages;
+
+ addr = map->addr;
out_unlock:
kbase_gpu_vm_unlock(kctx);
+ return addr;
+
+fail_vmap_phy_pages:
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mem_phy_alloc_put(cpu_alloc);
+ kbase_mem_phy_alloc_put(gpu_alloc);
+
return NULL;
}
@@ -2150,22 +2316,29 @@
}
KBASE_EXPORT_TEST_API(kbase_vmap);
-void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
+ struct kbase_vmap_struct *map)
{
void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
vunmap(addr);
if (map->sync_needed)
- kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
- map->gpu_addr = 0;
- map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
- map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+ kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE);
+
+ map->offset_in_page = 0;
map->cpu_pages = NULL;
map->gpu_pages = NULL;
map->addr = NULL;
map->size = 0;
map->sync_needed = false;
}
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+ kbase_vunmap_phy_pages(kctx, map);
+ map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+ map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+}
KBASE_EXPORT_TEST_API(kbase_vunmap);
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
@@ -2295,7 +2468,8 @@
handle->size = size;
- reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, pages,
+ KBASE_REG_ZONE_SAME_VA);
if (!reg)
goto no_reg;
@@ -2303,7 +2477,7 @@
if (kbase_update_region_flags(kctx, reg, flags) != 0)
goto invalid_flags;
- reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+ reg->cpu_alloc = kbase_alloc_create(kctx, pages, KBASE_MEM_TYPE_RAW);
if (IS_ERR_OR_NULL(reg->cpu_alloc))
goto no_alloc;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
index a14826e..a8a52a7 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -206,7 +206,7 @@
bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
struct kbase_vmap_struct {
- u64 gpu_addr;
+ off_t offset_in_page;
struct kbase_mem_phy_alloc *cpu_alloc;
struct kbase_mem_phy_alloc *gpu_alloc;
struct tagged_addr *cpu_pages;
@@ -322,6 +322,20 @@
extern const struct vm_operations_struct kbase_vm_ops;
/**
+ * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode
+ * CPU mapping.
+ * @kctx: Context the CPU mapping belongs to.
+ * @map: Structure describing the CPU mapping, setup previously by the
+ * kbase_vmap() call.
+ * @dest: Indicates the type of maintenance required (i.e. flush or invalidate)
+ *
+ * Note: The caller shall ensure that CPU mapping is not revoked & remains
+ * active whilst the maintenance is in progress.
+ */
+void kbase_sync_mem_regions(struct kbase_context *kctx,
+ struct kbase_vmap_struct *map, enum kbase_sync_type dest);
+
+/**
* kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
* @kctx: Context the region belongs to
* @reg: The GPU region
@@ -352,4 +366,82 @@
struct kbase_va_region *reg,
u64 new_pages, u64 old_pages);
+/**
+ * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a
+ * physical allocation
+ * @kctx: The kernel base context associated with the mapping
+ * @alloc: Pointer to the allocation to terminate
+ *
+ * This function will unmap the kernel mapping, and free any structures used to
+ * track it.
+ */
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+ struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent
+ * mapping of a physical allocation
+ * @kctx: The kernel base context @gpu_addr will be looked up in
+ * @gpu_addr: The gpu address to lookup for the kernel-side CPU mapping
+ * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer
+ * which will be used for a call to
+ * kbase_phy_alloc_mapping_put()
+ *
+ * Return: Pointer to a kernel-side accessible location that directly
+ * corresponds to @gpu_addr, or NULL on failure
+ *
+ * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access
+ * that location kernel-side. Only certain kinds of memory have a permanent
+ * kernel mapping, refer to the internal functions
+ * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more
+ * information.
+ *
+ * If this function succeeds, a CPU access to the returned pointer will access
+ * the actual location represented by @gpu_addr. That is, the return value does
+ * not require any offset added to it to access the location specified in
+ * @gpu_addr
+ *
+ * The client must take care to either apply any necessary sync operations when
+ * accessing the data, or ensure that the enclosing region was coherent with
+ * the GPU, or uncached in the CPU.
+ *
+ * Refcounts on the physical allocations backing the region are taken, so
+ * that they do not disappear whilst the client is accessing them. Once the
+ * client has finished accessing the memory, it must be released with a call to
+ * kbase_phy_alloc_mapping_put()
+ *
+ * Whilst this is expected to execute quickly (the mapping was already setup
+ * when the physical allocation was created), the call is not IRQ-safe due to
+ * the region lookup involved.
+ *
+ * A NULL return value may indicate that:
+ * - a userside process has freed the allocation, and so @gpu_addr is no longer
+ * valid
+ * - the region containing @gpu_addr does not support a permanent kernel mapping
+ */
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr,
+ struct kbase_vmap_struct **out_kern_mapping);
+
+/**
+ * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a
+ * physical allocation
+ * @kctx: The kernel base context associated with the mapping
+ * @kern_mapping: Pointer to the struct kbase_vmap_struct obtained from a
+ * call to kbase_phy_alloc_mapping_get()
+ *
+ * Releases the reference to the allocations backing @kern_mapping that was
+ * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used
+ * when the client no longer needs to access the kernel-side CPU pointer.
+ *
+ * If this was the last reference on the underlying physical allocations, they
+ * will go through the normal allocation free steps, which also includes an
+ * unmap of the permanent kernel mapping for those allocations.
+ *
+ * Due to these operations, the function is not IRQ-safe. However it is
+ * expected to execute quickly in the normal case, i.e. when the region holding
+ * the physical allocation is still present.
+ */
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+ struct kbase_vmap_struct *kern_mapping);
+
#endif /* _KBASE_MEM_LINUX_H_ */
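
The kbase_phy_alloc_mapping_get()/put() pair declared above is meant to bracket each kernel-side access so the backing allocation cannot disappear mid-access. A minimal usage sketch under that assumption; read_u32_at() is a hypothetical helper, and any cache maintenance remains the caller's responsibility as noted in the comments above:

#include <mali_kbase.h>

/* Hypothetical helper: read a u32 from GPU memory that was created with a
 * permanent kernel mapping, holding a reference only for the access.
 */
static int read_u32_at(struct kbase_context *kctx, u64 gpu_addr, u32 *out)
{
        struct kbase_vmap_struct *mapping;
        u32 *ptr;

        ptr = kbase_phy_alloc_mapping_get(kctx, gpu_addr, &mapping);
        if (!ptr)
                return -EINVAL; /* freed, or no permanent mapping for region */

        *out = *ptr;

        kbase_phy_alloc_mapping_put(kctx, mapping);
        return 0;
}
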
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
index 6581ecf..7011603 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -75,6 +75,17 @@
}
/**
+ * as_page - Retrieve the struct page from a tagged address
+ * @t: tagged address to be translated.
+ *
+ * Return: pointer to struct page corresponding to tagged address.
+ */
+static inline struct page *as_page(struct tagged_addr t)
+{
+ return phys_to_page(as_phys_addr_t(t));
+}
+
+/**
* as_tagged - Convert the physical address to tagged address type though
* there is no tag info present, the lower order 12 bits will be 0
* @phys: physical address to be converted to tagged type
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
index 1255df0..0f91be1 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -414,9 +414,10 @@
void kbase_mem_pool_term(struct kbase_mem_pool *pool)
{
struct kbase_mem_pool *next_pool = pool->next_pool;
- struct page *p;
+ struct page *p, *tmp;
size_t nr_to_spill = 0;
LIST_HEAD(spill_list);
+ LIST_HEAD(free_list);
int i;
pool_dbg(pool, "terminate()\n");
@@ -434,7 +435,6 @@
/* Zero pages first without holding the next_pool lock */
for (i = 0; i < nr_to_spill; i++) {
p = kbase_mem_pool_remove_locked(pool);
- kbase_mem_pool_zero_page(pool, p);
list_add(&p->lru, &spill_list);
}
}
@@ -442,18 +442,26 @@
while (!kbase_mem_pool_is_empty(pool)) {
/* Free remaining pages to kernel */
p = kbase_mem_pool_remove_locked(pool);
- kbase_mem_pool_free_page(pool, p);
+ list_add(&p->lru, &free_list);
}
kbase_mem_pool_unlock(pool);
if (next_pool && nr_to_spill) {
+ list_for_each_entry(p, &spill_list, lru)
+ kbase_mem_pool_zero_page(pool, p);
+
/* Add new page list to next_pool */
kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill);
pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill);
}
+ list_for_each_entry_safe(p, tmp, &free_list, lru) {
+ list_del_init(&p->lru);
+ kbase_mem_pool_free_page(pool, p);
+ }
+
pool_dbg(pool, "terminated\n");
}
@@ -678,7 +686,7 @@
continue;
if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
- p = phys_to_page(as_phys_addr_t(pages[i]));
+ p = as_page(pages[i]);
if (zero)
kbase_mem_pool_zero_page(pool, p);
else if (sync)
@@ -720,7 +728,7 @@
continue;
if (is_huge_head(pages[i]) || !is_huge(pages[i])) {
- p = phys_to_page(as_phys_addr_t(pages[i]));
+ p = as_page(pages[i]);
if (zero)
kbase_mem_pool_zero_page(pool, p);
else if (sync)
@@ -780,7 +788,7 @@
continue;
}
- p = phys_to_page(as_phys_addr_t(pages[i]));
+ p = as_page(pages[i]);
kbase_mem_pool_free_page(pool, p);
pages[i] = as_tagged(0);
@@ -824,7 +832,7 @@
continue;
}
- p = phys_to_page(as_phys_addr_t(pages[i]));
+ p = as_page(pages[i]);
kbase_mem_pool_free_page(pool, p);
pages[i] = as_tagged(0);
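
The kbase_mem_pool_term() change above defers page zeroing and freeing until the pool lock has been dropped, by first detaching the pages onto local lists. A generic sketch of that deferral pattern; struct entry, drain_and_free() and the release() callback are illustrative, not driver code:

#include <linux/list.h>
#include <linux/spinlock.h>

/* Illustrative only: detach entries while holding the lock, then do the
 * expensive per-entry work (zeroing, freeing) with the lock dropped.
 */
struct entry {
        struct list_head lru;
};

static void drain_and_free(spinlock_t *lock, struct list_head *pool_list,
                           void (*release)(struct entry *))
{
        struct entry *e, *tmp;
        LIST_HEAD(free_list);

        spin_lock(lock);
        list_splice_init(pool_list, &free_list);
        spin_unlock(lock);

        list_for_each_entry_safe(e, tmp, &free_list, lru) {
                list_del_init(&e->lru);
                release(e);
        }
}
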
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
index 7f44d81..43b0f6c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -33,7 +33,7 @@
* @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
*/
#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
- ((size_t) (64 + ((80 + (56 * 64)) * 31) + 56))
+ ((size_t) (64 + ((80 + (56 * 64)) * 34) + 56))
#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
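
For reference, the new factor makes the worst-case profile buffer 64 + (80 + 56 * 64) * 34 + 56 = 124,696 bytes, up from 113,704 bytes with the previous factor of 31.
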
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
index a998930..3ba861d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -72,6 +72,19 @@
u64 vpfn, size_t nr, bool sync);
/**
+ * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
+ * @kbdev: Device pointer.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ * @sync: Set if the operation should be synchronous or not.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ *
+ * This is used for MMU tables which do not belong to a user space context.
+ */
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+ u64 vpfn, size_t nr, bool sync, int as_nr);
+
+/**
* kbase_mmu_sync_pgd - sync page directory to memory
* @kbdev: Device pointer.
* @handle: Address of DMA region.
@@ -103,6 +116,9 @@
static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
struct kbase_as *as, const char *reason_str);
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags);
/**
* reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
@@ -120,7 +136,8 @@
*
* Return: the number of backed pages to increase by
*/
-static size_t reg_grow_calc_extra_pages(struct kbase_va_region *reg, size_t fault_rel_pfn)
+static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
+ struct kbase_va_region *reg, size_t fault_rel_pfn)
{
size_t multiple = reg->extent;
size_t reg_current_size = kbase_reg_current_backed_size(reg);
@@ -128,7 +145,7 @@
size_t remainder;
if (!multiple) {
- dev_warn(reg->kctx->kbdev->dev,
+ dev_warn(kbdev->dev,
"VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
return minimum_extra;
@@ -172,21 +189,21 @@
}
#ifdef CONFIG_MALI_JOB_DUMP
-static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_context *kctx,
+static void kbase_gpu_mmu_handle_write_faulting_as(
struct kbase_device *kbdev,
struct kbase_as *faulting_as,
u64 start_pfn, size_t nr, u32 op)
{
mutex_lock(&kbdev->mmu_hw_mutex);
- kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
- kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, start_pfn,
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn,
nr, op, 1);
mutex_unlock(&kbdev->mmu_hw_mutex);
- kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
}
@@ -263,7 +280,7 @@
else
op = AS_COMMAND_FLUSH_PT;
- kbase_gpu_mmu_handle_write_faulting_as(kctx, kbdev, faulting_as,
+ kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as,
fault_pfn, 1, op);
kbase_gpu_vm_unlock(kctx);
@@ -297,6 +314,203 @@
}
#endif
+#define MAX_POOL_LEVEL 2
+
+/**
+ * page_fault_try_alloc - Try to allocate memory from a context pool
+ * @kctx: Context pointer
+ * @region: Region to grow
+ * @new_pages: Number of 4 kB pages to allocate
+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on
+ * failure. This can be either 4 kB or 2 MB pages, depending on
+ * the number of pages requested.
+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
+ * for 2 MB, false for 4 kB.
+ * @prealloc_sas: Pointer to kbase_sub_alloc structures
+ *
+ * This function will try to allocate as many pages as possible from the context
+ * pool, then if required will try to allocate the remaining pages from the
+ * device pool.
+ *
+ * This function will not allocate any new memory beyond what is already
+ * present in the context or device pools. This is because it is intended to be
+ * called with the vm_lock held, which could cause recursive locking if the
+ * allocation caused the out-of-memory killer to run.
+ *
+ * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be
+ * a count of 2 MB pages, otherwise it will be a count of 4 kB pages.
+ *
+ * Return: true if successful, false on failure
+ */
+static bool page_fault_try_alloc(struct kbase_context *kctx,
+ struct kbase_va_region *region, size_t new_pages,
+ int *pages_to_grow, bool *grow_2mb_pool,
+ struct kbase_sub_alloc **prealloc_sas)
+{
+ struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
+ struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
+ size_t pages_alloced[MAX_POOL_LEVEL] = {0};
+ struct kbase_mem_pool *pool, *root_pool;
+ int pool_level = 0;
+ bool alloc_failed = false;
+ size_t pages_still_required;
+
+#ifdef CONFIG_MALI_2MB_ALLOC
+ if (new_pages >= (SZ_2M / SZ_4K)) {
+ root_pool = &kctx->lp_mem_pool;
+ *grow_2mb_pool = true;
+ } else {
+#endif
+ root_pool = &kctx->mem_pool;
+ *grow_2mb_pool = false;
+#ifdef CONFIG_MALI_2MB_ALLOC
+ }
+#endif
+
+ if (region->gpu_alloc != region->cpu_alloc)
+ new_pages *= 2;
+
+ pages_still_required = new_pages;
+
+ /* Determine how many pages are in the pools before trying to allocate.
+ * Don't attempt to allocate & free if the allocation can't succeed.
+ */
+ for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
+ size_t pool_size_4k;
+
+ kbase_mem_pool_lock(pool);
+
+ pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+ if (pool_size_4k >= pages_still_required)
+ pages_still_required = 0;
+ else
+ pages_still_required -= pool_size_4k;
+
+ kbase_mem_pool_unlock(pool);
+
+ if (!pages_still_required)
+ break;
+ }
+
+ if (pages_still_required) {
+ /* Insufficient pages in pools. Don't try to allocate - just
+ * request a grow.
+ */
+ *pages_to_grow = pages_still_required;
+
+ return false;
+ }
+
+ /* Since we've dropped the pool locks, the amount of memory in the pools
+ * may change between the above check and the actual allocation.
+ */
+ pool = root_pool;
+ for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
+ size_t pool_size_4k;
+ size_t pages_to_alloc_4k;
+ size_t pages_to_alloc_4k_per_alloc;
+
+ kbase_mem_pool_lock(pool);
+
+		/* Allocate as much as possible from this pool */
+ pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+ pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
+ if (region->gpu_alloc == region->cpu_alloc)
+ pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
+ else
+ pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
+
+ pages_alloced[pool_level] = pages_to_alloc_4k;
+ if (pages_to_alloc_4k) {
+ gpu_pages[pool_level] =
+ kbase_alloc_phy_pages_helper_locked(
+ region->gpu_alloc, pool,
+ pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[0]);
+
+ if (!gpu_pages[pool_level]) {
+ alloc_failed = true;
+ } else if (region->gpu_alloc != region->cpu_alloc) {
+ cpu_pages[pool_level] =
+ kbase_alloc_phy_pages_helper_locked(
+ region->cpu_alloc, pool,
+ pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[1]);
+
+ if (!cpu_pages[pool_level])
+ alloc_failed = true;
+ }
+ }
+
+ kbase_mem_pool_unlock(pool);
+
+ if (alloc_failed) {
+ WARN_ON(!new_pages);
+ WARN_ON(pages_to_alloc_4k >= new_pages);
+ WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
+ break;
+ }
+
+ new_pages -= pages_to_alloc_4k;
+
+ if (!new_pages)
+ break;
+
+ pool = pool->next_pool;
+ if (!pool)
+ break;
+ }
+
+ if (new_pages) {
+ /* Allocation was unsuccessful */
+ int max_pool_level = pool_level;
+
+ pool = root_pool;
+
+ /* Free memory allocated so far */
+ for (pool_level = 0; pool_level <= max_pool_level;
+ pool_level++) {
+ kbase_mem_pool_lock(pool);
+
+ if (region->gpu_alloc != region->cpu_alloc) {
+ if (pages_alloced[pool_level] &&
+ cpu_pages[pool_level])
+ kbase_free_phy_pages_helper_locked(
+ region->cpu_alloc,
+ pool, cpu_pages[pool_level],
+ pages_alloced[pool_level]);
+ }
+
+ if (pages_alloced[pool_level] && gpu_pages[pool_level])
+ kbase_free_phy_pages_helper_locked(
+ region->gpu_alloc,
+ pool, gpu_pages[pool_level],
+ pages_alloced[pool_level]);
+
+ kbase_mem_pool_unlock(pool);
+
+ pool = pool->next_pool;
+ }
+
+ /*
+ * If the allocation failed despite there being enough memory in
+ * the pool, then just fail. Otherwise, try to grow the memory
+ * pool.
+ */
+ if (alloc_failed)
+ *pages_to_grow = 0;
+ else
+ *pages_to_grow = new_pages;
+
+ return false;
+ }
+
+ /* Allocation was successful. No pages to grow, return success. */
+ *pages_to_grow = 0;
+
+ return true;
+}
+
void page_fault_worker(struct work_struct *data)
{
u64 fault_pfn;
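The pool capacity pre-check in page_fault_try_alloc() above counts everything in 4 kB units: a pool's entry count is shifted left by its order, presumably 0 for the 4 kB pool and 9 for the 2 MB pool (512 x 4 kB). A minimal standalone sketch of that conversion, using illustrative names rather than real kbase APIs:

#include <stdio.h>

static unsigned long pool_size_in_4k(unsigned long entries, unsigned int order)
{
	/* each pool entry covers 2^order pages of 4 kB */
	return entries << order;
}

int main(void)
{
	printf("%lu\n", pool_size_in_4k(16, 9)); /* 2 MB pool: 16 entries = 8192 x 4 kB */
	printf("%lu\n", pool_size_in_4k(16, 0)); /* 4 kB pool: 16 entries = 16 x 4 kB */
	return 0;
}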
@@ -310,10 +524,8 @@
struct kbase_va_region *region;
int err;
bool grown = false;
- size_t min_pool_size;
- struct kbase_mem_pool *pool;
int pages_to_grow;
- struct tagged_addr *gpu_pages, *cpu_pages;
+ bool grow_2mb_pool;
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
@@ -337,7 +549,7 @@
if (unlikely(faulting_as->protected_mode)) {
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Protected mode fault");
- kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
goto fault_done;
@@ -400,23 +612,19 @@
goto fault_done;
}
-page_fault_retry:
#ifdef CONFIG_MALI_2MB_ALLOC
/* Preallocate memory for the sub-allocation structs if necessary */
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
if (!prealloc_sas[i]) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]),
- GFP_KERNEL);
- if (!prealloc_sas[i]) {
- kbase_mmu_report_fault_and_kill(
- kctx, faulting_as,
- "Failed pre-allocating memory for sub-allocations' metadata");
- goto fault_done;
- }
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Failed pre-allocating memory for sub-allocations' metadata");
+ goto fault_done;
}
}
#endif /* CONFIG_MALI_2MB_ALLOC */
+page_fault_retry:
/* so we have a translation fault, let's see if it is for growable
* memory */
kbase_gpu_vm_lock(kctx);
@@ -465,7 +673,7 @@
mutex_lock(&kbdev->mmu_hw_mutex);
- kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
/* [1] in case another page fault occurred while we were
* handling the (duplicate) page fault we need to ensure we
@@ -475,19 +683,19 @@
* transaction (which should cause the other page fault to be
* raised again).
*/
- kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
AS_COMMAND_UNLOCK, 1);
mutex_unlock(&kbdev->mmu_hw_mutex);
- kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
kbase_gpu_vm_unlock(kctx);
goto fault_done;
}
- new_pages = reg_grow_calc_extra_pages(region, fault_rel_pfn);
+ new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
/* cap to max vsize */
new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region));
@@ -496,74 +704,26 @@
mutex_lock(&kbdev->mmu_hw_mutex);
/* Duplicate of a fault we've already handled, nothing to do */
- kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
/* See comment [1] about UNLOCK usage */
- kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0,
AS_COMMAND_UNLOCK, 1);
mutex_unlock(&kbdev->mmu_hw_mutex);
- kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
kbase_gpu_vm_unlock(kctx);
goto fault_done;
}
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (new_pages >= (SZ_2M / SZ_4K)) {
- pool = &kctx->lp_mem_pool;
- /* Round up to number of 2 MB pages required */
- min_pool_size = new_pages + ((SZ_2M / SZ_4K) - 1);
- min_pool_size /= (SZ_2M / SZ_4K);
- } else {
-#endif
- pool = &kctx->mem_pool;
- min_pool_size = new_pages;
-#ifdef CONFIG_MALI_2MB_ALLOC
- }
-#endif
-
- if (region->gpu_alloc != region->cpu_alloc)
- min_pool_size *= 2;
-
pages_to_grow = 0;
- mutex_lock(&kctx->mem_partials_lock);
- kbase_mem_pool_lock(pool);
- /* We can not allocate memory from the kernel with the vm_lock held, so
- * check that there is enough memory in the pool. If not then calculate
- * how much it has to grow by, grow the pool when the vm_lock is
- * dropped, and retry the allocation.
- */
- if (kbase_mem_pool_size(pool) >= min_pool_size) {
- gpu_pages = kbase_alloc_phy_pages_helper_locked(
- region->gpu_alloc, pool, new_pages,
- &prealloc_sas[0]);
-
- if (gpu_pages) {
- if (region->gpu_alloc != region->cpu_alloc) {
- cpu_pages = kbase_alloc_phy_pages_helper_locked(
- region->cpu_alloc, pool,
- new_pages, &prealloc_sas[1]);
-
- if (cpu_pages) {
- grown = true;
- } else {
- kbase_free_phy_pages_helper_locked(
- region->gpu_alloc,
- pool, gpu_pages,
- new_pages);
- }
- } else {
- grown = true;
- }
- }
- } else {
- pages_to_grow = min_pool_size - kbase_mem_pool_size(pool);
- }
- kbase_mem_pool_unlock(pool);
- mutex_unlock(&kctx->mem_partials_lock);
+ spin_lock(&kctx->mem_partials_lock);
+ grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow,
+ &grow_2mb_pool, prealloc_sas);
+ spin_unlock(&kctx->mem_partials_lock);
if (grown) {
u64 pfn_offset;
@@ -582,7 +742,7 @@
* so the no_flush version of insert_pages is used which allows
* us to unlock the MMU as we see fit.
*/
- err = kbase_mmu_insert_pages_no_flush(kctx,
+ err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
region->start_pfn + pfn_offset,
&kbase_get_gpu_phy_pages(region)[pfn_offset],
new_pages, region->flags);
@@ -619,10 +779,10 @@
* this stage a new IRQ might not be raised when the GPU finds
* a MMU IRQ is already pending.
*/
- kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
- kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_do_operation(kbdev, faulting_as,
faulting_as->fault_addr >> PAGE_SHIFT,
new_pages,
op, 1);
@@ -631,7 +791,7 @@
/* AS transaction end */
/* reenable this in the mask */
- kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
#ifdef CONFIG_MALI_JOB_DUMP
@@ -662,9 +822,23 @@
/* If the memory pool was insufficient then grow it and retry.
* Otherwise fail the allocation.
*/
- if (pages_to_grow > 0)
- ret = kbase_mem_pool_grow(pool, pages_to_grow);
-
+ if (pages_to_grow > 0) {
+#ifdef CONFIG_MALI_2MB_ALLOC
+ if (grow_2mb_pool) {
+ /* Round page requirement up to nearest 2 MB */
+ pages_to_grow = (pages_to_grow +
+ ((1 << kctx->lp_mem_pool.order) - 1))
+ >> kctx->lp_mem_pool.order;
+ ret = kbase_mem_pool_grow(&kctx->lp_mem_pool,
+ pages_to_grow);
+ } else {
+#endif
+ ret = kbase_mem_pool_grow(&kctx->mem_pool,
+ pages_to_grow);
+#ifdef CONFIG_MALI_2MB_ALLOC
+ }
+#endif
+ }
if (ret < 0) {
/* failed to extend, handle as a normal PF */
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
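When the 2 MB pool has to grow, the outstanding 4 kB page count above is rounded up to whole large-page pool entries by adding (2^order - 1) and shifting right by the pool order, assumed here to be 9 since 2 MB is 512 pages of 4 kB. A short worked sketch of that round-up with made-up names:

#include <stdio.h>

static int round_up_to_pool_entries(int pages_4k, unsigned int order)
{
	/* same arithmetic as the rounding above: add (2^order - 1), then shift */
	return (pages_4k + ((1 << order) - 1)) >> order;
}

int main(void)
{
	printf("%d\n", round_up_to_pool_entries(512, 9)); /* exactly one 2 MB entry */
	printf("%d\n", round_up_to_pool_entries(513, 9)); /* rounds up to two entries */
	return 0;
}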
@@ -687,54 +861,57 @@
atomic_dec(&kbdev->faults_pending);
}
-phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut)
{
u64 *page;
int i;
struct page *p;
- int new_page_count __maybe_unused;
- KBASE_DEBUG_ASSERT(NULL != kctx);
- new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
- kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
-
- p = kbase_mem_pool_alloc(&kctx->mem_pool);
+ p = kbase_mem_pool_alloc(&kbdev->mem_pool);
if (!p)
- goto sub_pages;
-
- KBASE_TLSTREAM_AUX_PAGESALLOC(
- kctx->id,
- (u64)new_page_count);
+ return 0;
page = kmap(p);
if (NULL == page)
goto alloc_free;
- kbase_process_page_usage_inc(kctx, 1);
+ /* If the MMU tables belong to a context then account the memory usage
+ * to that context, otherwise the MMU tables are device wide and are
+ * only accounted to the device.
+ */
+ if (mmut->kctx) {
+ int new_page_count;
+
+ new_page_count = kbase_atomic_add_pages(1,
+ &mmut->kctx->used_pages);
+ KBASE_TLSTREAM_AUX_PAGESALLOC(
+ mmut->kctx->id,
+ (u64)new_page_count);
+ kbase_process_page_usage_inc(mmut->kctx, 1);
+ }
+
+ kbase_atomic_add_pages(1, &kbdev->memdev.used_pages);
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
- kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+ kbdev->mmu_mode->entry_invalidate(&page[i]);
- kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+ kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
kunmap(p);
return page_to_phys(p);
alloc_free:
- kbase_mem_pool_free(&kctx->mem_pool, p, false);
-sub_pages:
- kbase_atomic_sub_pages(1, &kctx->used_pages);
- kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+ kbase_mem_pool_free(&kbdev->mem_pool, p, false);
return 0;
}
-KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
-
/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
* new table from the pool if needed and possible
*/
-static int mmu_get_next_pgd(struct kbase_context *kctx,
+static int mmu_get_next_pgd(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
phys_addr_t *pgd, u64 vpfn, int level)
{
u64 *page;
@@ -742,9 +919,8 @@
struct page *p;
KBASE_DEBUG_ASSERT(*pgd);
- KBASE_DEBUG_ASSERT(NULL != kctx);
- lockdep_assert_held(&kctx->mmu_lock);
+ lockdep_assert_held(&mmut->mmu_lock);
/*
* Architecture spec defines level-0 as being the top-most.
@@ -756,23 +932,24 @@
p = pfn_to_page(PFN_DOWN(*pgd));
page = kmap(p);
if (NULL == page) {
- dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+ dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
return -EINVAL;
}
- target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+ target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
if (!target_pgd) {
- target_pgd = kbase_mmu_alloc_pgd(kctx);
+ target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
if (!target_pgd) {
- dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+ dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
+ __func__);
kunmap(p);
return -ENOMEM;
}
- kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+ kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
- kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
+ kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
/* Rely on the caller to update the address space flags. */
}
@@ -785,7 +962,8 @@
/*
* Returns the PGD for the specified level of translation
*/
-static int mmu_get_pgd_at_level(struct kbase_context *kctx,
+static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
u64 vpfn,
unsigned int level,
phys_addr_t *out_pgd)
@@ -793,14 +971,14 @@
phys_addr_t pgd;
int l;
- lockdep_assert_held(&kctx->mmu_lock);
- pgd = kctx->pgd;
+ lockdep_assert_held(&mmut->mmu_lock);
+ pgd = mmut->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
- int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l);
+ int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
/* Handle failure condition */
if (err) {
- dev_dbg(kctx->kbdev->dev,
+ dev_dbg(kbdev->dev,
"%s: mmu_get_next_pgd failure at level %d\n",
__func__, l);
return err;
@@ -812,27 +990,30 @@
return 0;
}
-#define mmu_get_bottom_pgd(kctx, vpfn, out_pgd) \
- mmu_get_pgd_at_level((kctx), (vpfn), MIDGARD_MMU_BOTTOMLEVEL, (out_pgd))
+static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ u64 vpfn,
+ phys_addr_t *out_pgd)
+{
+ return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
+ out_pgd);
+}
-
-static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx,
- u64 from_vpfn, u64 to_vpfn)
+static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ u64 from_vpfn, u64 to_vpfn)
{
phys_addr_t pgd;
u64 vpfn = from_vpfn;
struct kbase_mmu_mode const *mmu_mode;
- KBASE_DEBUG_ASSERT(NULL != kctx);
- KBASE_DEBUG_ASSERT(0 != vpfn);
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
- lockdep_assert_held(&kctx->mmu_lock);
- lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&mmut->mmu_lock);
- mmu_mode = kctx->kbdev->mmu_mode;
+ mmu_mode = kbdev->mmu_mode;
while (vpfn < to_vpfn) {
unsigned int i;
@@ -847,7 +1028,7 @@
count = left;
/* need to check if this is a 2MB page or a 4kB */
- pgd = kctx->pgd;
+ pgd = mmut->pgd;
for (level = MIDGARD_MMU_TOPLEVEL;
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
@@ -869,7 +1050,7 @@
pcount = count;
break;
default:
- dev_warn(kctx->kbdev->dev, "%sNo support for ATEs at level %d\n",
+ dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
__func__, level);
goto next;
}
@@ -878,7 +1059,7 @@
for (i = 0; i < pcount; i++)
mmu_mode->entry_invalidate(&page[idx + i]);
- kbase_mmu_sync_pgd(kctx->kbdev,
+ kbase_mmu_sync_pgd(kbdev,
kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
8 * pcount);
kunmap(phys_to_page(pgd));
@@ -907,7 +1088,6 @@
struct kbase_mmu_mode const *mmu_mode;
KBASE_DEBUG_ASSERT(NULL != kctx);
- KBASE_DEBUG_ASSERT(0 != vpfn);
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
@@ -917,7 +1097,7 @@
if (nr == 0)
return 0;
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&kctx->mmu.mmu_lock);
while (remain) {
unsigned int i;
@@ -936,27 +1116,27 @@
* 256 pages at once (on average). Do we really care?
*/
do {
- err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+ err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+ vpfn, &pgd);
if (err != -ENOMEM)
break;
/* Fill the memory pool with enough pages for
* the page walk to succeed
*/
- mutex_unlock(&kctx->mmu_lock);
- err = kbase_mem_pool_grow(&kctx->mem_pool,
+ mutex_unlock(&kctx->mmu.mmu_lock);
+ err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
MIDGARD_MMU_BOTTOMLEVEL);
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&kctx->mmu.mmu_lock);
} while (!err);
if (err) {
dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
if (recover_required) {
/* Invalidate the pages we have partially
* completed */
- mmu_insert_pages_failure_recovery(kctx,
- recover_vpfn,
- recover_vpfn +
- recover_count
- );
+ mmu_insert_pages_failure_recovery(kctx->kbdev,
+ &kctx->mmu,
+ recover_vpfn,
+ recover_vpfn + recover_count);
}
goto fail_unlock;
}
@@ -968,11 +1148,10 @@
if (recover_required) {
/* Invalidate the pages we have partially
* completed */
- mmu_insert_pages_failure_recovery(kctx,
- recover_vpfn,
- recover_vpfn +
- recover_count
- );
+ mmu_insert_pages_failure_recovery(kctx->kbdev,
+ &kctx->mmu,
+ recover_vpfn,
+ recover_vpfn + recover_count);
}
err = -ENOMEM;
goto fail_unlock;
@@ -1003,30 +1182,38 @@
recover_required = true;
recover_count += count;
}
- mutex_unlock(&kctx->mmu_lock);
+ mutex_unlock(&kctx->mmu.mmu_lock);
kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
return 0;
fail_unlock:
- mutex_unlock(&kctx->mmu_lock);
+ mutex_unlock(&kctx->mmu.mmu_lock);
kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
return err;
}
-static inline void cleanup_empty_pte(struct kbase_context *kctx, u64 *pte)
+static inline void cleanup_empty_pte(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 *pte)
{
phys_addr_t tmp_pgd;
struct page *tmp_p;
- tmp_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(*pte);
+ tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
tmp_p = phys_to_page(tmp_pgd);
- kbase_mem_pool_free(&kctx->mem_pool, tmp_p, false);
- kbase_process_page_usage_dec(kctx, 1);
- kbase_atomic_sub_pages(1, &kctx->used_pages);
- kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+ kbase_mem_pool_free(&kbdev->mem_pool, tmp_p, false);
+
+ /* If the MMU tables belong to a context then we accounted the memory
+ * usage to that context, so decrement here.
+ */
+ if (mmut->kctx) {
+ kbase_process_page_usage_dec(mmut->kctx, 1);
+ kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+ }
+ kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
}
-int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx,
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
const u64 start_vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags)
@@ -1038,18 +1225,17 @@
int err;
struct kbase_mmu_mode const *mmu_mode;
- KBASE_DEBUG_ASSERT(kctx);
- KBASE_DEBUG_ASSERT(start_vpfn);
+ /* Note that 0 is a valid start_vpfn */
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
- mmu_mode = kctx->kbdev->mmu_mode;
+ mmu_mode = kbdev->mmu_mode;
/* Early out if there is nothing to do */
if (nr == 0)
return 0;
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&mmut->mmu_lock);
while (remain) {
unsigned int i;
@@ -1074,28 +1260,27 @@
* 256 pages at once (on average). Do we really care?
*/
do {
- err = mmu_get_pgd_at_level(kctx, insert_vpfn, cur_level,
- &pgd);
+ err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
+ cur_level, &pgd);
if (err != -ENOMEM)
break;
/* Fill the memory pool with enough pages for
* the page walk to succeed
*/
- mutex_unlock(&kctx->mmu_lock);
- err = kbase_mem_pool_grow(&kctx->mem_pool,
+ mutex_unlock(&mmut->mmu_lock);
+ err = kbase_mem_pool_grow(&kbdev->mem_pool,
cur_level);
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&mmut->mmu_lock);
} while (!err);
if (err) {
- dev_warn(kctx->kbdev->dev,
+ dev_warn(kbdev->dev,
"%s: mmu_get_bottom_pgd failure\n", __func__);
if (insert_vpfn != start_vpfn) {
/* Invalidate the pages we have partially
* completed */
- mmu_insert_pages_failure_recovery(kctx,
- start_vpfn,
- insert_vpfn);
+ mmu_insert_pages_failure_recovery(kbdev,
+ mmut, start_vpfn, insert_vpfn);
}
goto fail_unlock;
}
@@ -1103,14 +1288,13 @@
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kctx->kbdev->dev, "%s: kmap failure\n",
+ dev_warn(kbdev->dev, "%s: kmap failure\n",
__func__);
if (insert_vpfn != start_vpfn) {
/* Invalidate the pages we have partially
* completed */
- mmu_insert_pages_failure_recovery(kctx,
- start_vpfn,
- insert_vpfn);
+ mmu_insert_pages_failure_recovery(kbdev,
+ mmut, start_vpfn, insert_vpfn);
}
err = -ENOMEM;
goto fail_unlock;
@@ -1121,7 +1305,7 @@
u64 *target = &pgd_page[level_index];
if (mmu_mode->pte_is_valid(*target, cur_level))
- cleanup_empty_pte(kctx, target);
+ cleanup_empty_pte(kbdev, mmut, target);
mmu_mode->entry_set_ate(target, *phys, flags,
cur_level);
} else {
@@ -1129,18 +1313,16 @@
unsigned int ofs = vindex + i;
u64 *target = &pgd_page[ofs];
- /* Fail if the current page is a valid ATE entry
- * unless gwt_was_enabled as in that case all
- * pages will be valid from when
- * kbase_gpu_gwt_start() cleared the gpu
- * write flag.
+ /* Warn if the current page is a valid ATE
+ * entry. The page table shouldn't have anything
+ * in the place where we are trying to put a
+ * new entry. Modification to page table entries
+ * should be performed with
+ * kbase_mmu_update_pages()
*/
-#ifdef CONFIG_MALI_JOB_DUMP
- if (!kctx->gwt_was_enabled)
-#endif
- KBASE_DEBUG_ASSERT
- (0 == (*target & 1UL));
- kctx->kbdev->mmu_mode->entry_set_ate(target,
+ WARN_ON((*target & 1UL) != 0);
+
+ kbdev->mmu_mode->entry_set_ate(target,
phys[i], flags, cur_level);
}
}
@@ -1149,32 +1331,39 @@
insert_vpfn += count;
remain -= count;
- kbase_mmu_sync_pgd(kctx->kbdev,
+ kbase_mmu_sync_pgd(kbdev,
kbase_dma_addr(p) + (vindex * sizeof(u64)),
count * sizeof(u64));
kunmap(p);
}
- mutex_unlock(&kctx->mmu_lock);
- return 0;
+ err = 0;
fail_unlock:
- mutex_unlock(&kctx->mmu_lock);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
/*
- * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
+ * number 'as_nr'.
*/
-int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags)
+int kbase_mmu_insert_pages(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr)
{
int err;
- err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags);
- kbase_mmu_flush_invalidate(kctx, vpfn, nr, false);
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
+ phys, nr, flags);
+
+ if (mmut->kctx)
+ kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
+ else
+ kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr);
+
return err;
}
@@ -1209,7 +1398,7 @@
err = kbase_mmu_hw_do_operation(kbdev,
&kbdev->as[kctx->as_nr],
- kctx, vpfn, nr, op, 0);
+ vpfn, nr, op, 0);
#if KBASE_GPU_RESET_EN
if (err) {
/* Flush failed to complete, assume the
@@ -1234,14 +1423,83 @@
#endif /* !CONFIG_MALI_NO_MALI */
}
+/* Perform a flush/invalidate on a particular address space
+ */
+static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
+ struct kbase_as *as,
+ u64 vpfn, size_t nr, bool sync, bool drain_pending)
+{
+ int err;
+ u32 op;
+
+ if (kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ /* GPU is off so there's no need to perform flush/invalidate */
+ return;
+ }
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->mmu_hw_mutex);
+
+ if (sync)
+ op = AS_COMMAND_FLUSH_MEM;
+ else
+ op = AS_COMMAND_FLUSH_PT;
+
+ err = kbase_mmu_hw_do_operation(kbdev,
+ as, vpfn, nr, op, 0);
+
+#if KBASE_GPU_RESET_EN
+ if (err) {
+ /* Flush failed to complete, assume the GPU has hung and
+ * perform a reset to recover
+ */
+		dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ /* AS transaction end */
+
+#ifndef CONFIG_MALI_NO_MALI
+ /*
+ * The transaction lock must be dropped before here
+ * as kbase_wait_write_flush could take it if
+ * the GPU was powered down (static analysis doesn't
+ * know this can't happen).
+ */
+ drain_pending |= (!err) && sync &&
+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367);
+ if (drain_pending) {
+ /* Wait for GPU to flush write buffer */
+ kbase_wait_write_flush(kbdev);
+ }
+#endif /* !CONFIG_MALI_NO_MALI */
+
+ kbase_pm_context_idle(kbdev);
+}
+
+static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev,
+ u64 vpfn, size_t nr, bool sync, int as_nr)
+{
+ /* Skip if there is nothing to do */
+ if (nr) {
+ kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
+ nr, sync, false);
+ }
+}
+
static void kbase_mmu_flush_invalidate(struct kbase_context *kctx,
u64 vpfn, size_t nr, bool sync)
{
struct kbase_device *kbdev;
bool ctx_is_in_runpool;
-#ifndef CONFIG_MALI_NO_MALI
bool drain_pending = false;
+#ifndef CONFIG_MALI_NO_MALI
if (atomic_xchg(&kctx->drain_pending, 0))
drain_pending = true;
#endif /* !CONFIG_MALI_NO_MALI */
@@ -1258,71 +1516,22 @@
if (ctx_is_in_runpool) {
KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
- if (!kbase_pm_context_active_handle_suspend(kbdev,
- KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
- int err;
- u32 op;
+ kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
+ vpfn, nr, sync, drain_pending);
- /* AS transaction begin */
- mutex_lock(&kbdev->mmu_hw_mutex);
-
- if (sync)
- op = AS_COMMAND_FLUSH_MEM;
- else
- op = AS_COMMAND_FLUSH_PT;
-
- err = kbase_mmu_hw_do_operation(kbdev,
- &kbdev->as[kctx->as_nr],
- kctx, vpfn, nr, op, 0);
-
-#if KBASE_GPU_RESET_EN
- if (err) {
- /* Flush failed to complete, assume the
- * GPU has hung and perform a reset to
- * recover */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
-
- if (kbase_prepare_to_reset_gpu(kbdev))
- kbase_reset_gpu(kbdev);
- }
-#endif /* KBASE_GPU_RESET_EN */
-
- mutex_unlock(&kbdev->mmu_hw_mutex);
- /* AS transaction end */
-
-#ifndef CONFIG_MALI_NO_MALI
- /*
- * The transaction lock must be dropped before here
- * as kbase_wait_write_flush could take it if
- * the GPU was powered down (static analysis doesn't
- * know this can't happen).
- */
- drain_pending |= (!err) && sync &&
- kbase_hw_has_issue(kctx->kbdev,
- BASE_HW_ISSUE_6367);
- if (drain_pending) {
- /* Wait for GPU to flush write buffer */
- kbase_wait_write_flush(kctx);
- }
-#endif /* !CONFIG_MALI_NO_MALI */
-
- kbase_pm_context_idle(kbdev);
- }
kbasep_js_runpool_release_ctx(kbdev, kctx);
}
}
-void kbase_mmu_update(struct kbase_context *kctx)
+void kbase_mmu_update(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ int as_nr)
{
- lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
- /* ASSERT that the context has a valid as_nr, which is only the case
- * when it's scheduled in.
- *
- * as_nr won't change because the caller has the hwaccess_lock */
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+ KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
- kctx->kbdev->mmu_mode->update(kctx);
+ kbdev->mmu_mode->update(kbdev, mmut, as_nr);
}
KBASE_EXPORT_TEST_API(kbase_mmu_update);
@@ -1369,24 +1578,22 @@
* already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
* information.
*/
-int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
{
phys_addr_t pgd;
size_t requested_nr = nr;
struct kbase_mmu_mode const *mmu_mode;
int err = -EFAULT;
- KBASE_DEBUG_ASSERT(NULL != kctx);
- beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
-
if (0 == nr) {
/* early out if nothing to do */
return 0;
}
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&mmut->mmu_lock);
- mmu_mode = kctx->kbdev->mmu_mode;
+ mmu_mode = kbdev->mmu_mode;
while (nr) {
unsigned int i;
@@ -1400,7 +1607,7 @@
count = nr;
/* need to check if this is a 2MB or a 4kB page */
- pgd = kctx->pgd;
+ pgd = mmut->pgd;
for (level = MIDGARD_MMU_TOPLEVEL;
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
@@ -1438,7 +1645,7 @@
switch (level) {
case MIDGARD_MMU_LEVEL(0):
case MIDGARD_MMU_LEVEL(1):
- dev_warn(kctx->kbdev->dev,
+ dev_warn(kbdev->dev,
"%s: No support for ATEs at level %d\n",
__func__, level);
kunmap(phys_to_page(pgd));
@@ -1448,7 +1655,7 @@
if (count >= 512) {
pcount = 1;
} else {
- dev_warn(kctx->kbdev->dev,
+ dev_warn(kbdev->dev,
"%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
__func__, count);
pcount = 0;
@@ -1459,7 +1666,7 @@
pcount = count;
break;
default:
- dev_err(kctx->kbdev->dev,
+ dev_err(kbdev->dev,
"%s: found non-mapped memory, early out\n",
__func__);
vpfn += count;
@@ -1471,7 +1678,7 @@
for (i = 0; i < pcount; i++)
mmu_mode->entry_invalidate(&page[index + i]);
- kbase_mmu_sync_pgd(kctx->kbdev,
+ kbase_mmu_sync_pgd(kbdev,
kbase_dma_addr(phys_to_page(pgd)) +
8 * index, 8*pcount);
@@ -1482,26 +1689,35 @@
}
err = 0;
out:
- mutex_unlock(&kctx->mmu_lock);
- kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true);
+ mutex_unlock(&mmut->mmu_lock);
+
+ if (mmut->kctx)
+ kbase_mmu_flush_invalidate(mmut->kctx, vpfn, requested_nr, true);
+ else
+ kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, requested_nr, true, as_nr);
+
return err;
}
KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
/**
- * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
- * This call is being triggered as a response to the changes of the mem attributes
+ * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
*
- * @pre : The caller is responsible for validating the memory attributes
+ * This will update page table entries that already exist on the GPU based on
+ * the new flags that are passed. It is used in response to changes in the
+ * memory attributes.
*
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
+ * The caller is responsible for validating the memory attributes
+ *
+ * @kctx: Kbase context
+ * @vpfn: Virtual PFN (Page Frame Number) of the first page to update
+ * @phys: Tagged physical addresses of the physical pages to replace the
+ * current mappings
+ * @nr: Number of pages to update
+ * @flags: Flags
*/
-int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
+static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags)
{
@@ -1511,14 +1727,13 @@
int err;
KBASE_DEBUG_ASSERT(NULL != kctx);
- KBASE_DEBUG_ASSERT(0 != vpfn);
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
/* Early out if there is nothing to do */
if (nr == 0)
return 0;
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&kctx->mmu.mmu_lock);
mmu_mode = kctx->kbdev->mmu_mode;
@@ -1532,16 +1747,17 @@
count = nr;
do {
- err = mmu_get_bottom_pgd(kctx, vpfn, &pgd);
+ err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+ vpfn, &pgd);
if (err != -ENOMEM)
break;
/* Fill the memory pool with enough pages for
* the page walk to succeed
*/
- mutex_unlock(&kctx->mmu_lock);
- err = kbase_mem_pool_grow(&kctx->mem_pool,
+ mutex_unlock(&kctx->mmu.mmu_lock);
+ err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
MIDGARD_MMU_BOTTOMLEVEL);
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&kctx->mmu.mmu_lock);
} while (!err);
if (err) {
dev_warn(kctx->kbdev->dev,
@@ -1572,11 +1788,11 @@
kunmap(pfn_to_page(PFN_DOWN(pgd)));
}
- mutex_unlock(&kctx->mmu_lock);
+ mutex_unlock(&kctx->mmu.mmu_lock);
return 0;
fail_unlock:
- mutex_unlock(&kctx->mmu_lock);
+ mutex_unlock(&kctx->mmu.mmu_lock);
return err;
}
@@ -1591,8 +1807,9 @@
return err;
}
-static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd,
- int level, u64 *pgd_page_buffer)
+static void mmu_teardown_level(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, phys_addr_t pgd,
+ int level, u64 *pgd_page_buffer)
{
phys_addr_t target_pgd;
struct page *p;
@@ -1600,9 +1817,7 @@
int i;
struct kbase_mmu_mode const *mmu_mode;
- KBASE_DEBUG_ASSERT(NULL != kctx);
- lockdep_assert_held(&kctx->mmu_lock);
- lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&mmut->mmu_lock);
pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
/* kmap_atomic should NEVER fail. */
@@ -1613,14 +1828,14 @@
kunmap_atomic(pgd_page);
pgd_page = pgd_page_buffer;
- mmu_mode = kctx->kbdev->mmu_mode;
+ mmu_mode = kbdev->mmu_mode;
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
if (target_pgd) {
if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
- mmu_teardown_level(kctx,
+ mmu_teardown_level(kbdev, mmut,
target_pgd,
level + 1,
pgd_page_buffer +
@@ -1630,56 +1845,69 @@
}
p = pfn_to_page(PFN_DOWN(pgd));
- kbase_mem_pool_free(&kctx->mem_pool, p, true);
- kbase_process_page_usage_dec(kctx, 1);
- kbase_atomic_sub_pages(1, &kctx->used_pages);
- kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+ kbase_mem_pool_free(&kbdev->mem_pool, p, true);
+ kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
+
+ /* If MMU tables belong to a context then pages will have been accounted
+ * against it, so we must decrement the usage counts here.
+ */
+ if (mmut->kctx) {
+ kbase_process_page_usage_dec(mmut->kctx, 1);
+ kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+ }
}
-int kbase_mmu_init(struct kbase_context *kctx)
+int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ struct kbase_context *kctx)
{
- KBASE_DEBUG_ASSERT(NULL != kctx);
- KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
-
- mutex_init(&kctx->mmu_lock);
+ mutex_init(&mmut->mmu_lock);
+ mmut->kctx = kctx;
/* Preallocate MMU depth of four pages for mmu_teardown_level to use */
- kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+ mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
- if (NULL == kctx->mmu_teardown_pages)
+ if (mmut->mmu_teardown_pages == NULL)
return -ENOMEM;
+ mmut->pgd = 0;
+ /* We allocate pages into the kbdev memory pool, then
+ * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
+ * avoid allocations from the kernel happening with the lock held.
+ */
+ while (!mmut->pgd) {
+ int err;
+
+ err = kbase_mem_pool_grow(&kbdev->mem_pool,
+ MIDGARD_MMU_BOTTOMLEVEL);
+ if (err) {
+ kbase_mmu_term(kbdev, mmut);
+ return -ENOMEM;
+ }
+
+ mutex_lock(&mmut->mmu_lock);
+ mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
+ mutex_unlock(&mmut->mmu_lock);
+ }
+
return 0;
}
-void kbase_mmu_term(struct kbase_context *kctx)
+void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- KBASE_DEBUG_ASSERT(NULL != kctx);
- KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+ if (mmut->pgd) {
+ mutex_lock(&mmut->mmu_lock);
+ mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL,
+ mmut->mmu_teardown_pages);
+ mutex_unlock(&mmut->mmu_lock);
- kfree(kctx->mmu_teardown_pages);
- kctx->mmu_teardown_pages = NULL;
+ if (mmut->kctx)
+ KBASE_TLSTREAM_AUX_PAGESALLOC(mmut->kctx->id, 0);
+ }
+
+ kfree(mmut->mmu_teardown_pages);
+ mutex_destroy(&mmut->mmu_lock);
}
-void kbase_mmu_free_pgd(struct kbase_context *kctx)
-{
- int new_page_count = 0;
-
- KBASE_DEBUG_ASSERT(NULL != kctx);
- KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
-
- mutex_lock(&kctx->mmu_lock);
- mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL,
- kctx->mmu_teardown_pages);
- mutex_unlock(&kctx->mmu_lock);
-
- KBASE_TLSTREAM_AUX_PAGESALLOC(
- kctx->id,
- (u64)new_page_count);
-}
-
-KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
-
static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
{
phys_addr_t target_pgd;
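kbase_mmu_init() above grows the device memory pool before taking mmu_lock so that kbase_mmu_alloc_pgd() never has to reach into the kernel allocator while the lock is held, retrying until a PGD page comes out of the pre-grown pool. A rough standalone sketch of that grow-then-allocate pattern, with a plain counter and a pthread mutex standing in for the real pool and lock:

#include <pthread.h>
#include <stdio.h>

static int reserve; /* stand-in for the pre-grown memory pool */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int reserve_grow(int pages)
{
	reserve += pages; /* done without the lock held; may sleep in real code */
	return 0;         /* always succeeds in this toy version */
}

static int alloc_from_reserve(void)
{
	if (reserve <= 0)
		return 0; /* pool empty: caller grows it and retries */
	reserve--;
	return 1;         /* pretend page handle */
}

int main(void)
{
	int pgd = 0;

	while (!pgd) {
		if (reserve_grow(4))
			return 1;
		pthread_mutex_lock(&lock);
		pgd = alloc_from_reserve(); /* never calls the allocator itself */
		pthread_mutex_unlock(&lock);
	}
	printf("pgd allocated\n");
	return 0;
}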
@@ -1690,7 +1918,7 @@
struct kbase_mmu_mode const *mmu_mode;
KBASE_DEBUG_ASSERT(NULL != kctx);
- lockdep_assert_held(&kctx->mmu_lock);
+ lockdep_assert_held(&kctx->mmu.mmu_lock);
mmu_mode = kctx->kbdev->mmu_mode;
@@ -1755,7 +1983,7 @@
KBASE_DEBUG_ASSERT(0 != size_left);
kaddr = vmalloc_user(size_left);
- mutex_lock(&kctx->mmu_lock);
+ mutex_lock(&kctx->mmu.mmu_lock);
if (kaddr) {
u64 end_marker = 0xFFULL;
@@ -1770,7 +1998,8 @@
if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
struct kbase_mmu_setup as_setup;
- kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
+ kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
+ &as_setup);
config[0] = as_setup.transtab;
config[1] = as_setup.memattr;
config[2] = as_setup.transcfg;
@@ -1781,7 +2010,7 @@
}
dump_size = kbasep_mmu_dump_level(kctx,
- kctx->pgd,
+ kctx->mmu.pgd,
MIDGARD_MMU_TOPLEVEL,
&mmu_dump_buffer,
&size_left);
@@ -1803,12 +2032,12 @@
memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
}
- mutex_unlock(&kctx->mmu_lock);
+ mutex_unlock(&kctx->mmu.mmu_lock);
return kaddr;
fail_free:
vfree(kaddr);
- mutex_unlock(&kctx->mmu_lock);
+ mutex_unlock(&kctx->mmu.mmu_lock);
return NULL;
}
KBASE_EXPORT_TEST_API(kbase_mmu_dump);
@@ -1841,7 +2070,7 @@
if (unlikely(faulting_as->protected_mode)) {
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Permission failure");
- kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
kbasep_js_runpool_release_ctx(kbdev, kctx);
atomic_dec(&kbdev->faults_pending);
@@ -1875,9 +2104,9 @@
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
- kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
- kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
kbase_pm_context_idle(kbdev);
@@ -2172,9 +2401,9 @@
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
/* Clear down the fault */
- kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
- kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
#if KBASE_GPU_RESET_EN
@@ -2187,7 +2416,6 @@
{
struct kbase_as *as;
struct kbase_device *kbdev;
- struct kbase_context *kctx;
unsigned long flags;
KBASE_DEBUG_ASSERT(work);
@@ -2203,12 +2431,11 @@
* the AS will not be released as before the atom is released this workqueue
* is flushed (in kbase_as_poking_timer_release_atom)
*/
- kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
/* Force a uTLB invalidate */
- kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+ kbase_mmu_hw_do_operation(kbdev, as, 0, 0,
AS_COMMAND_UNLOCK, 0);
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
@@ -2356,14 +2583,14 @@
WARN_ON(as->current_setup.transtab);
if (kbase_as_has_bus_fault(as)) {
- kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
- kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
} else if (kbase_as_has_page_fault(as)) {
- kbase_mmu_hw_clear_fault(kbdev, as, kctx,
+ kbase_mmu_hw_clear_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
- kbase_mmu_hw_enable_fault(kbdev, as, kctx,
+ kbase_mmu_hw_enable_fault(kbdev, as,
KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
index 92aa55d..70d5f2b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -70,10 +70,9 @@
*
* @param[in] kbdev kbase device to configure.
* @param[in] as address space to configure.
- * @param[in] kctx kbase context to configure.
*/
void kbase_mmu_hw_configure(struct kbase_device *kbdev,
- struct kbase_as *as, struct kbase_context *kctx);
+ struct kbase_as *as);
/** @brief Issue an operation to the MMU.
*
@@ -82,7 +81,6 @@
*
* @param[in] kbdev kbase device to issue the MMU operation on.
* @param[in] as address space to issue the MMU operation on.
- * @param[in] kctx kbase context to issue the MMU operation on.
* @param[in] vpfn MMU Virtual Page Frame Number to start the
* operation on.
* @param[in] nr Number of pages to work on.
@@ -93,7 +91,7 @@
* @return Zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
- struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+ u64 vpfn, u32 nr, u32 type,
unsigned int handling_irq);
/** @brief Clear a fault that has been previously reported by the MMU.
@@ -102,11 +100,10 @@
*
* @param[in] kbdev kbase device to clear the fault from.
* @param[in] as address space to clear the fault from.
- * @param[in] kctx kbase context to clear the fault from or NULL.
* @param[in] type The type of fault that needs to be cleared.
*/
void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
- struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+ enum kbase_mmu_fault_type type);
/** @brief Enable fault that has been previously reported by the MMU.
*
@@ -116,11 +113,10 @@
*
* @param[in] kbdev kbase device to again enable the fault from.
* @param[in] as address space to again enable the fault from.
- * @param[in] kctx kbase context to again enable the fault from.
* @param[in] type The type of fault that needs to be enabled again.
*/
void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
- struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+ enum kbase_mmu_fault_type type);
/** @} *//* end group mali_kbase_mmu_hw */
/** @} *//* end group base_kbase_api */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
index aa0c403..38ca456 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2014, 2016-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -68,7 +68,7 @@
#endif
}
-static void mmu_get_as_setup(struct kbase_context *kctx,
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
struct kbase_mmu_setup * const setup)
{
/* Set up the required caching policies at the correct indices
@@ -84,22 +84,30 @@
(AS_MEMATTR_AARCH64_OUTER_IMPL_DEF <<
(AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) |
(AS_MEMATTR_AARCH64_OUTER_WA <<
- (AS_MEMATTR_INDEX_OUTER_WA * 8));
+ (AS_MEMATTR_INDEX_OUTER_WA * 8)) |
+ (AS_MEMATTR_AARCH64_NON_CACHEABLE <<
+ (AS_MEMATTR_INDEX_NON_CACHEABLE * 8));
- setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK;
+ setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK;
setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K;
}
-static void mmu_update(struct kbase_context *kctx)
+static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ int as_nr)
{
- struct kbase_device * const kbdev = kctx->kbdev;
- struct kbase_as * const as = &kbdev->as[kctx->as_nr];
- struct kbase_mmu_setup * const current_setup = &as->current_setup;
+ struct kbase_as *as;
+ struct kbase_mmu_setup *current_setup;
- mmu_get_as_setup(kctx, current_setup);
+ if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+ return;
+
+ as = &kbdev->as[as_nr];
+ current_setup = &as->current_setup;
+
+ mmu_get_as_setup(mmut, current_setup);
/* Apply the address space setting */
- kbase_mmu_hw_configure(kbdev, as, kctx);
+ kbase_mmu_hw_configure(kbdev, as);
}
static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
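Each AS_MEMATTR_INDEX_* slot above occupies one byte lane of the 64-bit MEMATTR value, which is why every attribute constant is shifted by its index times 8. A standalone sketch of that byte-lane packing, using made-up attribute bytes rather than the real AS_MEMATTR_AARCH64_* values:

#include <stdio.h>
#include <stdint.h>

static uint64_t pack_memattr(unsigned int index, uint8_t attr)
{
	/* index i occupies bits [8*i + 7 : 8*i] of the MEMATTR register */
	return (uint64_t)attr << (index * 8);
}

int main(void)
{
	/* two example slots: index 0 and index 4, with illustrative bytes */
	uint64_t memattr = pack_memattr(0, 0x88) | pack_memattr(4, 0x4d);

	printf("0x%016llx\n", (unsigned long long)memattr);
	return 0;
}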
@@ -111,7 +119,7 @@
current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED;
/* Apply the address space setting */
- kbase_mmu_hw_configure(kbdev, as, NULL);
+ kbase_mmu_hw_configure(kbdev, as);
}
static phys_addr_t pte_to_phy_addr(u64 entry)
@@ -205,7 +213,8 @@
.pte_is_valid = pte_is_valid,
.entry_set_ate = entry_set_ate,
.entry_set_pte = entry_set_pte,
- .entry_invalidate = entry_invalidate
+ .entry_invalidate = entry_invalidate,
+ .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE
};
struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
index 7dc38fc..f6bdf91 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,7 +66,7 @@
#endif
}
-static void mmu_get_as_setup(struct kbase_context *kctx,
+static void mmu_get_as_setup(struct kbase_mmu_table *mmut,
struct kbase_mmu_setup * const setup)
{
/* Set up the required caching policies at the correct indices
@@ -84,7 +84,7 @@
(AS_MEMATTR_INDEX_OUTER_WA * 8)) |
0; /* The other indices are unused for now */
- setup->transtab = ((u64)kctx->pgd &
+ setup->transtab = ((u64)mmut->pgd &
((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
AS_TRANSTAB_LPAE_ADRMODE_TABLE |
AS_TRANSTAB_LPAE_READ_INNER;
@@ -92,16 +92,23 @@
setup->transcfg = 0;
}
-static void mmu_update(struct kbase_context *kctx)
+static void mmu_update(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ int as_nr)
{
- struct kbase_device * const kbdev = kctx->kbdev;
- struct kbase_as * const as = &kbdev->as[kctx->as_nr];
- struct kbase_mmu_setup * const current_setup = &as->current_setup;
+ struct kbase_as *as;
+ struct kbase_mmu_setup *current_setup;
- mmu_get_as_setup(kctx, current_setup);
+ if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID))
+ return;
+
+ as = &kbdev->as[as_nr];
+ current_setup = &as->current_setup;
+
+ mmu_get_as_setup(mmut, current_setup);
/* Apply the address space setting */
- kbase_mmu_hw_configure(kbdev, as, kctx);
+ kbase_mmu_hw_configure(kbdev, as);
}
static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
@@ -112,7 +119,7 @@
current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
/* Apply the address space setting */
- kbase_mmu_hw_configure(kbdev, as, NULL);
+ kbase_mmu_hw_configure(kbdev, as);
}
static phys_addr_t pte_to_phy_addr(u64 entry)
@@ -139,9 +146,17 @@
static u64 get_mmu_flags(unsigned long flags)
{
u64 mmu_flags;
+ unsigned long memattr_idx;
- /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
- mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+ memattr_idx = KBASE_REG_MEMATTR_VALUE(flags);
+ if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE,
+		 "Legacy Mode MMU cannot honor GPU non-cacheable memory, will use default instead\n"))
+ memattr_idx = AS_MEMATTR_INDEX_DEFAULT;
+ /* store mem_attr index as 4:2, noting that:
+ * - macro called above ensures 3 bits already
+ * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits
+ */
+ mmu_flags = memattr_idx << 2;
/* write perm if requested */
mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
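get_mmu_flags() above packs the 3-bit memory-attribute index into PTE bits 4:2 and then ORs in the write-permission bit when KBASE_REG_GPU_WR is set. A small sketch of the same packing; the write bit position used here is an assumption made for illustration, not taken from the driver headers:

#include <stdio.h>
#include <stdint.h>

#define ILLUSTRATIVE_WR_BIT (1ULL << 7) /* assumed position, for the sketch only */

static uint64_t lpae_flags(unsigned int memattr_idx, int writable)
{
	uint64_t mmu_flags = (uint64_t)(memattr_idx & 0x7) << 2; /* bits 4:2 */

	if (writable)
		mmu_flags |= ILLUSTRATIVE_WR_BIT;
	return mmu_flags;
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)lpae_flags(5, 1)); /* prints 0x94 */
	return 0;
}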
@@ -189,7 +204,8 @@
.pte_is_valid = pte_is_valid,
.entry_set_ate = entry_set_ate,
.entry_set_pte = entry_set_pte,
- .entry_invalidate = entry_invalidate
+ .entry_invalidate = entry_invalidate,
+ .flags = 0
};
struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c
index e3cb0b1f..d5b8c77 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_pm.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -52,18 +52,9 @@
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
int c;
- int old_count;
KBASE_DEBUG_ASSERT(kbdev != NULL);
- /* Trace timeline information about how long it took to handle the decision
- * to powerup. Sometimes the event might be missed due to reading the count
- * outside of mutex, but this is necessary to get the trace timing
- * correct. */
- old_count = kbdev->pm.active_count;
- if (old_count == 0)
- kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
-
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
if (kbase_pm_is_suspending(kbdev)) {
@@ -75,8 +66,6 @@
case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
mutex_unlock(&kbdev->pm.lock);
mutex_unlock(&js_devdata->runpool_mutex);
- if (old_count == 0)
- kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
return 1;
case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
@@ -87,13 +76,8 @@
}
}
c = ++kbdev->pm.active_count;
- KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
- /* Trace the event being handled */
- if (old_count == 0)
- kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
-
if (c == 1) {
/* First context active: Power on the GPU and any cores requested by
* the policy */
@@ -116,31 +100,18 @@
{
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
int c;
- int old_count;
KBASE_DEBUG_ASSERT(kbdev != NULL);
- /* Trace timeline information about how long it took to handle the decision
- * to powerdown. Sometimes the event might be missed due to reading the
- * count outside of mutex, but this is necessary to get the trace timing
- * correct. */
- old_count = kbdev->pm.active_count;
- if (old_count == 0)
- kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
mutex_lock(&js_devdata->runpool_mutex);
mutex_lock(&kbdev->pm.lock);
c = --kbdev->pm.active_count;
- KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
KBASE_DEBUG_ASSERT(c >= 0);
- /* Trace the event being handled */
- if (old_count == 0)
- kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
-
if (c == 0) {
/* Last context has gone idle */
kbase_hwaccess_pm_gpu_idle(kbdev);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h
index 8de17e1..59a0314 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_pm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -136,6 +136,10 @@
*/
void kbase_pm_context_idle(struct kbase_device *kbdev);
+/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline
+ * function
+ */
+
/**
* Suspend the GPU and prevent any further register accesses to it from Kernel
* threads.
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
index 01b3087..b774c3b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -495,17 +495,6 @@
kbase_js_sched_all(katom->kctx->kbdev);
}
-struct kbase_debug_copy_buffer {
- size_t size;
- struct page **pages;
- int nr_pages;
- size_t offset;
- struct kbase_mem_phy_alloc *gpu_alloc;
-
- struct page **extres_pages;
- int nr_extres_pages;
-};
-
static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
{
struct page **pages = buffer->extres_pages;
@@ -713,7 +702,7 @@
return ret;
}
-static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
+void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
void *extres_page, struct page **pages, unsigned int nr_pages,
unsigned int *target_page_nr, size_t offset, size_t *to_copy)
{
@@ -755,7 +744,7 @@
kunmap(pages[*target_page_nr]);
}
-static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
struct kbase_debug_copy_buffer *buf_data)
{
unsigned int i;
@@ -867,48 +856,22 @@
return 0;
}
-static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7)
+
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+ struct base_jit_alloc_info *info)
{
- __user void *data = (__user void *)(uintptr_t) katom->jc;
- struct base_jit_alloc_info *info;
- struct kbase_context *kctx = katom->kctx;
- int ret;
-
- /* Fail the job if there is no info structure */
- if (!data) {
- ret = -EINVAL;
- goto fail;
- }
-
- /* Copy the information for safe access and future storage */
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info) {
- ret = -ENOMEM;
- goto fail;
- }
-
- if (copy_from_user(info, data, sizeof(*info)) != 0) {
- ret = -EINVAL;
- goto free_info;
- }
-
- /* If the ID is zero then fail the job */
- if (info->id == 0) {
- ret = -EINVAL;
- goto free_info;
- }
+ /* If the ID is zero, then fail the job */
+ if (info->id == 0)
+ return -EINVAL;
/* Sanity check that the PA fits within the VA */
- if (info->va_pages < info->commit_pages) {
- ret = -EINVAL;
- goto free_info;
- }
+ if (info->va_pages < info->commit_pages)
+ return -EINVAL;
/* Ensure the GPU address is correctly aligned */
- if ((info->gpu_alloc_addr & 0x7) != 0) {
- ret = -EINVAL;
- goto free_info;
- }
+ if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0)
+ return -EINVAL;
if (kctx->jit_version == 1) {
/* Old JIT didn't have usage_id, max_allocations, bin_id
@@ -920,24 +883,63 @@
info->flags = 0;
memset(info->padding, 0, sizeof(info->padding));
} else {
- int i;
+ int j;
/* Check padding is all zeroed */
- for (i = 0; i < sizeof(info->padding); i++) {
- if (info->padding[i] != 0) {
- ret = -EINVAL;
- goto free_info;
+ for (j = 0; j < sizeof(info->padding); j++) {
+ if (info->padding[j] != 0) {
+ return -EINVAL;
}
}
/* No bit other than TILER_ALIGN_TOP shall be set */
if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) {
- ret = -EINVAL;
- goto free_info;
+ return -EINVAL;
}
}
+ return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+ __user void *data = (__user void *)(uintptr_t) katom->jc;
+ struct base_jit_alloc_info *info;
+ struct kbase_context *kctx = katom->kctx;
+ u32 count;
+ int ret;
+ u32 i;
+
+ /* For backwards compatibility */
+ if (katom->nr_extres == 0)
+ katom->nr_extres = 1;
+ count = katom->nr_extres;
+
+ /* Sanity checks */
+ if (!data || count > kctx->jit_max_allocations ||
+ count > ARRAY_SIZE(kctx->jit_alloc)) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* Copy the information for safe access and future storage */
+ info = kmalloc_array(count, sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ if (copy_from_user(info, data, sizeof(*info)*count) != 0) {
+ ret = -EINVAL;
+ goto free_info;
+ }
katom->softjob_data = info;
+
+ for (i = 0; i < count; i++, info++) {
+ ret = kbasep_jit_alloc_validate(kctx, info);
+ if (ret)
+ goto free_info;
+ }
+
katom->jit_blocked = false;
lockdep_assert_held(&kctx->jctx.lock);
@@ -957,18 +959,38 @@
return 0;
free_info:
- kfree(info);
+ kfree(katom->softjob_data);
+ katom->softjob_data = NULL;
fail:
return ret;
}
-static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom)
+static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom)
{
if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) !=
BASE_JD_REQ_SOFT_JIT_FREE))
- return 0;
+ return NULL;
- return (u8) katom->jc;
+ return (u8 *) katom->softjob_data;
+}
+
+static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct list_head *target_list_head = NULL;
+ struct kbase_jd_atom *entry;
+
+ list_for_each_entry(entry, &kctx->jit_pending_alloc, queue) {
+ if (katom->age < entry->age) {
+ target_list_head = &entry->queue;
+ break;
+ }
+ }
+
+ if (target_list_head == NULL)
+ target_list_head = &kctx->jit_pending_alloc;
+
+ list_add_tail(&katom->queue, target_list_head);
}
static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
@@ -978,6 +1000,8 @@
struct kbase_va_region *reg;
struct kbase_vmap_struct mapping;
u64 *ptr, new_addr;
+ u32 count = katom->nr_extres;
+ u32 i;
if (katom->jit_blocked) {
list_del(&katom->queue);
@@ -985,97 +1009,130 @@
}
info = katom->softjob_data;
-
if (WARN_ON(!info)) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return 0;
}
- /* The JIT ID is still in use so fail the allocation */
- if (kctx->jit_alloc[info->id]) {
- katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
- return 0;
+ for (i = 0; i < count; i++, info++) {
+ /* The JIT ID is still in use so fail the allocation */
+ if (kctx->jit_alloc[info->id]) {
+ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+ return 0;
+ }
}
- /* Create a JIT allocation */
- reg = kbase_jit_allocate(kctx, info);
- if (!reg) {
- struct kbase_jd_atom *jit_atom;
- bool can_block = false;
-
- lockdep_assert_held(&kctx->jctx.lock);
-
- jit_atom = list_first_entry(&kctx->jit_atoms_head,
- struct kbase_jd_atom, jit_node);
-
- list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
- if (jit_atom == katom)
- break;
- if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
- BASE_JD_REQ_SOFT_JIT_FREE) {
- u8 free_id = kbase_jit_free_get_id(jit_atom);
-
- if (free_id && kctx->jit_alloc[free_id]) {
- /* A JIT free which is active and
- * submitted before this atom
- */
- can_block = true;
- break;
- }
- }
- }
-
- if (!can_block) {
- /* Mark the allocation so we know it's in use even if
- * the allocation itself fails.
+ for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
+ if (kctx->jit_alloc[info->id]) {
+ /* The JIT ID is duplicated in this atom. Roll back
+ * previous allocations and fail.
*/
- kctx->jit_alloc[info->id] =
- (struct kbase_va_region *) -1;
+ u32 j;
+
+ info = katom->softjob_data;
+ for (j = 0; j < i; j++, info++) {
+ kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+ kctx->jit_alloc[info->id] =
+ (struct kbase_va_region *) -1;
+ }
katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
return 0;
}
- /* There are pending frees for an active allocation
- * so we should wait to see whether they free the memory.
- * Add to the beginning of the list to ensure that the atom is
- * processed only once in kbase_jit_free_finish
- */
- list_add(&katom->queue, &kctx->jit_pending_alloc);
- katom->jit_blocked = true;
+ /* Create a JIT allocation */
+ reg = kbase_jit_allocate(kctx, info);
+ if (!reg) {
+ struct kbase_jd_atom *jit_atom;
+ bool can_block = false;
- return 1;
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ jit_atom = list_first_entry(&kctx->jit_atoms_head,
+ struct kbase_jd_atom, jit_node);
+
+ list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) {
+ if (jit_atom == katom)
+ break;
+
+ if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
+ BASE_JD_REQ_SOFT_JIT_FREE) {
+ u8 *free_ids = kbase_jit_free_get_ids(jit_atom);
+
+ if (free_ids && *free_ids &&
+ kctx->jit_alloc[*free_ids]) {
+ /* A JIT free which is active and
+ * submitted before this atom
+ */
+ can_block = true;
+ break;
+ }
+ }
+ }
+
+ if (!can_block) {
+ /* Mark the failed allocation as well as the
+ * other un-attempted allocations in the set,
+ * so we know they are in use even if the
+ * allocation itself failed.
+ */
+ for (; i < count; i++, info++) {
+ kctx->jit_alloc[info->id] =
+ (struct kbase_va_region *) -1;
+ }
+
+ katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+ return 0;
+ }
+
+ /* There are pending frees for an active allocation
+ * so we should wait to see whether they free the
+ * memory. Add to the list of atoms for which JIT
+ * allocation is pending.
+ */
+ kbase_jit_add_to_pending_alloc_list(katom);
+ katom->jit_blocked = true;
+
+ /* Rollback, the whole set will be re-attempted */
+ while (i-- > 0) {
+ info--;
+ kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+ kctx->jit_alloc[info->id] = NULL;
+ }
+
+ return 1;
+ }
+
+ /* Bind it to the user provided ID. */
+ kctx->jit_alloc[info->id] = reg;
}
- /*
- * Write the address of the JIT allocation to the user provided
- * GPU allocation.
- */
- ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
- &mapping);
- if (!ptr) {
+ for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
/*
- * Leave the allocation "live" as the JIT free jit will be
- * submitted anyway.
+ * Write the address of the JIT allocation to the user provided
+ * GPU allocation.
*/
- katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return 0;
- }
+ ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+ &mapping);
+ if (!ptr) {
+ /*
+ * Leave the allocations "live" as the JIT free atom
+ * will be submitted anyway.
+ */
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ return 0;
+ }
- new_addr = reg->start_pfn << PAGE_SHIFT;
- *ptr = new_addr;
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
- katom, info->gpu_alloc_addr, new_addr);
- kbase_vunmap(kctx, &mapping);
+ reg = kctx->jit_alloc[info->id];
+ new_addr = reg->start_pfn << PAGE_SHIFT;
+ *ptr = new_addr;
+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+ katom, info->gpu_alloc_addr, new_addr);
+ kbase_vunmap(kctx, &mapping);
+ }
katom->event_code = BASE_JD_EVENT_DONE;
- /*
- * Bind it to the user provided ID. Do this last so we can check for
- * the JIT free racing this JIT alloc job.
- */
- kctx->jit_alloc[info->id] = reg;
-
return 0;
}
@@ -1085,6 +1142,9 @@
lockdep_assert_held(&katom->kctx->jctx.lock);
+ if (WARN_ON(!katom->softjob_data))
+ return;
+
/* Remove atom from jit_atoms_head list */
list_del(&katom->jit_node);
@@ -1101,34 +1161,76 @@
static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
{
struct kbase_context *kctx = katom->kctx;
+ __user void *data = (__user void *)(uintptr_t) katom->jc;
+ u8 *ids;
+ u32 count = MAX(katom->nr_extres, 1);
+ int ret;
+
+ /* Sanity checks */
+ if (count > ARRAY_SIZE(kctx->jit_alloc)) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* Copy the information for safe access and future storage */
+ ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
+ if (!ids) {
+ ret = -ENOMEM;
+ goto fail;
+ }
lockdep_assert_held(&kctx->jctx.lock);
+ katom->softjob_data = ids;
+
+ /* For backwards compatibility */
+ if (katom->nr_extres) {
+ /* Fail the job if there is no list of ids */
+ if (!data) {
+ ret = -EINVAL;
+ goto free_info;
+ }
+
+ if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) {
+ ret = -EINVAL;
+ goto free_info;
+ }
+ } else {
+ katom->nr_extres = 1;
+ *ids = (u8)katom->jc;
+ }
+
list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
return 0;
+
+free_info:
+ kfree(katom->softjob_data);
+ katom->softjob_data = NULL;
+fail:
+ return ret;
}
static void kbase_jit_free_process(struct kbase_jd_atom *katom)
{
struct kbase_context *kctx = katom->kctx;
- u8 id = kbase_jit_free_get_id(katom);
+ u8 *ids = kbase_jit_free_get_ids(katom);
+ u32 count = katom->nr_extres;
+ u32 i;
- /*
- * If the ID is zero or it is not in use yet then fail the job.
- */
- if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+ if (ids == NULL) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
return;
}
- /*
- * If the ID is valid but the allocation request failed still succeed
- * this soft job but don't try and free the allocation.
- */
- if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
- kbase_jit_free(kctx, kctx->jit_alloc[id]);
-
- kctx->jit_alloc[id] = NULL;
+ for (i = 0; i < count; i++, ids++) {
+ /*
+ * If the ID is zero or it is not in use yet then fail the job.
+ */
+ if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) {
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ return;
+ }
+ }
}
static void kbasep_jit_free_finish_worker(struct work_struct *work)
@@ -1151,12 +1253,39 @@
{
struct list_head *i, *tmp;
struct kbase_context *kctx = katom->kctx;
+ LIST_HEAD(jit_pending_alloc_list);
+ u8 *ids;
+ size_t j;
lockdep_assert_held(&kctx->jctx.lock);
+
+ ids = kbase_jit_free_get_ids(katom);
+ if (WARN_ON(ids == NULL)) {
+ return;
+ }
+
/* Remove this atom from the kctx->jit_atoms_head list */
list_del(&katom->jit_node);
- list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) {
+ for (j = 0; j != katom->nr_extres; ++j) {
+ if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) {
+			/*
+			 * If the ID is valid but the allocation request
+			 * failed, still succeed this soft job but don't try
+			 * to free the allocation.
+			 */
+ if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1)
+ kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]);
+
+ kctx->jit_alloc[ids[j]] = NULL;
+ }
+ }
+ /* Free the list of ids */
+ kfree(ids);
+
+ list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list);
+
+ list_for_each_safe(i, tmp, &jit_pending_alloc_list) {
struct kbase_jd_atom *pending_atom = list_entry(i,
struct kbase_jd_atom, queue);
if (kbase_jit_allocate_process(pending_atom) == 0) {
@@ -1295,11 +1424,14 @@
int kbase_process_soft_job(struct kbase_jd_atom *katom)
{
+ int ret = 0;
+
KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom);
switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
- return kbase_dump_cpu_gpu_time(katom);
+ ret = kbase_dump_cpu_gpu_time(katom);
+ break;
#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
@@ -1309,7 +1441,7 @@
break;
case BASE_JD_REQ_SOFT_FENCE_WAIT:
{
- int ret = kbase_sync_fence_in_wait(katom);
+ ret = kbase_sync_fence_in_wait(katom);
if (ret == 1) {
#ifdef CONFIG_MALI_FENCE_DEBUG
@@ -1318,14 +1450,16 @@
kbasep_add_waiting_soft_job(katom);
#endif
}
- return ret;
+ break;
}
#endif
case BASE_JD_REQ_SOFT_REPLAY:
- return kbase_replay_process(katom);
+ ret = kbase_replay_process(katom);
+ break;
case BASE_JD_REQ_SOFT_EVENT_WAIT:
- return kbasep_soft_event_wait(katom);
+ ret = kbasep_soft_event_wait(katom);
+ break;
case BASE_JD_REQ_SOFT_EVENT_SET:
kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET);
break;
@@ -1341,7 +1475,8 @@
break;
}
case BASE_JD_REQ_SOFT_JIT_ALLOC:
- return kbase_jit_allocate_process(katom);
+ ret = kbase_jit_allocate_process(katom);
+ break;
case BASE_JD_REQ_SOFT_JIT_FREE:
kbase_jit_free_process(katom);
break;
@@ -1354,7 +1489,8 @@
}
/* Atom is complete */
- return 0;
+ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom);
+ return ret;
}
void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
@@ -1460,7 +1596,6 @@
void kbase_finish_soft_job(struct kbase_jd_atom *katom)
{
- KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom);
switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
/* Nothing to do */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h
index bc603a4..7988a74 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
index 9520f5a..5239dae 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,7 @@
#include <linux/workqueue.h>
#include "mali_kbase.h"
+#include "mali_kbase_sync.h"
void kbase_sync_fence_wait_worker(struct work_struct *data)
{
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
index 64ca6d7..bb94aee 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -73,10 +73,14 @@
if (!fence)
return -ENOMEM;
- /* Take an extra reference to the fence on behalf of the katom.
- * This is needed because sync_file_create() will take ownership of
- * one of these refs */
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+	/* Take an extra reference to the fence on behalf of the sync_file.
+	 * This is only needed on kernels older than v4.9.68, where
+	 * sync_file_create() did not yet take its own reference.
+	 */
dma_fence_get(fence);
+#endif
/* create a sync_file fd representing the fence */
sync_file = sync_file_create(fence);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
index 32fffe0..d7364d5 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -236,14 +236,8 @@
/* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
- KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
- KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
- KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
- KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
- KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
- KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
- KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
- KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED),
KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
deleted file mode 100644
index ee6bdf8..0000000
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-#include <mali_kbase.h>
-#include <mali_kbase_jm.h>
-#include <mali_kbase_hwaccess_jm.h>
-
-#define CREATE_TRACE_POINTS
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-#include "mali_timeline.h"
-
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
-
-struct kbase_trace_timeline_desc {
- char *enum_str;
- char *desc;
- char *format;
- char *format_desc;
-};
-
-static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
- #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
- #include "mali_kbase_trace_timeline_defs.h"
- #undef KBASE_TIMELINE_TRACE_CODE
-};
-
-#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
-
-static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
-{
- if (*pos >= KBASE_NR_TRACE_CODES)
- return NULL;
-
- return &kbase_trace_timeline_desc_table[*pos];
-}
-
-static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
-{
-}
-
-static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
-{
- (*pos)++;
-
- if (*pos == KBASE_NR_TRACE_CODES)
- return NULL;
-
- return &kbase_trace_timeline_desc_table[*pos];
-}
-
-static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
-{
- struct kbase_trace_timeline_desc *trace_desc = data;
-
- seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
- return 0;
-}
-
-
-static const struct seq_operations kbasep_trace_timeline_seq_ops = {
- .start = kbasep_trace_timeline_seq_start,
- .next = kbasep_trace_timeline_seq_next,
- .stop = kbasep_trace_timeline_seq_stop,
- .show = kbasep_trace_timeline_seq_show,
-};
-
-static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &kbasep_trace_timeline_seq_ops);
-}
-
-static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
- .open = kbasep_trace_timeline_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-#ifdef CONFIG_DEBUG_FS
-
-void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
-{
- debugfs_create_file("mali_timeline_defs",
- S_IRUGO, kbdev->mali_debugfs_directory, NULL,
- &kbasep_trace_timeline_debugfs_fops);
-}
-
-#endif /* CONFIG_DEBUG_FS */
-
-void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
- KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
- } else {
- base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
-
- KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
- KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
- }
- ++kbdev->timeline.slot_atoms_submitted[js];
-
- KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
-}
-
-void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js,
- kbasep_js_atom_done_code done_code)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
- KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
- } else {
- /* Job finished in JS_HEAD */
- base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
-
- KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
- KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
-
- /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
- if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
- struct kbase_jd_atom *next_katom;
- struct kbase_context *next_kctx;
-
- /* Peek the next atom - note that the atom in JS_HEAD will already
- * have been dequeued */
- next_katom = kbase_backend_inspect_head(kbdev, js);
- WARN_ON(!next_katom);
- next_kctx = next_katom->kctx;
- KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
- KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
- KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
- }
- }
-
- --kbdev->timeline.slot_atoms_submitted[js];
-
- KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
-}
-
-void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
-{
- int uid = 0;
- int old_uid;
-
- /* If a producer already exists for the event, try to use their UID (multiple-producers) */
- uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
- old_uid = uid;
-
- /* Get a new non-zero UID if we don't have one yet */
- while (!uid)
- uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
-
- /* Try to use this UID */
- if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
- /* If it changed, raced with another producer: we've lost this UID */
- uid = 0;
-
- KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
-}
-
-void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
- int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
-
- if (uid != 0) {
- if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
- /* If it changed, raced with another consumer: we've lost this UID */
- uid = 0;
-
- KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
- }
-}
-
-void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
- int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
-
- if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
- /* If it changed, raced with another consumer: we've lost this UID */
- uid = 0;
-
- KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
-}
-
-void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
- /* Simply log the start of the transition */
- kbdev->timeline.l2_transitioning = true;
- KBASE_TIMELINE_POWERING_L2(kbdev);
-}
-
-void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
- /* Simply log the end of the transition */
- if (kbdev->timeline.l2_transitioning) {
- kbdev->timeline.l2_transitioning = false;
- KBASE_TIMELINE_POWERED_L2(kbdev);
- }
-}
-
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
deleted file mode 100644
index c1a3dfc..0000000
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
+++ /dev/null
@@ -1,368 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-#if !defined(_KBASE_TRACE_TIMELINE_H)
-#define _KBASE_TRACE_TIMELINE_H
-
-#ifdef CONFIG_MALI_TRACE_TIMELINE
-
-enum kbase_trace_timeline_code {
- #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
- #include "mali_kbase_trace_timeline_defs.h"
- #undef KBASE_TIMELINE_TRACE_CODE
-};
-
-#ifdef CONFIG_DEBUG_FS
-
-/** Initialize Timeline DebugFS entries */
-void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
-
-#else /* CONFIG_DEBUG_FS */
-
-#define kbasep_trace_timeline_debugfs_init CSTD_NOP
-
-#endif /* CONFIG_DEBUG_FS */
-
-/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
- * functions.
- * Output is timestamped by either sched_clock() (default), local_clock(), or
- * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
-#include "mali_timeline.h"
-
-/* Trace number of atoms in flight for kctx (atoms either not completed, or in
- process of being returned to user */
-#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \
- (int)kctx->timeline.owner_tgid, \
- count); \
- } while (0)
-
-/* Trace atom_id being Ready to Run */
-#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \
- CTX_FLOW_ATOM_READY, \
- (int)kctx->timeline.owner_tgid, \
- atom_id); \
- } while (0)
-
-/* Trace number of atoms submitted to job slot js
- *
- * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
- * so that those actions can be filtered out separately from this
- *
- * This is because this is more useful, as we can use it to calculate general
- * utilization easily and accurately */
-#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_SLOT_ACTIVE, \
- (int)kctx->timeline.owner_tgid, \
- js, count); \
- } while (0)
-
-
-/* Trace atoms present in JS_NEXT */
-#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_SLOT_NEXT, \
- (int)kctx->timeline.owner_tgid, \
- js, count); \
- } while (0)
-
-/* Trace atoms present in JS_HEAD */
-#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_SLOT_HEAD, \
- (int)kctx->timeline.owner_tgid, \
- js, count); \
- } while (0)
-
-/* Trace that a soft stop/evict from next is being attempted on a slot */
-#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_SLOT_STOPPING, \
- (kctx) ? (int)kctx->timeline.owner_tgid : 0, \
- js, count); \
- } while (0)
-
-
-
-/* Trace state of overall GPU power */
-#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_POWER_ACTIVE, active); \
- } while (0)
-
-/* Trace state of tiler power */
-#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_POWER_TILER_ACTIVE, \
- hweight64(bitmap)); \
- } while (0)
-
-/* Trace number of shaders currently powered */
-#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_POWER_SHADER_ACTIVE, \
- hweight64(bitmap)); \
- } while (0)
-
-/* Trace state of L2 power */
-#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
- SW_SET_GPU_POWER_L2_ACTIVE, \
- hweight64(bitmap)); \
- } while (0)
-
-/* Trace state of L2 cache*/
-#define KBASE_TIMELINE_POWERING_L2(kbdev) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
- SW_FLOW_GPU_POWER_L2_POWERING, \
- 1); \
- } while (0)
-
-#define KBASE_TIMELINE_POWERED_L2(kbdev) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
- SW_FLOW_GPU_POWER_L2_ACTIVE, \
- 1); \
- } while (0)
-
-/* Trace kbase_pm_send_event message send */
-#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
- SW_FLOW_PM_SEND_EVENT, \
- event_type, pm_event_id); \
- } while (0)
-
-/* Trace kbase_pm_worker message receive */
-#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
- SW_FLOW_PM_HANDLE_EVENT, \
- event_type, pm_event_id); \
- } while (0)
-
-
-/* Trace atom_id starting in JS_HEAD */
-#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
- HW_START_GPU_JOB_CHAIN_SW_APPROX, \
- (int)kctx->timeline.owner_tgid, \
- js, _consumerof_atom_number); \
- } while (0)
-
-/* Trace atom_id stopping on JS_HEAD */
-#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
- HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \
- (int)kctx->timeline.owner_tgid, \
- js, _producerof_atom_number_completed); \
- } while (0)
-
-/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
- * certin caller */
-#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \
- trace_code, 1); \
- } while (0)
-
-/* Trace number of contexts active */
-#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \
- do { \
- struct timespec ts; \
- getrawmonotonic(&ts); \
- trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \
- count); \
- } while (0)
-
-/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
-
-/**
- * Trace that an atom is starting on a job slot
- *
- * The caller must be holding hwaccess_lock
- */
-void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js);
-
-/**
- * Trace that an atom has done on a job slot
- *
- * 'Done' in this sense can occur either because:
- * - the atom in JS_HEAD finished
- * - the atom in JS_NEXT was evicted
- *
- * Whether the atom finished or was evicted is passed in @a done_code
- *
- * It is assumed that the atom has already been removed from the submit slot,
- * with either:
- * - kbasep_jm_dequeue_submit_slot()
- * - kbasep_jm_dequeue_tail_submit_slot()
- *
- * The caller must be holding hwaccess_lock
- */
-void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js,
- kbasep_js_atom_done_code done_code);
-
-
-/** Trace a pm event starting */
-void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
- enum kbase_timeline_pm_event event_sent);
-
-/** Trace a pm event finishing */
-void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
-
-/** Check whether a pm event was present, and if so trace finishing it */
-void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
-
-/** Trace L2 power-up start */
-void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
-
-/** Trace L2 power-up done */
-void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
-
-#else
-
-#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
-
-#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
-
-#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
-
-#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP()
-
-#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
-
-#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
-
-#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
-
-#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
-
-#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
-
-static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-}
-
-static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
- struct kbase_jd_atom *katom, int js,
- kbasep_js_atom_done_code done_code)
-{
- lockdep_assert_held(&kbdev->hwaccess_lock);
-}
-
-static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
-{
-}
-
-static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
-}
-
-static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
-{
-}
-
-static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
-{
-}
-
-static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
-{
-}
-#endif /* CONFIG_MALI_TRACE_TIMELINE */
-
-#endif /* _KBASE_TRACE_TIMELINE_H */
-
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
deleted file mode 100644
index 114bcac..0000000
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
- * ***** DO NOT INCLUDE DIRECTLY *****
- * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
-
-/*
- * Conventions on Event Names:
- *
- * - The prefix determines something about how the timeline should be
- * displayed, and is split up into various parts, separated by underscores:
- * - 'SW' and 'HW' as the first part will be used to determine whether a
- * timeline is to do with Software or Hardware - effectively, separate
- * 'channels' for Software and Hardware
- * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
- * signify related pairs of events - these are optional.
- * - 'FLOW' indicates a generic event, which can use dependencies
- * - This gives events such as:
- * - 'SW_ENTER_FOO'
- * - 'SW_LEAVE_FOO'
- * - 'SW_FLOW_BAR_1'
- * - 'SW_FLOW_BAR_2'
- * - 'HW_START_BAZ'
- * - 'HW_STOP_BAZ'
- * - And an unadorned HW event:
- * - 'HW_BAZ_FROZBOZ'
- */
-
-/*
- * Conventions on parameter names:
- * - anything with 'instance' in the name will have a separate timeline based
- * on that instances.
- * - underscored-prefixed parameters will by hidden by default on timelines
- *
- * Hence:
- * - Different job slots have their own 'instance', based on the instance value
- * - Per-context info (e.g. atoms on a context) have their own 'instance'
- * (i.e. each context should be on a different timeline)
- *
- * Note that globally-shared resources can be tagged with a tgid, but we don't
- * want an instance per context:
- * - There's no point having separate Job Slot timelines for each context, that
- * would be confusing - there's only really 3 job slots!
- * - There's no point having separate Shader-powered timelines for each
- * context, that would be confusing - all shader cores (whether it be 4, 8,
- * etc) are shared in the system.
- */
-
- /*
- * CTX events
- */
- /* Separate timelines for each context 'instance'*/
- KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"),
- KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
-
- /*
- * SW Events
- */
- /* Separate timelines for each slot 'instance' */
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
- /* Shader and overall power is shared - can't have separate instances of
- * it, just tagging with the context */
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"),
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"),
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"),
- KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"),
-
- /* SW Power event messaging. _event_type is one from the kbase_pm_event enum */
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
- /* SW L2 power events */
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
-
- KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"),
-
- /*
- * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
- */
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
-
- /*
- * Significant Indirect callers of kbase_pm_check_transitions_nolock()
- */
- /* kbase_pm_request_cores */
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
- /* kbase_pm_release_cores */
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
- KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
- /*
- * END: SW Functions that call kbase_pm_check_transitions_nolock()
- */
-
- /*
- * HW Events
- */
- KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT,
-"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"),
- KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX,
-"HW: Job Chain start (SW approximated)", "%d,%d,%d",
-"_tgid,job_slot,_consumerof_atom_number_ready"),
- KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX,
-"HW: Job Chain stop (SW approximated)", "%d,%d,%d",
-"_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
index 60e1800..df936cf 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -117,13 +117,18 @@
* in progress. Resume worker should queue a suspend.
* @need_resume: when true, a resume has been requested while a suspend is
* in progress. Suspend worker should queue a resume.
+ * @forced_suspend: when true, the suspend of vinstr needs to take place
+ * regardless of the kernel/user space clients attached
+ * to it. In particular, this flag is set when the suspend
+ * of vinstr is requested on entering protected mode or at
+ * the time of device suspend.
*/
struct kbase_vinstr_context {
struct mutex lock;
struct kbase_device *kbdev;
struct kbase_context *kctx;
- struct kbase_vmap_struct vmap;
+ struct kbase_vmap_struct *vmap;
u64 gpu_va;
void *cpu_va;
size_t dump_size;
@@ -151,6 +156,7 @@
bool need_suspend;
bool need_resume;
+ bool forced_suspend;
};
/**
@@ -332,9 +338,15 @@
#endif /* CONFIG_MALI_NO_MALI */
{
/* assume v5 for now */
+#ifdef CONFIG_MALI_NO_MALI
+ u32 nr_l2 = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ u64 core_mask =
+ (1ULL << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
base_gpu_props *props = &kbdev->gpu_props.props;
u32 nr_l2 = props->l2_props.num_l2_slices;
u64 core_mask = props->coherency_info.group[0].core_mask;
+#endif
u32 nr_blocks = fls64(core_mask);
/* JM and tiler counter blocks are always present */
@@ -359,7 +371,11 @@
struct kbase_context *kctx = vinstr_ctx->kctx;
u64 flags, nr_pages;
- flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR |
+ BASE_MEM_PERMANENT_KERNEL_MAPPING | BASE_MEM_CACHED_CPU;
+ if (kctx->kbdev->mmu_mode->flags &
+ KBASE_MMU_MODE_HAS_NON_CACHEABLE)
+ flags |= BASE_MEM_UNCACHED_GPU;
vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
nr_pages = PFN_UP(vinstr_ctx->dump_size);
@@ -368,11 +384,9 @@
if (!reg)
return -ENOMEM;
- vinstr_ctx->cpu_va = kbase_vmap(
- kctx,
- vinstr_ctx->gpu_va,
- vinstr_ctx->dump_size,
- &vinstr_ctx->vmap);
+ vinstr_ctx->cpu_va = kbase_phy_alloc_mapping_get(kctx,
+ vinstr_ctx->gpu_va, &vinstr_ctx->vmap);
+
if (!vinstr_ctx->cpu_va) {
kbase_mem_free(kctx, vinstr_ctx->gpu_va);
return -ENOMEM;
@@ -386,7 +400,7 @@
{
struct kbase_context *kctx = vinstr_ctx->kctx;
- kbase_vunmap(kctx, &vinstr_ctx->vmap);
+ kbase_phy_alloc_mapping_put(kctx, vinstr_ctx->vmap);
kbase_mem_free(kctx, vinstr_ctx->gpu_va);
}
@@ -1034,6 +1048,16 @@
rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
WARN_ON(rcode);
+ if (!rcode) {
+		/* Invalidate the kernel buffer before reading from it.
+		 * As the vinstr_ctx->lock is already held by the caller, the
+		 * unmap of the kernel buffer cannot take place concurrently.
+		 */
+ lockdep_assert_held(&vinstr_ctx->lock);
+ kbase_sync_mem_regions(vinstr_ctx->kctx, vinstr_ctx->vmap,
+ KBASE_SYNC_TO_CPU);
+ }
+
spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
switch (vinstr_ctx->state) {
case VINSTR_SUSPENDING:
@@ -2142,6 +2166,7 @@
KBASE_DEBUG_ASSERT(vinstr_ctx);
spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+ vinstr_ctx->forced_suspend = true;
switch (vinstr_ctx->state) {
case VINSTR_SUSPENDED:
vinstr_ctx->suspend_cnt++;
@@ -2246,7 +2271,7 @@
break;
case VINSTR_SUSPENDING:
- if (vinstr_ctx->nclients)
+ if ((vinstr_ctx->nclients) && (!vinstr_ctx->forced_suspend))
vinstr_ctx->need_resume = true;
break;
@@ -2281,6 +2306,7 @@
BUG_ON(0 == vinstr_ctx->suspend_cnt);
vinstr_ctx->suspend_cnt--;
if (0 == vinstr_ctx->suspend_cnt) {
+ vinstr_ctx->forced_suspend = false;
if (vinstr_ctx->clients_present) {
vinstr_ctx->state = VINSTR_RESUMING;
schedule_work(&vinstr_ctx->resume_work);
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
index da2ffaf..920562e 100644
--- a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -176,11 +176,10 @@
DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
-DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
-DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
-DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
-DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED);
DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
index 1808500..8d9f7b6 100644
--- a/drivers/gpu/arm/midgard/mali_midg_regmap.h
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -45,6 +45,7 @@
#define GPU_IRQ_MASK 0x028 /* (RW) */
#define GPU_IRQ_STATUS 0x02C /* (RO) */
+
/* IRQ flags */
#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
@@ -215,6 +216,9 @@
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
+/* JOB IRQ flags */
+#define JOB_IRQ_GLOBAL_IF (1 << 18) /* Global interface interrupt received */
+
#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
@@ -497,7 +501,7 @@
#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */
-/* AS<n>_MEMATTR values: */
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
/* Use GPU implementation-defined caching policy. */
#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
/* The attribute set to force all resources to be cached. */
@@ -509,6 +513,12 @@
#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
/* Set to write back memory, outer caching */
#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
+/* Set to inner non-cacheable, outer non-cacheable.
+ * The setting defined by the alloc bits is ignored, but set to a valid
+ * encoding:
+ * - no alloc on read
+ * - no alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
/* Use GPU implementation-defined caching policy. */
#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
@@ -520,6 +530,11 @@
#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull
/* Set to write back memory, outer caching */
#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull
+/* There is no LPAE support for non-cacheable, since the memory type is always
+ * write-back.
+ * Marking this setting as reserved for LPAE
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
/* Symbols for default MEMATTR to use
* Default is - HW implementation defined caching */
@@ -536,6 +551,8 @@
#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
/* Outer coherent, write alloc inner */
#define AS_MEMATTR_INDEX_OUTER_WA 4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
/* JS<n>_FEATURES register */
diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h
deleted file mode 100644
index d0deead..0000000
--- a/drivers/gpu/arm/midgard/mali_timeline.h
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * SPDX-License-Identifier: GPL-2.0
- *
- */
-
-
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM mali_timeline
-
-#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _MALI_TIMELINE_H
-
-#include <linux/tracepoint.h>
-
-TRACE_EVENT(mali_timeline_atoms_in_flight,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int tgid,
- int count),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- tgid,
- count),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, tgid)
- __field(int, count)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->tgid = tgid;
- __entry->count = count;
- ),
-
- TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->tgid,
- __entry->count)
-);
-
-
-TRACE_EVENT(mali_timeline_atom,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int event_type,
- int tgid,
- int atom_id),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- event_type,
- tgid,
- atom_id),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, event_type)
- __field(int, tgid)
- __field(int, atom_id)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->event_type = event_type;
- __entry->tgid = tgid;
- __entry->atom_id = atom_id;
- ),
-
- TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->tgid,
- __entry->atom_id,
- __entry->atom_id)
-);
-
-TRACE_EVENT(mali_timeline_gpu_slot_active,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int event_type,
- int tgid,
- int js,
- int count),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- event_type,
- tgid,
- js,
- count),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, event_type)
- __field(int, tgid)
- __field(int, js)
- __field(int, count)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->event_type = event_type;
- __entry->tgid = tgid;
- __entry->js = js;
- __entry->count = count;
- ),
-
- TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->tgid,
- __entry->js,
- __entry->count)
-);
-
-TRACE_EVENT(mali_timeline_gpu_slot_action,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int event_type,
- int tgid,
- int js,
- int count),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- event_type,
- tgid,
- js,
- count),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, event_type)
- __field(int, tgid)
- __field(int, js)
- __field(int, count)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->event_type = event_type;
- __entry->tgid = tgid;
- __entry->js = js;
- __entry->count = count;
- ),
-
- TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->tgid,
- __entry->js,
- __entry->count)
-);
-
-TRACE_EVENT(mali_timeline_gpu_power_active,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int event_type,
- int active),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- event_type,
- active),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, event_type)
- __field(int, active)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->event_type = event_type;
- __entry->active = active;
- ),
-
- TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->active)
-
-);
-
-TRACE_EVENT(mali_timeline_l2_power_active,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int event_type,
- int state),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- event_type,
- state),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, event_type)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->event_type = event_type;
- __entry->state = state;
- ),
-
- TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->state)
-
-);
-TRACE_EVENT(mali_timeline_pm_event,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int event_type,
- int pm_event_type,
- unsigned int pm_event_id),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- event_type,
- pm_event_type,
- pm_event_id),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, event_type)
- __field(int, pm_event_type)
- __field(unsigned int, pm_event_id)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->event_type = event_type;
- __entry->pm_event_type = pm_event_type;
- __entry->pm_event_id = pm_event_id;
- ),
-
- TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->pm_event_type, __entry->pm_event_id)
-
-);
-
-TRACE_EVENT(mali_timeline_slot_atom,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int event_type,
- int tgid,
- int js,
- int atom_id),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- event_type,
- tgid,
- js,
- atom_id),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, event_type)
- __field(int, tgid)
- __field(int, js)
- __field(int, atom_id)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->event_type = event_type;
- __entry->tgid = tgid;
- __entry->js = js;
- __entry->atom_id = atom_id;
- ),
-
- TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->tgid,
- __entry->js,
- __entry->atom_id)
-);
-
-TRACE_EVENT(mali_timeline_pm_checktrans,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int trans_code,
- int trans_id),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- trans_code,
- trans_id),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, trans_code)
- __field(int, trans_id)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->trans_code = trans_code;
- __entry->trans_id = trans_id;
- ),
-
- TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->trans_id)
-
-);
-
-TRACE_EVENT(mali_timeline_context_active,
-
- TP_PROTO(u64 ts_sec,
- u32 ts_nsec,
- int count),
-
- TP_ARGS(ts_sec,
- ts_nsec,
- count),
-
- TP_STRUCT__entry(
- __field(u64, ts_sec)
- __field(u32, ts_nsec)
- __field(int, count)
- ),
-
- TP_fast_assign(
- __entry->ts_sec = ts_sec;
- __entry->ts_nsec = ts_nsec;
- __entry->count = count;
- ),
-
- TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
- (int)__entry->ts_sec,
- (int)__entry->ts_nsec,
- __entry->count)
-);
-
-#endif /* _MALI_TIMELINE_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
-
diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h
index 961a4a5..c81f404 100644
--- a/drivers/gpu/arm/midgard/mali_uk.h
+++ b/drivers/gpu/arm/midgard/mali_uk.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,7 +44,7 @@
* @defgroup uk_api User-Kernel Interface API
*
* The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
- * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver.
*
* It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
* kernel-side API (UKK) via an OS-specific communication mechanism.
diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript
index 4c38f2a..01c7589 100644
--- a/drivers/gpu/arm/midgard/sconscript
+++ b/drivers/gpu/arm/midgard/sconscript
@@ -52,6 +52,7 @@
'MALI_RELEASE_NAME=%s' % env['mali_release_name'],
'MALI_MOCK_TEST=%s' % mock_test,
'MALI_CUSTOMER_RELEASE=%s' % env['release'],
+ 'MALI_USE_CSF=%s' % env['csf'],
'MALI_COVERAGE=%s' % env['coverage'],
]
diff --git a/drivers/gpu/arm/midgard/tests/Mconfig b/drivers/gpu/arm/midgard/tests/Mconfig
index f692e34..ddd7630 100644
--- a/drivers/gpu/arm/midgard/tests/Mconfig
+++ b/drivers/gpu/arm/midgard/tests/Mconfig
@@ -20,3 +20,8 @@
bool
default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
default n
+
+config BUILD_CSF_TESTS
+ bool
+ default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF
+ default n
diff --git a/drivers/gpu/arm/midgard/tests/build.bp b/drivers/gpu/arm/midgard/tests/build.bp
index 28a756b..3107062 100644
--- a/drivers/gpu/arm/midgard/tests/build.bp
+++ b/drivers/gpu/arm/midgard/tests/build.bp
@@ -19,7 +19,6 @@
"kernel/drivers/gpu/arm/midgard/backend/gpu",
"kernel/drivers/gpu/arm/midgard/tests/include",
],
- extra_symbols: ["kutf"],
}
subdirs = [
@@ -33,4 +32,5 @@
"mali_kutf_ipa_test",
"mali_kutf_ipa_unit_test",
"mali_kutf_vinstr_test",
+ "mali_kutf_fw_test",
]
diff --git a/drivers/gpu/arm/midgard/tests/kutf/build.bp b/drivers/gpu/arm/midgard/tests/kutf/build.bp
index f6d4c3f..960c8faa 100644
--- a/drivers/gpu/arm/midgard/tests/kutf/build.bp
+++ b/drivers/gpu/arm/midgard/tests/kutf/build.bp
@@ -25,7 +25,7 @@
kbuild_options: ["CONFIG_MALI_KUTF=m"],
include_dirs: ["kernel/drivers/gpu/arm/midgard/tests/include"],
enabled: false,
- unit_test_kernel_modules: {
+ base_build_kutf: {
enabled: true,
},
}
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
index 40df117..9218a40 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile
@@ -31,6 +31,7 @@
TEST_CCFLAGS := \
-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+ -DMALI_USE_CSF=$(MALI_USE_CSF) \
$(SCONS_CFLAGS) \
-I$(CURDIR)/../include \
-I$(CURDIR)/../../../../../../include \
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
index e1f77b0..a6669af 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
@@ -17,10 +17,13 @@
"Kbuild",
"mali_kutf_irq_test_main.c",
],
- extra_symbols: ["mali_kbase"],
+ extra_symbols: [
+ "mali_kbase",
+ "kutf",
+ ],
install_group: "IG_tests",
enabled: false,
- unit_test_kernel_modules: {
+ base_build_kutf: {
enabled: true,
kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"],
},
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
index 5013a9d..4181b7f 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,15 +90,14 @@
struct kbase_device *kbdev = kbase_untag(data);
u32 val;
- val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
if (val & TEST_IRQ) {
struct timespec tval;
getnstimeofday(&tval);
irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val,
- NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
triggered = true;
wake_up(&wait);
@@ -194,7 +193,7 @@
/* Trigger fake IRQ */
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
- TEST_IRQ, NULL);
+ TEST_IRQ);
ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT);
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
index 0ec5ce7..76e3730 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript
@@ -29,7 +29,7 @@
cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [])
env.KernelObjTarget('mali_kutf_irq_test', cmd)
else:
- makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
+ makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} MALI_USE_CSF=${csf} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR')
cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction])
env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko')
env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko')
diff --git a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
index 6857eb7..3aab51a 100644
--- a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
+++ b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
@@ -49,12 +49,15 @@
* alignment, length and limits for the allocation
* @is_shader_code: True if the allocation is for shader code (which has
* additional alignment requirements)
+ * @is_same_4gb_page: True if the allocation needs to reside completely within
+ * a 4GB chunk
*
* Return: true if gap_end is now aligned correctly and is still in range,
* false otherwise
*/
static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
- struct vm_unmapped_area_info *info, bool is_shader_code)
+ struct vm_unmapped_area_info *info, bool is_shader_code,
+ bool is_same_4gb_page)
{
/* Compute highest gap address at the desired alignment */
(*gap_end) -= info->length;
@@ -72,6 +75,35 @@
if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
info->length) & BASE_MEM_MASK_4GB))
return false;
+ } else if (is_same_4gb_page) {
+ unsigned long start = *gap_end;
+ unsigned long end = *gap_end + info->length;
+ unsigned long mask = ~((unsigned long)U32_MAX);
+
+ /* Check if 4GB boundary is straddled */
+ if ((start & mask) != ((end - 1) & mask)) {
+ unsigned long offset = end - (end & mask);
+ /* This is to ensure that alignment doesn't get
+ * disturbed in an attempt to prevent straddling at
+ * 4GB boundary. The GPU VA is aligned to 2MB when the
+ * allocation size is > 2MB and there is enough CPU &
+ * GPU virtual space.
+ */
+ unsigned long rounded_offset =
+ ALIGN(offset, info->align_mask + 1);
+
+ start -= rounded_offset;
+ end -= rounded_offset;
+
+ *gap_end = start;
+
+ /* The preceding 4GB boundary shall not get straddled,
+ * even after accounting for the alignment, as the
+ * size of allocation is limited to 4GB and the initial
+ * start location was already aligned.
+ */
+ WARN_ON((start & mask) != ((end - 1) & mask));
+ }
}
@@ -89,6 +121,8 @@
* @is_shader_code: Boolean which denotes whether the allocated area is
* intended for the use by shader core in which case a
* special alignment requirements apply.
+ * @is_same_4gb_page: Boolean which indicates whether the allocated area needs
+ * to reside completely within a 4GB chunk.
*
* The unmapped_area_topdown() function in the Linux kernel is not exported
* using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
@@ -97,25 +131,26 @@
* of this function and prefixed it with 'kbase_'.
*
* The difference in the call parameter list comes from the fact that
- * kbase_unmapped_area_topdown() is called with additional parameter which
- * is provided to denote whether the allocation is for a shader core memory
- * or not. This is significant since the executable shader core memory has
- * additional alignment requirements.
+ * kbase_unmapped_area_topdown() is called with additional parameters which
+ * are provided to indicate whether the allocation is for a shader core memory,
+ * which has additional alignment requirements, and whether the allocation can
+ * straddle a 4GB boundary.
*
* The modification of the original Linux function lies in how the computation
* of the highest gap address at the desired alignment is performed once the
* gap with desirable properties is found. For this purpose a special function
* is introduced (@ref align_and_check()) which beside computing the gap end
- * at the desired alignment also performs additional alignment check for the
- * case when the memory is executable shader core memory. For such case, it is
- * ensured that the gap does not end on a 4GB boundary.
+ * at the desired alignment also performs additional alignment checks for the
+ * case when the memory is executable shader core memory, for which it is
+ * ensured that the gap does not end on a 4GB boundary, and for the case when
+ * memory needs to be confined within a 4GB chunk.
*
* Return: address of the found gap end (high limit) if area is found;
* -ENOMEM if search is unsuccessful
*/
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
- *info, bool is_shader_code)
+ *info, bool is_shader_code, bool is_same_4gb_page)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -142,7 +177,8 @@
/* Check highest gap, which does not precede any rbtree node */
gap_start = mm->highest_vm_end;
if (gap_start <= high_limit) {
- if (align_and_check(&gap_end, gap_start, info, is_shader_code))
+ if (align_and_check(&gap_end, gap_start, info,
+ is_shader_code, is_same_4gb_page))
return gap_end;
}
@@ -178,7 +214,7 @@
gap_end = info->high_limit;
if (align_and_check(&gap_end, gap_start, info,
- is_shader_code))
+ is_shader_code, is_same_4gb_page))
return gap_end;
}
@@ -232,6 +268,7 @@
int gpu_pc_bits =
kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
bool is_shader_code = false;
+ bool is_same_4gb_page = false;
unsigned long ret;
/* err on fixed address */
@@ -291,6 +328,8 @@
align_mask = extent_bytes - 1;
align_offset =
extent_bytes - (reg->initial_commit << PAGE_SHIFT);
+ } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+ is_same_4gb_page = true;
}
#ifndef CONFIG_64BIT
} else {
@@ -306,7 +345,8 @@
info.align_offset = align_offset;
info.align_mask = align_mask;
- ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+ ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+ is_same_4gb_page);
if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
high_limit < (kctx->same_va_end << PAGE_SHIFT)) {
@@ -315,7 +355,8 @@
info.high_limit = min_t(u64, TASK_SIZE,
(kctx->same_va_end << PAGE_SHIFT));
- ret = kbase_unmapped_area_topdown(&info, is_shader_code);
+ ret = kbase_unmapped_area_topdown(&info, is_shader_code,
+ is_same_4gb_page);
}
return ret;